45 Commits

Author SHA1 Message Date
f75a47e273 Add post about sidenotes 2019-12-08 23:47:52 -08:00
9eae560cae Make sidenotes mobile-friendly 2019-12-07 00:16:59 -08:00
b0529a9124 Add initial implementation of sidenotes 2019-12-06 00:10:26 -08:00
3df9c57482 Fix naming issue (this is really a compiler bug) 2019-12-05 19:36:47 -08:00
cb5163e1d9 Add gitignore file 2019-12-04 14:35:23 -08:00
c309ac4c14 Rename page and add pop instruction to part 5 of compiler series 2019-11-14 11:05:17 -08:00
58c9d5f982 Fix another typo in compiler series 2019-11-13 13:51:32 -08:00
dc9a68ad10 Fix mistakes in blog posts 2019-11-13 13:49:47 -08:00
db16dbda18 Fix incorrect CMakeLists.txt 2019-11-13 13:47:04 -08:00
172630c2ee Stop justifying titles and add lang attribute 2019-11-11 15:36:59 -08:00
6dc7734c70 Remove more draft labels 2019-11-06 22:32:57 -08:00
19a1ffbc98 Remove draft label 2019-11-06 22:32:21 -08:00
2cce2859bb Fix some typos 2019-11-06 22:27:52 -08:00
654239e29f Fix last sentence of compiler post 2019-11-06 21:34:29 -08:00
50fbe3e196 Finish draft of post 8 in compiler series 2019-11-06 21:10:53 -08:00
1a8a1c3052 Work on writing up the rest of part 8 in compiler series 2019-11-06 14:44:53 -08:00
2994f8983d Add the push operation in code in compiler series 2019-11-06 13:23:59 -08:00
64227f2873 Finish implementation of compiler 2019-11-06 12:52:42 -08:00
9aef499deb Factor out definition into separate file in compiler series 2019-11-05 10:40:51 -08:00
c79b5a4120 Start writing actual compilation code in compiler series 2019-11-05 00:42:33 -08:00
81ee50d0d4 Implement function and type creation, add text to blog in compiler series 2019-11-04 18:25:54 -08:00
43b140285f Fix missing line in runtime header in compiler series 2019-11-04 13:30:18 -08:00
adb894869e Remove code border. 2019-11-02 17:53:31 -07:00
1f6032a30e Start work on chapter 8 code for compilers 2019-11-02 17:53:15 -07:00
9531f4d8e3 Add chapter 8 starting code for compiler series 2019-11-02 16:38:11 -07:00
37097d3a40 Change SCSS to use darken, and remove input styles. 2019-10-31 14:41:52 -07:00
3aa468c2f6 Remove debug printf 2019-10-31 14:38:06 -07:00
c704187012 Use darken to specify link color 2019-10-31 14:32:34 -07:00
a834fd578e Finish initial draft of runtime posts. 2019-10-30 14:21:13 -07:00
4b5e2f4454 Write some more about runtime 2019-10-30 00:19:56 -07:00
7812b1064b Make progress on compiler posts 2019-10-26 20:30:29 -07:00
65b9f385cf Start working on runtime chapter 2019-10-15 11:13:13 -07:00
ed88d54aa6 Add post about LSP idea 2019-10-12 13:12:18 -07:00
d1b515ec5b Make max width higher 2019-10-12 00:31:43 -07:00
1ffc43af98 Link compiler posts together 2019-10-10 18:15:37 -07:00
b27dc19e57 Finish draft of part 6 of compiler series 2019-10-10 18:00:13 -07:00
df0b819b0e Fix bug from small improvements 2019-10-10 17:59:44 -07:00
21f90d85c5 Add finishing touches to code for part 6 of compiler series 2019-10-10 13:14:00 -07:00
18e3f2af55 Fix definition to resolve its own types 2019-10-09 22:51:19 -07:00
3901c9b115 Add print methods to instructions 2019-10-09 22:46:17 -07:00
d9486d08ae Fix type in compiler blog 2019-10-08 23:50:21 -07:00
d90993a93c Implement ast_case::compile for compiler series and reference code 2019-10-08 23:46:35 -07:00
7e9bd95846 Write explanations of AST refactor in compiler series 2019-10-08 21:42:25 -07:00
d3d73e0e9c Fix up compile in compiler blog part 6, and add more text. 2019-10-08 14:10:05 -07:00
d9c151d774 Continue implementation of compilation 2019-10-01 23:23:52 -07:00
95 changed files with 5994 additions and 279 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
**/build/*

View File

@@ -1,14 +1,13 @@
$basic-border: 1px solid #bfbfbf; @import "style.scss";
.gmachine-instruction { .gmachine-instruction {
display: flex; display: flex;
border: $basic-border; @include bordered-block;
border-radius: 2px;
} }
.gmachine-instruction-name { .gmachine-instruction-name {
padding: 10px; padding: 10px;
border-right: $basic-border; border-right: $standard-border;
flex-grow: 1; flex-grow: 1;
flex-basis: 20%; flex-basis: 20%;
text-align: center; text-align: center;
@@ -20,7 +19,7 @@ $basic-border: 1px solid #bfbfbf;
} }
.gmachine-inner { .gmachine-inner {
border-bottom: $basic-border; border-bottom: $standard-border;
width: 100%; width: 100%;
&:last-child { &:last-child {

View File

@@ -97,10 +97,6 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var); type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr branch_type = mgr.new_type(); type_ptr branch_type = mgr.new_type();
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
for(auto& branch : branches) { for(auto& branch : branches) {
type_env new_env = env.scope(); type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env); branch->pat->match(case_type, mgr, new_env);
@@ -108,6 +104,11 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
mgr.unify(branch_type, curr_branch_type); mgr.unify(branch_type, curr_branch_type);
} }
case_type = mgr.resolve(case_type, var);
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
return branch_type; return branch_type;
} }

View File

@@ -18,6 +18,8 @@ add_executable(compiler
env.cpp env.hpp env.cpp env.hpp
type.cpp type.hpp type.cpp type.hpp
error.cpp error.hpp error.cpp error.hpp
binop.cpp binop.hpp
instruction.cpp instruction.hpp
${BISON_parser_OUTPUTS} ${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS} ${FLEX_scanner_OUTPUTS}
main.cpp main.cpp

View File

@@ -2,18 +2,22 @@
#include <ostream> #include <ostream>
#include "error.hpp" #include "error.hpp"
std::string op_name(binop op) { static void print_indent(int n, std::ostream& to) {
switch(op) { while(n--) to << " ";
case PLUS: return "+";
case MINUS: return "-";
case TIMES: return "*";
case DIVIDE: return "/";
}
return "??";
} }
void print_indent(int n, std::ostream& to) { type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
while(n--) to << " "; node_type = typecheck(mgr, env);
return node_type;
}
void ast::resolve_common(const type_mgr& mgr) {
type_var* var;
type_ptr resolved_type = mgr.resolve(node_type, var);
if(var) throw type_error("ambiguously typed program");
resolve(mgr);
node_type = std::move(resolved_type);
} }
void ast_int::print(int indent, std::ostream& to) const { void ast_int::print(int indent, std::ostream& to) const {
@@ -25,6 +29,14 @@ type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
return type_ptr(new type_base("Int")); return type_ptr(new type_base("Int"));
} }
// Intentionally a no-op: this node does no resolution work of its own.
void ast_int::resolve(const type_mgr& mgr) const {}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
void ast_lid::print(int indent, std::ostream& to) const { void ast_lid::print(int indent, std::ostream& to) const {
print_indent(indent, to); print_indent(indent, to);
to << "LID: " << id << std::endl; to << "LID: " << id << std::endl;
@@ -34,6 +46,17 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id); return env.lookup(id);
} }
// Intentionally a no-op: this node does no resolution work of its own.
void ast_lid::resolve(const type_mgr& mgr) const {}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
}
void ast_uid::print(int indent, std::ostream& to) const { void ast_uid::print(int indent, std::ostream& to) const {
print_indent(indent, to); print_indent(indent, to);
to << "UID: " << id << std::endl; to << "UID: " << id << std::endl;
@@ -43,6 +66,14 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id); return env.lookup(id);
} }
// Intentionally a no-op: this node does no resolution work of its own.
void ast_uid::resolve(const type_mgr& mgr) const {}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}
void ast_binop::print(int indent, std::ostream& to) const { void ast_binop::print(int indent, std::ostream& to) const {
print_indent(indent, to); print_indent(indent, to);
to << "BINOP: " << op_name(op) << std::endl; to << "BINOP: " << op_name(op) << std::endl;
@@ -51,8 +82,8 @@ void ast_binop::print(int indent, std::ostream& to) const {
} }
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env); type_ptr ltype = left->typecheck_common(mgr, env);
type_ptr rtype = right->typecheck(mgr, env); type_ptr rtype = right->typecheck_common(mgr, env);
type_ptr ftype = env.lookup(op_name(op)); type_ptr ftype = env.lookup(op_name(op));
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op)); if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
@@ -64,6 +95,20 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
return return_type; return return_type;
} }
// Resolves both operands, left first and then right, via resolve_common.
// Any exception thrown by resolve_common propagates to the caller.
void ast_binop::resolve(const type_mgr& mgr) const {
    auto& lhs = *left;
    auto& rhs = *right;
    lhs.resolve_common(mgr);
    rhs.resolve_common(mgr);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
into.push_back(instruction_ptr(new instruction_mkapp()));
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_app::print(int indent, std::ostream& to) const { void ast_app::print(int indent, std::ostream& to) const {
print_indent(indent, to); print_indent(indent, to);
to << "APP:" << std::endl; to << "APP:" << std::endl;
@@ -72,8 +117,8 @@ void ast_app::print(int indent, std::ostream& to) const {
} }
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env); type_ptr ltype = left->typecheck_common(mgr, env);
type_ptr rtype = right->typecheck(mgr, env); type_ptr rtype = right->typecheck_common(mgr, env);
type_ptr return_type = mgr.new_type(); type_ptr return_type = mgr.new_type();
type_ptr arrow = type_ptr(new type_arr(rtype, return_type)); type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
@@ -81,6 +126,17 @@ type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
return return_type; return return_type;
} }
// Resolves the function expression (left) and then the argument (right)
// via resolve_common. Exceptions from resolve_common propagate.
void ast_app::resolve(const type_mgr& mgr) const {
    auto& function_expr = *left;
    auto& argument_expr = *right;
    function_expr.resolve_common(mgr);
    argument_expr.resolve_common(mgr);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_case::print(int indent, std::ostream& to) const { void ast_case::print(int indent, std::ostream& to) const {
print_indent(indent, to); print_indent(indent, to);
to << "CASE: " << std::endl; to << "CASE: " << std::endl;
@@ -94,23 +150,85 @@ void ast_case::print(int indent, std::ostream& to) const {
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const { type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_var* var; type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var); type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
type_ptr branch_type = mgr.new_type(); type_ptr branch_type = mgr.new_type();
if(!dynamic_cast<type_base*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
for(auto& branch : branches) { for(auto& branch : branches) {
type_env new_env = env.scope(); type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env); branch->pat->match(case_type, mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env); type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
mgr.unify(branch_type, curr_branch_type); mgr.unify(branch_type, curr_branch_type);
} }
case_type = mgr.resolve(case_type, var);
if(!dynamic_cast<type_data*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
return branch_type; return branch_type;
} }
void ast_case::resolve(const type_mgr& mgr) const {
of->resolve_common(mgr);
for(auto& branch : branches) {
branch->expr->resolve_common(mgr);
}
}
// Compiles a case expression.
//
// Emitted sequence: <scrutinee> Eval Jump, where the Jump instruction owns one
// instruction list per branch and a tag -> branch-index table (tag_mappings).
//
//  - A variable pattern acts as a catch-all: every constructor tag that has
//    not been claimed by an earlier branch is mapped to this branch.
//  - A constructor pattern emits Split, the branch body compiled with the
//    pattern's parameters bound in the environment, then Slide(param count),
//    and claims exactly that constructor's tag.
//
// Throws type_error when:
//  - the scrutinee's recorded node_type is not a type_data,
//  - a constructor pattern names a constructor the data type does not have,
//  - two constructor patterns claim the same tag ("duplicate pattern"),
//  - some constructor tag is left unmapped ("non-total pattern").
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    type_data* type = dynamic_cast<type_data*>(of->node_type.get());
    // Fail loudly instead of dereferencing a null pointer below.
    if(!type) throw type_error("attempting case analysis of non-data type");

    of->compile(env, into);
    into.push_back(instruction_ptr(new instruction_eval()));

    // Ownership moves into `into` immediately; the raw pointer is kept only to
    // fill in the branches and the tag table afterwards.
    instruction_jump* jump_instruction = new instruction_jump();
    into.push_back(instruction_ptr(jump_instruction));

    for(auto& branch : branches) {
        std::vector<instruction_ptr> branch_instructions;
        pattern_constr* cpat;

        if(dynamic_cast<pattern_var*>(branch->pat.get())) {
            branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);

            for(auto& constr_pair : type->constructors) {
                // Skip tags already claimed by an earlier branch. This must be
                // `continue`, not `break`: stopping at the first claimed tag
                // would leave later constructors unmapped and make a total
                // case expression fail the totality check below.
                if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
                        jump_instruction->tag_mappings.end())
                    continue;

                jump_instruction->tag_mappings[constr_pair.second.tag] =
                    jump_instruction->branches.size();
            }
            jump_instruction->branches.push_back(std::move(branch_instructions));
        } else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
            env_ptr new_env = env;
            for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); ++it) {
                // env_var's constructor moves from the string it is handed, so
                // give it a copy and leave the pattern's parameter list intact.
                auto param_name = *it;
                new_env = env_ptr(new env_var(param_name, new_env));
            }

            branch_instructions.push_back(instruction_ptr(new instruction_split()));
            branch->expr->compile(new_env, branch_instructions);
            branch_instructions.push_back(instruction_ptr(new instruction_slide(
                            cpat->params.size())));

            // Look the constructor up without operator[], which would silently
            // insert a new entry into the data type for an unknown name.
            auto constr_it = type->constructors.find(cpat->constr);
            if(constr_it == type->constructors.end())
                throw type_error("technically not a type error: unknown constructor in pattern");

            int new_tag = constr_it->second.tag;
            if(jump_instruction->tag_mappings.find(new_tag) !=
                    jump_instruction->tag_mappings.end())
                throw type_error("technically not a type error: duplicate pattern");

            jump_instruction->tag_mappings[new_tag] =
                jump_instruction->branches.size();
            jump_instruction->branches.push_back(std::move(branch_instructions));
        }
    }

    // Totality check: every constructor of the data type must map to a branch.
    for(auto& constr_pair : type->constructors) {
        if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
                jump_instruction->tag_mappings.end())
            throw type_error("non-total pattern");
    }
}
void pattern_var::print(std::ostream& to) const { void pattern_var::print(std::ostream& to) const {
to << var; to << var;
} }

View File

@@ -8,12 +8,18 @@
#include "env.hpp" #include "env.hpp"
struct ast { struct ast {
type_ptr node_type;
virtual ~ast() = default; virtual ~ast() = default;
virtual void print(int indent, std::ostream& to) const = 0; virtual void print(int indent, std::ostream& to) const = 0;
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0; virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
virtual void compile(const env_ptr env, virtual void resolve(const type_mgr& mgr) const = 0;
std::vector<instruction>& into) const; virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
void resolve_common(const type_mgr& mgr);
}; };
using ast_ptr = std::unique_ptr<ast>; using ast_ptr = std::unique_ptr<ast>;
@@ -40,6 +46,7 @@ using branch_ptr = std::unique_ptr<branch>;
struct constructor { struct constructor {
std::string name; std::string name;
std::vector<std::string> types; std::vector<std::string> types;
int8_t tag;
constructor(std::string n, std::vector<std::string> ts) constructor(std::string n, std::vector<std::string> ts)
: name(std::move(n)), types(std::move(ts)) {} : name(std::move(n)), types(std::move(ts)) {}
@@ -52,6 +59,8 @@ struct definition {
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0; virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0; virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) = 0;
virtual void compile() = 0;
}; };
using definition_ptr = std::unique_ptr<definition>; using definition_ptr = std::unique_ptr<definition>;
@@ -64,6 +73,8 @@ struct ast_int : public ast {
void print(int indent, std::ostream& to) const; void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
}; };
struct ast_lid : public ast { struct ast_lid : public ast {
@@ -74,6 +85,8 @@ struct ast_lid : public ast {
void print(int indent, std::ostream& to) const; void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
}; };
struct ast_uid : public ast { struct ast_uid : public ast {
@@ -84,6 +97,8 @@ struct ast_uid : public ast {
void print(int indent, std::ostream& to) const; void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
}; };
struct ast_binop : public ast { struct ast_binop : public ast {
@@ -96,6 +111,8 @@ struct ast_binop : public ast {
void print(int indent, std::ostream& to) const; void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
}; };
struct ast_app : public ast { struct ast_app : public ast {
@@ -107,6 +124,8 @@ struct ast_app : public ast {
void print(int indent, std::ostream& to) const; void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
}; };
struct ast_case : public ast { struct ast_case : public ast {
@@ -118,6 +137,8 @@ struct ast_case : public ast {
void print(int indent, std::ostream& to) const; void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const; type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
}; };
struct pattern_var : public pattern { struct pattern_var : public pattern {
@@ -149,6 +170,8 @@ struct definition_defn : public definition {
type_ptr return_type; type_ptr return_type;
std::vector<type_ptr> param_types; std::vector<type_ptr> param_types;
std::vector<instruction_ptr> instructions;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b) definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) { : name(std::move(n)), params(std::move(p)), body(std::move(b)) {
@@ -156,6 +179,8 @@ struct definition_defn : public definition {
void typecheck_first(type_mgr& mgr, type_env& env); void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const; void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
}; };
struct definition_data : public definition { struct definition_data : public definition {
@@ -167,4 +192,6 @@ struct definition_data : public definition {
void typecheck_first(type_mgr& mgr, type_env& env); void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const; void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
}; };

View File

@@ -0,0 +1,21 @@
#include "binop.hpp"
// Returns the source-level symbol for a binary operator ("+", "-", "*", "/"),
// or "??" for a value outside the known enumerators.
std::string op_name(binop op) {
    if(op == PLUS) return "+";
    if(op == MINUS) return "-";
    if(op == TIMES) return "*";
    if(op == DIVIDE) return "/";
    return "??";
}
// Returns the word form of a binary operator ("plus", "minus", "times",
// "divide"), or "??" for a value outside the known enumerators.
std::string op_action(binop op) {
    if(op == PLUS) return "plus";
    if(op == MINUS) return "minus";
    if(op == TIMES) return "times";
    if(op == DIVIDE) return "divide";
    return "??";
}

View File

@@ -1,4 +1,5 @@
#pragma once #pragma once
#include <string>
enum binop { enum binop {
PLUS, PLUS,
@@ -7,3 +8,5 @@ enum binop {
DIVIDE DIVIDE
}; };
std::string op_name(binop op);
std::string op_action(binop op);

View File

@@ -1,4 +1,5 @@
#include "ast.hpp" #include "ast.hpp"
#include "error.hpp"
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) { void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
return_type = mgr.new_type(); return_type = mgr.new_type();
@@ -24,16 +25,42 @@ void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const
type_it++; type_it++;
} }
type_ptr body_type = body->typecheck(mgr, new_env); type_ptr body_type = body->typecheck_common(mgr, new_env);
mgr.unify(return_type, body_type); mgr.unify(return_type, body_type);
} }
void definition_defn::resolve(const type_mgr& mgr) {
type_var* var;
body->resolve_common(mgr);
return_type = mgr.resolve(return_type, var);
if(var) throw type_error("ambiguously typed program");
for(auto& param_type : param_types) {
param_type = mgr.resolve(param_type, var);
if(var) throw type_error("ambiguously typed program");
}
}
// Compiles this function definition into `instructions`.
//
// Builds an environment rooted at env_offset(0, nullptr) and wraps it in one
// env_var per parameter, walking `params` back to front so that the first
// parameter ends up outermost. The body is compiled in that environment and
// followed by Update(params.size()) and Pop(params.size()).
void definition_defn::compile() {
    env_ptr new_env = env_ptr(new env_offset(0, nullptr));
    for(auto it = params.rbegin(); it != params.rend(); ++it) {
        // env_var's constructor takes its name by non-const reference and
        // moves from it. Hand it a copy so `params` keeps the parameter names
        // after compilation instead of being left with moved-from strings.
        std::string param_name = *it;
        new_env = env_ptr(new env_var(param_name, new_env));
    }
    body->compile(new_env, instructions);
    instructions.push_back(instruction_ptr(new instruction_update(params.size())));
    instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) { void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
type_ptr return_type = type_ptr(new type_base(name)); type_data* this_type = new type_data(name);
type_ptr return_type = type_ptr(this_type);
int next_tag = 0;
for(auto& constructor : constructors) { for(auto& constructor : constructors) {
type_ptr full_type = return_type; constructor->tag = next_tag;
this_type->constructors[constructor->name] = { next_tag++ };
type_ptr full_type = return_type;
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) { for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
type_ptr type = type_ptr(new type_base(*it)); type_ptr type = type_ptr(new type_base(*it));
full_type = type_ptr(new type_arr(type, full_type)); full_type = type_ptr(new type_arr(type, full_type));
@@ -46,3 +73,11 @@ void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const { void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
// Nothing // Nothing
} }
// Intentionally a no-op: nothing is resolved for a data definition here.
void definition_data::resolve(const type_mgr& mgr) {}
// Intentionally a no-op: a data definition emits no instructions here.
void definition_data::compile() {}

23
code/compiler/06/env.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include "env.hpp"
// Returns how many env_var/offset steps away `name` is: 0 if this node binds
// it, otherwise one more than the parent's answer. Throws the int 0 when the
// chain ends without finding the name.
int env_var::get_offset(const std::string& name) const {
    if(this->name == name) return 0;
    if(!parent) throw 0;
    return parent->get_offset(name) + 1;
}
// True when this node or any ancestor environment binds `name`.
bool env_var::has_variable(const std::string& name) const {
    if(name == this->name) return true;
    return parent ? parent->has_variable(name) : false;
}
// Delegates the lookup to the parent and adds this node's `offset` to the
// result. Throws the int 0 when there is no parent to ask.
int env_offset::get_offset(const std::string& name) const {
    if(!parent) throw 0;
    return parent->get_offset(name) + offset;
}
// An offset node binds nothing itself; the answer comes from the parent, or is
// false when there is no parent.
bool env_offset::has_variable(const std::string& name) const {
    return parent ? parent->has_variable(name) : false;
}

View File

@@ -6,26 +6,29 @@ struct env {
virtual ~env() = default; virtual ~env() = default;
virtual int get_offset(const std::string& name) const = 0; virtual int get_offset(const std::string& name) const = 0;
virtual bool has_variable(const std::string& name) const = 0;
}; };
using env_ptr = std::shared_ptr<env>; using env_ptr = std::shared_ptr<env>;
struct env_var { struct env_var : public env {
std::string name; std::string name;
env_ptr parent; env_ptr parent;
env_var(std::string& n, env_ptr p) env_var(std::string& n, env_ptr p)
: name(std::move(n)), parent(std::move(p)) {} : name(std::move(n)), parent(std::move(p)) {}
virtual int get_offset(const std::string& name) const; int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
}; };
struct env_offset { struct env_offset : public env {
int offset; int offset;
env_ptr parent; env_ptr parent;
env_offset(int o, env_ptr p) env_offset(int o, env_ptr p)
: offset(o), parent(std::move(p)) {} : offset(o), parent(std::move(p)) {}
virtual int get_offset(const std::string& name) const; int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
}; };

View File

@@ -0,0 +1,83 @@
#include "instruction.hpp"
// Writes `n` copies of the indent string to `to`.
// A zero or negative `n` writes nothing; counting up to `n` avoids the runaway
// loop (ending in signed overflow) that decrementing a negative count causes.
static void print_indent(int n, std::ostream& to) {
    for(int written = 0; written < n; ++written) {
        to << " ";
    }
}
// Prints "PushInt(<value>)" at the given indent, followed by std::endl.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(";
    to << value;
    to << ")" << std::endl;
}
// Prints "PushGlobal(<name>)" at the given indent, followed by std::endl.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(";
    to << name;
    to << ")" << std::endl;
}
// Prints "Push(<offset>)" at the given indent, followed by std::endl.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(";
    to << offset;
    to << ")" << std::endl;
}
// Prints "Pop(<count>)" at the given indent, followed by std::endl.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(";
    to << count;
    to << ")" << std::endl;
}
// Prints "MkApp()" at the given indent, followed by std::endl.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
// Prints "Update(<offset>)" at the given indent, followed by std::endl.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(";
    to << offset;
    to << ")" << std::endl;
}
// Prints "Pack(<tag>, <size>)" at the given indent, followed by std::endl.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag;
    to << ", " << size;
    to << ")" << std::endl;
}
// Prints "Split()" at the given indent, followed by std::endl.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
void instruction_jump::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Jump(" << std::endl;
for(auto& instruction_set : branches) {
for(auto& instruction : instruction_set) {
instruction->print(indent + 2, to);
}
to << std::endl;
}
print_indent(indent, to);
to << ")" << std::endl;
}
// Prints "Slide(<offset>)" at the given indent, followed by std::endl.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(";
    to << offset;
    to << ")" << std::endl;
}
// Prints "BinOp(<op_action(op)>)" at the given indent, followed by std::endl.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(";
    to << op_action(op);
    to << ")" << std::endl;
}
// Prints "Eval()" at the given indent, followed by std::endl.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
// Prints "Alloc(<amount>)" at the given indent, followed by std::endl.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(";
    to << amount;
    to << ")" << std::endl;
}
// Prints "Unwind()" at the given indent, followed by std::endl.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}

View File

@@ -1,10 +1,15 @@
#pragma once #pragma once
#include <string> #include <string>
#include <memory> #include <memory>
#include <vector>
#include <map>
#include <ostream>
#include "binop.hpp" #include "binop.hpp"
struct instruction { struct instruction {
virtual ~instruction() = default; virtual ~instruction() = default;
virtual void print(int indent, std::ostream& to) const = 0;
}; };
using instruction_ptr = std::unique_ptr<instruction>; using instruction_ptr = std::unique_ptr<instruction>;
@@ -14,6 +19,8 @@ struct instruction_pushint : public instruction {
instruction_pushint(int v) instruction_pushint(int v)
: value(v) {} : value(v) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_pushglobal : public instruction { struct instruction_pushglobal : public instruction {
@@ -21,6 +28,8 @@ struct instruction_pushglobal : public instruction {
instruction_pushglobal(std::string n) instruction_pushglobal(std::string n)
: name(std::move(n)) {} : name(std::move(n)) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_push : public instruction { struct instruction_push : public instruction {
@@ -28,10 +37,21 @@ struct instruction_push : public instruction {
instruction_push(int o) instruction_push(int o)
: offset(o) {} : offset(o) {}
void print(int indent, std::ostream& to) const;
};
struct instruction_pop : public instruction {
int count;
instruction_pop(int c)
: count(c) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_mkapp : public instruction { struct instruction_mkapp : public instruction {
void print(int indent, std::ostream& to) const;
}; };
struct instruction_update : public instruction { struct instruction_update : public instruction {
@@ -39,6 +59,8 @@ struct instruction_update : public instruction {
instruction_update(int o) instruction_update(int o)
: offset(o) {} : offset(o) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_pack : public instruction { struct instruction_pack : public instruction {
@@ -47,10 +69,19 @@ struct instruction_pack : public instruction {
instruction_pack(int t, int s) instruction_pack(int t, int s)
: tag(t), size(s) {} : tag(t), size(s) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_split : public instruction { struct instruction_split : public instruction {
void print(int indent, std::ostream& to) const;
};
struct instruction_jump : public instruction {
std::vector<std::vector<instruction_ptr>> branches;
std::map<int, int> tag_mappings;
void print(int indent, std::ostream& to) const;
}; };
struct instruction_slide : public instruction { struct instruction_slide : public instruction {
@@ -58,6 +89,8 @@ struct instruction_slide : public instruction {
instruction_slide(int o) instruction_slide(int o)
: offset(o) {} : offset(o) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_binop : public instruction { struct instruction_binop : public instruction {
@@ -65,10 +98,12 @@ struct instruction_binop : public instruction {
instruction_binop(binop o) instruction_binop(binop o)
: op(o) {} : op(o) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_eval : public instruction { struct instruction_eval : public instruction {
void print(int indent, std::ostream& to) const;
}; };
struct instruction_alloc : public instruction { struct instruction_alloc : public instruction {
@@ -76,8 +111,10 @@ struct instruction_alloc : public instruction {
instruction_alloc(int a) instruction_alloc(int a)
: amount(a) {} : amount(a) {}
void print(int indent, std::ostream& to) const;
}; };
struct instruction_unwind : public instruction { struct instruction_unwind : public instruction {
void print(int indent, std::ostream& to) const;
}; };

View File

@@ -36,6 +36,23 @@ void typecheck_program(
pair.second->print(mgr, std::cout); pair.second->print(mgr, std::cout);
std::cout << std::endl; std::cout << std::endl;
} }
for(auto& def : prog) {
def->resolve(mgr);
}
}
void compile_program(const std::vector<definition_ptr>& prog) {
for(auto& def : prog) {
def->compile();
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
if(!defn) continue;
for(auto& instruction : defn->instructions) {
instruction->print(0, std::cout);
}
std::cout << std::endl;
}
} }
int main() { int main() {
@@ -56,6 +73,7 @@ int main() {
} }
try { try {
typecheck_program(program, mgr, env); typecheck_program(program, mgr, env);
compile_program(program);
} catch(unification_error& err) { } catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl; std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m"; std::cout << " (1) \033[34m";

View File

@@ -44,7 +44,7 @@ type_ptr type_mgr::new_arrow_type() {
return type_ptr(new type_arr(new_type(), new_type())); return type_ptr(new type_arr(new_type(), new_type()));
} }
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) { type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
type_var* cast; type_var* cast;
var = nullptr; var = nullptr;

View File

@@ -30,6 +30,17 @@ struct type_base : public type {
void print(const type_mgr& mgr, std::ostream& to) const; void print(const type_mgr& mgr, std::ostream& to) const;
}; };
struct type_data : public type_base {
struct constructor {
int tag;
};
std::map<std::string, constructor> constructors;
type_data(std::string n)
: type_base(std::move(n)) {}
};
struct type_arr : public type { struct type_arr : public type {
type_ptr left; type_ptr left;
type_ptr right; type_ptr right;
@@ -49,6 +60,6 @@ struct type_mgr {
type_ptr new_arrow_type(); type_ptr new_arrow_type();
void unify(type_ptr l, type_ptr r); void unify(type_ptr l, type_ptr r);
type_ptr resolve(type_ptr t, type_var*& var); type_ptr resolve(type_ptr t, type_var*& var) const;
void bind(const std::string& s, type_ptr t); void bind(const std::string& s, type_ptr t);
}; };

View File

@@ -0,0 +1,28 @@
# Build script for the `compiler` executable: generates the parser and scanner
# sources with Bison/Flex and compiles them together with the hand-written code.
cmake_minimum_required(VERSION 3.1)
project(compiler)
# Locate the Bison and Flex tools (these modules provide bison_target/flex_target).
find_package(BISON)
find_package(FLEX)
# Generate parser.cpp from parser.y in the build directory; "-d" is passed to Bison.
bison_target(parser
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
COMPILE_FLAGS "-d")
# Generate scanner.cpp from scanner.l in the build directory.
flex_target(scanner
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
# The scanner depends on the parser's generated outputs.
add_flex_bison_dependency(scanner parser)
# Hand-written sources plus the generated parser/scanner outputs.
add_executable(compiler
ast.cpp ast.hpp definition.cpp
type_env.cpp type_env.hpp
env.cpp env.hpp
type.cpp type.hpp
error.cpp error.hpp
binop.cpp binop.hpp
instruction.cpp instruction.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp
)
# Headers are looked up in both the source directory and the build directory
# (the latter is where the generated files are written).
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})

262
code/compiler/07/ast.cpp Normal file
View File

@@ -0,0 +1,262 @@
#include "ast.hpp"
#include <ostream>
#include "error.hpp"
// Writes the indentation string to `to` once per indent level.
// A zero or negative `n` writes nothing (the previous `while(n--)` form kept
// counting down for negative values).
static void print_indent(int n, std::ostream& to) {
    for(int level = 0; level < n; ++level) {
        to << " ";
    }
}
// Runs the node-specific typecheck() and remembers its result in node_type
// before handing it back to the caller.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    return node_type = typecheck(mgr, env);
}
void ast::resolve_common(const type_mgr& mgr) {
type_var* var;
type_ptr resolved_type = mgr.resolve(node_type, var);
if(var) throw type_error("ambiguously typed program");
resolve(mgr);
node_type = std::move(resolved_type);
}
// Dumps this literal as "INT: <value>" on its own indented line.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value;
    to << std::endl;
}
// An integer literal always has the base type named "Int"; neither the
// manager nor the environment is consulted.
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr int_type(new type_base("Int"));
    return int_type;
}
// An integer literal has no child nodes, so there is nothing to resolve.
void ast_int::resolve(const type_mgr& mgr) const {
}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
// Dumps this lowercase identifier as "LID: <id>" on its own indented line.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id;
    to << std::endl;
}
// The type of an identifier is whatever the environment has bound to its name.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    return bound_type;
}
// An identifier has no child nodes, so there is nothing to resolve.
void ast_lid::resolve(const type_mgr& mgr) const {
}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
}
// Dumps this uppercase identifier as "UID: <id>" on its own indented line.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id;
    to << std::endl;
}
// The type of an uppercase identifier is whatever the environment has bound
// to its name.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    return bound_type;
}
// An uppercase identifier has no child nodes, so there is nothing to resolve.
void ast_uid::resolve(const type_mgr& mgr) const {
}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}
void ast_binop::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "BINOP: " << op_name(op) << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
// Type-checks both operands, looks the operator up by its symbol, and unifies
// the operator's type with `left -> right -> result`, where `result` is a
// fresh type from the manager. Returns `result`.
// Throws type_error when the environment has no binding for the operator.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr lhs_type = left->typecheck_common(mgr, env);
    type_ptr rhs_type = right->typecheck_common(mgr, env);

    type_ptr operator_type = env.lookup(op_name(op));
    if(!operator_type) {
        throw type_error(std::string("unknown binary operator ") + op_name(op));
    }

    type_ptr result_type = mgr.new_type();
    type_ptr takes_rhs(new type_arr(rhs_type, result_type));
    type_ptr takes_both(new type_arr(lhs_type, takes_rhs));
    mgr.unify(takes_both, operator_type);
    return result_type;
}
void ast_binop::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
into.push_back(instruction_ptr(new instruction_mkapp()));
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_app::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "APP:" << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
// Type-checks function and argument, then unifies the function's type with
// `argument -> result`, where `result` is a fresh type. Returns `result`.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr function_type = left->typecheck_common(mgr, env);
    type_ptr argument_type = right->typecheck_common(mgr, env);

    type_ptr result_type = mgr.new_type();
    type_ptr expected(new type_arr(argument_type, result_type));
    mgr.unify(expected, function_type);
    return result_type;
}
void ast_app::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_case::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "CASE: " << std::endl;
for(auto& branch : branches) {
print_indent(indent + 1, to);
branch->pat->print(to);
to << std::endl;
branch->expr->print(indent + 2, to);
}
}
// Type-checks the scrutinee, then every branch in its own scope: the pattern
// is matched against the scrutinee type and the branch body's type is unified
// with one shared result type. After all branches, the scrutinee type must
// resolve to a data type; otherwise type_error is thrown.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* unbound = nullptr;
    type_ptr scrutinee_type = mgr.resolve(of->typecheck_common(mgr, env), unbound);
    type_ptr result_type = mgr.new_type();

    for(const auto& alternative : branches) {
        type_env branch_env = env.scope();
        alternative->pat->match(scrutinee_type, mgr, branch_env);
        type_ptr body_type = alternative->expr->typecheck_common(mgr, branch_env);
        mgr.unify(result_type, body_type);
    }

    // Resolve again: matching the patterns may have refined the scrutinee type.
    scrutinee_type = mgr.resolve(scrutinee_type, unbound);
    const bool is_data = dynamic_cast<type_data*>(scrutinee_type.get()) != nullptr;
    if(!is_data) {
        throw type_error("attempting case analysis of non-data type");
    }
    return result_type;
}
void ast_case::resolve(const type_mgr& mgr) const {
of->resolve_common(mgr);
for(auto& branch : branches) {
branch->expr->resolve_common(mgr);
}
}
// Compiles a case expression: emits code for the scrutinee, an Eval, and a
// Jump instruction whose branches are selected through a tag -> branch-index
// table built from the patterns.
//
// Throws type_error when
//  - the scrutinee's recorded type is not a data type,
//  - a constructor pattern names a constructor the data type does not have,
//  - two constructor patterns cover the same constructor, or
//  - some constructor of the data type is covered by no branch.
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    type_data* type = dynamic_cast<type_data*>(of->node_type.get());
    if(!type) {
        // Guard against dereferencing a failed cast below.
        throw type_error("attempting case analysis of non-data type");
    }

    of->compile(env, into);
    into.push_back(instruction_ptr(new instruction_eval()));

    // `into` owns the jump; the raw pointer is kept only to fill it in below.
    instruction_jump* jump_instruction = new instruction_jump();
    into.push_back(instruction_ptr(jump_instruction));

    for(auto& branch : branches) {
        std::vector<instruction_ptr> branch_instructions;
        pattern_var* vpat;
        pattern_constr* cpat;

        if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
            branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);

            // A variable pattern is the default for every constructor that no
            // earlier branch claimed. Already-mapped tags are skipped (not a
            // reason to stop), so later constructors still get this branch.
            for(auto& constr_pair : type->constructors) {
                if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
                        jump_instruction->tag_mappings.end())
                    continue;

                jump_instruction->tag_mappings[constr_pair.second.tag] =
                    jump_instruction->branches.size();
            }
            jump_instruction->branches.push_back(std::move(branch_instructions));
        } else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
            env_ptr new_env = env;
            for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); ++it) {
                // Hand env_var a private copy so the pattern's own parameter
                // names stay intact however env_var consumes its argument.
                std::string param_name = *it;
                new_env = env_ptr(new env_var(param_name, new_env));
            }

            branch_instructions.push_back(instruction_ptr(new instruction_split()));
            branch->expr->compile(new_env, branch_instructions);
            branch_instructions.push_back(instruction_ptr(new instruction_slide(
                            cpat->params.size())));

            // Look the constructor up without inserting: operator[] would
            // silently add an entry for an unknown name.
            auto constr_it = type->constructors.find(cpat->constr);
            if(constr_it == type->constructors.end())
                throw type_error(
                        std::string("pattern using unknown constructor ") + cpat->constr);

            int new_tag = constr_it->second.tag;
            if(jump_instruction->tag_mappings.find(new_tag) !=
                    jump_instruction->tag_mappings.end())
                throw type_error("technically not a type error: duplicate pattern");

            jump_instruction->tag_mappings[new_tag] =
                jump_instruction->branches.size();
            jump_instruction->branches.push_back(std::move(branch_instructions));
        }
    }

    // Totality check: every constructor must have ended up with a branch.
    for(auto& constr_pair : type->constructors) {
        if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
                jump_instruction->tag_mappings.end())
            throw type_error("non-total pattern");
    }
}
// Writes the bound variable's name, with no indentation or newline.
void pattern_var::print(std::ostream& to) const {
    const std::string& variable_name = var;
    to << variable_name;
}
// A variable pattern matches any value: it simply binds its name to the
// scrutinee's type `t` in `env`. The type manager is not used.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    const std::string& variable_name = var;
    env.bind(variable_name, t);
}
void pattern_constr::print(std::ostream& to) const {
to << constr;
for(auto& param : params) {
to << " " << param;
}
}
// Matches a constructor pattern against the scrutinee type `t`.
// The constructor's type is looked up in `env` and peeled one arrow per
// pattern parameter, binding each parameter to the corresponding argument
// type; whatever remains is unified with `t`.
// Throws type_error if the constructor is unknown or the pattern lists more
// parameters than the constructor's type has arrows.
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr remaining = env.lookup(constr);
    if(!remaining) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    for(const std::string& param : params) {
        type_arr* arrow = dynamic_cast<type_arr*>(remaining.get());
        if(arrow == nullptr) {
            throw type_error("too many parameters in constructor pattern");
        }
        env.bind(param, arrow->left);
        remaining = arrow->right;
    }

    mgr.unify(t, remaining);
}

197
code/compiler/07/ast.hpp Normal file
View File

@@ -0,0 +1,197 @@
#pragma once
#include <memory>
#include <vector>
#include "type.hpp"
#include "type_env.hpp"
#include "binop.hpp"
#include "instruction.hpp"
#include "env.hpp"
// Abstract base of every expression node in the syntax tree.
struct ast {
// Type recorded for this node: assigned by typecheck_common and replaced with
// the resolved type by resolve_common.
type_ptr node_type;
virtual ~ast() = default;
// Writes a textual dump of the node to `to`, indented by `indent` levels.
virtual void print(int indent, std::ostream& to) const = 0;
// Computes the node's type using the type manager and the names in `env`.
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
// Resolves the types of the node's children (if any).
virtual void resolve(const type_mgr& mgr) const = 0;
// Appends the instructions for this node to `into`; `env` is the
// compile-time environment used to look up variables.
virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
// Non-virtual wrappers around typecheck()/resolve() that also maintain node_type.
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
void resolve_common(const type_mgr& mgr);
};
using ast_ptr = std::unique_ptr<ast>;
// Abstract base of the patterns that may appear on the left of a case branch.
struct pattern {
virtual ~pattern() = default;
// Writes the pattern's textual form to `to`.
virtual void print(std::ostream& to) const = 0;
// Type-checks the pattern against the scrutinee type `t`, adding the names
// it introduces to `env`.
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
};
using pattern_ptr = std::unique_ptr<pattern>;
// One alternative of a case expression: a pattern and the expression chosen
// when it matches. Owns both.
struct branch {
pattern_ptr pat;
ast_ptr expr;
branch(pattern_ptr p, ast_ptr a)
: pat(std::move(p)), expr(std::move(a)) {}
};
using branch_ptr = std::unique_ptr<branch>;
// A constructor as written in a data definition: its name and the names of
// its parameter types.
struct constructor {
    std::string name;
    std::vector<std::string> types;
    // Not set by this struct's constructor; assigned while the enclosing data
    // definition is type-checked. Zero-initialized so it never holds an
    // indeterminate value before that point.
    int8_t tag = 0;

    constructor(std::string n, std::vector<std::string> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};
using constructor_ptr = std::unique_ptr<constructor>;
// Abstract base of the top-level definitions in a program.
struct definition {
virtual ~definition() = default;
// First type-checking pass; may add bindings to `env`.
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
// Second type-checking pass; `env` is read-only here.
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
// Resolves the types recorded for this definition.
virtual void resolve(const type_mgr& mgr) = 0;
// Generates the instructions for this definition, if it has any.
virtual void compile() = 0;
};
using definition_ptr = std::unique_ptr<definition>;
// Integer literal node.
struct ast_int : public ast {
// The literal's value.
int value;
explicit ast_int(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Lowercase identifier node (printed as "LID").
struct ast_lid : public ast {
// The identifier's name.
std::string id;
explicit ast_lid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Uppercase identifier node (printed as "UID").
struct ast_uid : public ast {
// The identifier's name.
std::string id;
explicit ast_uid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Binary operator application node; owns both operand subtrees.
struct ast_binop : public ast {
// Which operator is applied.
binop op;
ast_ptr left;
ast_ptr right;
ast_binop(binop o, ast_ptr l, ast_ptr r)
: op(o), left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Function application node: `left` is the function, `right` the argument
// (this is how typecheck treats them). Owns both subtrees.
struct ast_app : public ast {
ast_ptr left;
ast_ptr right;
ast_app(ast_ptr l, ast_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Case expression node: the expression being analysed and its branches.
struct ast_case : public ast {
// The scrutinee: the expression the branches are matched against.
ast_ptr of;
// Branches, in the order they were given to the constructor.
std::vector<branch_ptr> branches;
ast_case(ast_ptr o, std::vector<branch_ptr> b)
: of(std::move(o)), branches(std::move(b)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Pattern consisting of a single variable name.
struct pattern_var : public pattern {
// Name the matched value is bound to.
std::string var;
pattern_var(std::string v)
: var(std::move(v)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
};
// Pattern consisting of a constructor name followed by parameter names.
struct pattern_constr : public pattern {
// Name of the constructor being matched.
std::string constr;
// Names bound to the constructor's arguments, in order.
std::vector<std::string> params;
pattern_constr(std::string c, std::vector<std::string> p)
: constr(std::move(c)), params(std::move(p)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr&, type_env& env) const;
};
// A function definition: name, parameter names and body expression, plus the
// data the type-checking and compilation passes attach to it.
struct definition_defn : public definition {
std::string name;
std::vector<std::string> params;
ast_ptr body;
// Filled in by typecheck_first and finalized by resolve.
type_ptr return_type;
std::vector<type_ptr> param_types;
// Filled in by compile().
std::vector<instruction_ptr> instructions;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
};
// A data type definition: the type's name and its constructors.
struct definition_data : public definition {
std::string name;
std::vector<constructor_ptr> constructors;
definition_data(std::string n, std::vector<constructor_ptr> cs)
: name(std::move(n)), constructors(std::move(cs)) {}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
};

View File

@@ -0,0 +1,21 @@
#include "binop.hpp"
// Returns the source-level symbol of a binary operator, or "??" for a value
// outside the enumeration.
std::string op_name(binop op) {
    const char* symbol = "??";
    switch(op) {
        case PLUS:   symbol = "+"; break;
        case MINUS:  symbol = "-"; break;
        case TIMES:  symbol = "*"; break;
        case DIVIDE: symbol = "/"; break;
    }
    return symbol;
}
// Returns the word naming a binary operator ("plus", "minus", ...), or "??"
// for a value outside the enumeration.
std::string op_action(binop op) {
    const char* action = "??";
    switch(op) {
        case PLUS:   action = "plus"; break;
        case MINUS:  action = "minus"; break;
        case TIMES:  action = "times"; break;
        case DIVIDE: action = "divide"; break;
    }
    return action;
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <string>
// The binary operators of the language. op_name() maps each to its symbol
// ("+", "-", "*", "/") and op_action() to a word ("plus", "minus", ...).
enum binop {
PLUS,
MINUS,
TIMES,
DIVIDE
};
std::string op_name(binop op);
std::string op_action(binop op);

View File

@@ -0,0 +1,83 @@
#include "ast.hpp"
#include "error.hpp"
// First pass: gives the function a fresh return type and one fresh type per
// parameter, builds the nested arrow type from them, and binds the function's
// name to that type in `env`.
// The arrow is built from the innermost (last) parameter outwards, so
// param_types ends up in reverse parameter order.
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();

    type_ptr signature = return_type;
    for(auto remaining = params.size(); remaining > 0; --remaining) {
        type_ptr fresh_param = mgr.new_type();
        signature = type_ptr(new type_arr(fresh_param, signature));
        param_types.push_back(fresh_param);
    }

    env.bind(name, signature);
}
// Second pass: type-checks the body in a scope where each parameter is bound
// to its type, then unifies the body's type with the declared return type.
// param_types is walked backwards to pair each type with its parameter.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env body_env = env.scope();

    auto type_it = param_types.rbegin();
    for(auto param_it = params.begin();
            param_it != params.end() && type_it != param_types.rend();
            ++param_it, ++type_it) {
        body_env.bind(*param_it, *type_it);
    }

    mgr.unify(return_type, body->typecheck_common(mgr, body_env));
}
void definition_defn::resolve(const type_mgr& mgr) {
type_var* var;
body->resolve_common(mgr);
return_type = mgr.resolve(return_type, var);
if(var) throw type_error("ambiguously typed program");
for(auto& param_type : param_types) {
param_type = mgr.resolve(param_type, var);
if(var) throw type_error("ambiguously typed program");
}
}
// Compiles the function body into `instructions`, followed by an Update and a
// Pop that both take the number of parameters.
// The compile-time environment is a chain of env_var entries, built from the
// last parameter to the first on top of an empty env_offset.
void definition_defn::compile() {
    env_ptr new_env = env_ptr(new env_offset(0, nullptr));
    for(auto it = params.rbegin(); it != params.rend(); ++it) {
        // Hand env_var a private copy so the entries of `params` stay intact
        // however env_var consumes its argument.
        std::string param_name = *it;
        new_env = env_ptr(new env_var(param_name, new_env));
    }
    body->compile(new_env, instructions);
    instructions.push_back(instruction_ptr(new instruction_update(params.size())));
    instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
// First pass for a data definition: creates the data type, numbers its
// constructors from 0 in declaration order, and binds every constructor name
// to a function type from its parameter types to the data type.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* data_type = new type_data(name);
    type_ptr result_type(data_type);

    int tag = 0;
    for(auto& ctor : constructors) {
        ctor->tag = tag;
        data_type->constructors[ctor->name] = { tag };
        ++tag;

        // Wrap the result in one arrow per parameter, last parameter first.
        type_ptr ctor_type = result_type;
        for(auto it = ctor->types.rbegin(); it != ctor->types.rend(); ++it) {
            type_ptr param_type(new type_base(*it));
            ctor_type = type_ptr(new type_arr(param_type, ctor_type));
        }
        env.bind(ctor->name, ctor_type);
    }
}
// Second type-checking pass for a data definition: intentionally a no-op.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
// Nothing to do: the constructors were already bound in typecheck_first.
}
// Type resolution for a data definition: intentionally a no-op.
void definition_data::resolve(const type_mgr& mgr) {
// Nothing to resolve.
}
// Compilation of a data definition: currently emits nothing.
void definition_data::compile() {
}

23
code/compiler/07/env.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include "env.hpp"
// Returns how many variable entries lie between this one and `name`
// (0 when this entry is `name`), delegating to the parent chain otherwise.
// Throws the integer 0 when the chain ends without finding the name.
int env_var::get_offset(const std::string& name) const {
    if(this->name == name) {
        return 0;
    }
    if(!parent) {
        throw 0;
    }
    return parent->get_offset(name) + 1;
}
// True when this entry or any ancestor in the chain is named `name`.
bool env_var::has_variable(const std::string& name) const {
    if(this->name == name) {
        return true;
    }
    return parent ? parent->has_variable(name) : false;
}
// An offset entry names no variable itself: it adds `offset` to whatever the
// parent chain reports. Throws the integer 0 when there is no parent.
int env_offset::get_offset(const std::string& name) const {
    if(!parent) {
        throw 0;
    }
    return parent->get_offset(name) + offset;
}
// An offset entry names no variable itself, so the answer comes entirely
// from the parent chain (false when there is none).
bool env_offset::has_variable(const std::string& name) const {
    return parent ? parent->has_variable(name) : false;
}

34
code/compiler/07/env.hpp Normal file
View File

@@ -0,0 +1,34 @@
#pragma once
#include <memory>
#include <string>
// Abstract compile-time environment: a chain of entries used to map variable
// names to integer offsets.
struct env {
virtual ~env() = default;
// Offset of `name` relative to this entry; implementations throw if the
// name is not found.
virtual int get_offset(const std::string& name) const = 0;
// Whether `name` is bound anywhere in the chain starting at this entry.
virtual bool has_variable(const std::string& name) const = 0;
};
using env_ptr = std::shared_ptr<env>;
// Environment entry that introduces one named variable on top of `parent`.
struct env_var : public env {
    std::string name;
    env_ptr parent;

    // Takes the name by value: the caller's string is copied (or moved only
    // when the caller explicitly passes an rvalue). The previous signature
    // took a non-const reference and moved from it, which emptied the string
    // the caller passed in.
    env_var(std::string n, env_ptr p)
        : name(std::move(n)), parent(std::move(p)) {}

    int get_offset(const std::string& name) const;
    bool has_variable(const std::string& name) const;
};
// Environment entry that introduces no name but shifts every offset reported
// by `parent` by a fixed amount.
struct env_offset : public env {
// Amount added to the parent's offsets.
int offset;
env_ptr parent;
env_offset(int o, env_ptr p)
: offset(o), parent(std::move(p)) {}
int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
};

View File

@@ -0,0 +1,5 @@
#include "error.hpp"
// Returns a fixed, generic message; the specific reason is kept in the
// `description` member rather than reported here.
const char* type_error::what() const noexcept {
    static const char* const message =
        "an error occured while checking the types of the program";
    return message;
}

View File

@@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"
// Exception raised for errors found while type-checking a program.
struct type_error : std::exception {
// Human-readable reason for the failure.
std::string description;
type_error(std::string d)
: description(std::move(d)) {}
// Overrides std::exception::what(); defined in error.cpp.
const char* what() const noexcept override;
};
// Type error raised when two types cannot be unified; carries both types so
// the caller can report them.
struct unification_error : public type_error {
    type_ptr left;
    type_ptr right;

    // The initializer list is written in the order initialization actually
    // happens: base class first, then members in declaration order.
    unification_error(type_ptr l, type_ptr r)
        : type_error("failed to unify types"),
          left(std::move(l)),
          right(std::move(r)) {}
};

View File

@@ -0,0 +1,2 @@
data Bool = { True, False }
defn main = { 3 + True }

View File

@@ -0,0 +1 @@
defn main = { 1 2 3 4 5 }

View File

@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }
defn head l = {
case l of {
Nil -> { 0 }
Cons x y z -> { x }
}
}

View File

@@ -0,0 +1,31 @@
#include "../runtime.h"
// Hand-written addition: evaluates the two topmost stack entries as numbers
// and pushes a new number node holding their sum.
void f_add(struct stack* s) {
    struct node_num* left;
    struct node_num* right;
    struct node_num* sum;

    left = (struct node_num*) eval(stack_peek(s, 0));
    right = (struct node_num*) eval(stack_peek(s, 1));
    sum = alloc_num(left->value + right->value);
    stack_push(s, (struct node_base*) sum);
}
// Hand-written instruction sequence that builds the application graph for
// adding 320 and 6 with f_add.
void f_main(struct stack* s) {
    struct node_base* left;
    struct node_base* right;
    int applications;

    // PushInt 320
    stack_push(s, (struct node_base*) alloc_num(320));
    // PushInt 6
    stack_push(s, (struct node_base*) alloc_num(6));
    // PushGlobal f_add (the function for +)
    stack_push(s, (struct node_base*) alloc_global(f_add, 2));

    // MkApp, twice: pop two nodes and push the application of the first to the second
    for(applications = 0; applications < 2; applications++) {
        left = stack_pop(s);
        right = stack_pop(s);
        stack_push(s, (struct node_base*) alloc_app(left, right));
    }
}

View File

@@ -0,0 +1,2 @@
defn main = { plus 320 6 }
defn plus x y = { x + y }

View File

@@ -0,0 +1,3 @@
defn add x y = { x + y }
defn double x = { add x x }
defn main = { double 163 }

View File

@@ -0,0 +1,7 @@
data List = { Nil, Cons Int List }
defn length l = {
case l of {
Nil -> { 0 }
Cons x xs -> { 1 + length xs }
}
}

View File

@@ -0,0 +1,83 @@
#include "instruction.hpp"
// Writes the indentation string to `to` once per indent level.
// A zero or negative `n` writes nothing (the previous `while(n--)` form kept
// counting down for negative values).
static void print_indent(int n, std::ostream& to) {
    for(int level = 0; level < n; ++level) {
        to << " ";
    }
}
// Prints "PushInt(<value>)" on its own indented line.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(";
    to << value;
    to << ")" << std::endl;
}
// Prints "PushGlobal(<name>)" on its own indented line.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(";
    to << name;
    to << ")" << std::endl;
}
// Prints "Push(<offset>)" on its own indented line.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(";
    to << offset;
    to << ")" << std::endl;
}
// Prints "Pop(<count>)" on its own indented line.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(";
    to << count;
    to << ")" << std::endl;
}
// Prints "MkApp()" on its own indented line.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
// Prints "Update(<offset>)" on its own indented line.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(";
    to << offset;
    to << ")" << std::endl;
}
// Prints "Pack(<tag>, <size>)" on its own indented line.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag;
    to << ", " << size;
    to << ")" << std::endl;
}
// Prints "Split()" on its own indented line.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
// Prints "Jump(", then every branch's instructions two levels deeper with an
// empty line after each branch, then a closing ")" at the original indent.
// The tag-to-branch mapping is not printed.
void instruction_jump::print(int indent, std::ostream& to) const {
    const int nested_indent = indent + 2;

    print_indent(indent, to);
    to << "Jump(" << std::endl;

    for(const auto& branch_body : branches) {
        for(const auto& step : branch_body) {
            step->print(nested_indent, to);
        }
        to << std::endl;
    }

    print_indent(indent, to);
    to << ")" << std::endl;
}
// Prints "Slide(<offset>)" on its own indented line.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(";
    to << offset;
    to << ")" << std::endl;
}
// Prints "BinOp(<action>)" on its own indented line, where <action> is the
// word op_action() gives for the operator.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(";
    to << op_action(op);
    to << ")" << std::endl;
}
// Prints "Eval()" on its own indented line.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
// Prints "Alloc(<amount>)" on its own indented line.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(";
    to << amount;
    to << ")" << std::endl;
}
// Prints "Unwind()" on its own indented line.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}

View File

@@ -0,0 +1,120 @@
#pragma once
#include <string>
#include <memory>
#include <vector>
#include <map>
#include <ostream>
#include "binop.hpp"
// Abstract base of all instructions the compiler emits.
struct instruction {
virtual ~instruction() = default;
// Writes a textual form of the instruction to `to`, indented by `indent` levels.
virtual void print(int indent, std::ostream& to) const = 0;
};
using instruction_ptr = std::unique_ptr<instruction>;
// Instruction carrying an integer value; printed as "PushInt(<value>)".
struct instruction_pushint : public instruction {
int value;
instruction_pushint(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
};
// Instruction carrying the name of a global; printed as "PushGlobal(<name>)".
struct instruction_pushglobal : public instruction {
std::string name;
instruction_pushglobal(std::string n)
: name(std::move(n)) {}
void print(int indent, std::ostream& to) const;
};
// Instruction carrying an offset; printed as "Push(<offset>)".
struct instruction_push : public instruction {
int offset;
instruction_push(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
};
// Instruction carrying a count; printed as "Pop(<count>)".
struct instruction_pop : public instruction {
int count;
instruction_pop(int c)
: count(c) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no operands; printed as "MkApp()".
struct instruction_mkapp : public instruction {
void print(int indent, std::ostream& to) const;
};
// Instruction carrying an offset; printed as "Update(<offset>)".
struct instruction_update : public instruction {
int offset;
instruction_update(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
};
// Instruction carrying a constructor tag and a size; printed as "Pack(<tag>, <size>)".
struct instruction_pack : public instruction {
int tag;
int size;
instruction_pack(int t, int s)
: tag(t), size(s) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no operands; printed as "Split()".
struct instruction_split : public instruction {
void print(int indent, std::ostream& to) const;
};
// Multi-way branch: a list of instruction sequences plus a table that maps a
// constructor tag to the index of the sequence to use for it.
struct instruction_jump : public instruction {
// One instruction sequence per branch; owns the instructions.
std::vector<std::vector<instruction_ptr>> branches;
// Constructor tag -> index into `branches`.
std::map<int, int> tag_mappings;
void print(int indent, std::ostream& to) const;
};
// Instruction carrying an offset; printed as "Slide(<offset>)".
struct instruction_slide : public instruction {
int offset;
instruction_slide(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
};
// Instruction carrying a binary operator; printed as "BinOp(<action>)".
struct instruction_binop : public instruction {
binop op;
instruction_binop(binop o)
: op(o) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no operands; printed as "Eval()".
struct instruction_eval : public instruction {
void print(int indent, std::ostream& to) const;
};
// Instruction carrying an amount; printed as "Alloc(<amount>)".
struct instruction_alloc : public instruction {
int amount;
instruction_alloc(int a)
: amount(a) {}
void print(int indent, std::ostream& to) const;
};
// Instruction with no operands; printed as "Unwind()".
struct instruction_unwind : public instruction {
void print(int indent, std::ostream& to) const;
};

88
code/compiler/07/main.cpp Normal file
View File

@@ -0,0 +1,88 @@
#include "ast.hpp"
#include <iostream>
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
// Error callback of the generated parser: reports the message on standard output.
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occured: ";
    std::cout << msg;
    std::cout << std::endl;
}
extern std::vector<definition_ptr> program;
void typecheck_program(
const std::vector<definition_ptr>& prog,
type_mgr& mgr, type_env& env) {
type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr(
int_type,
type_ptr(new type_arr(int_type, int_type))));
env.bind("+", binop_type);
env.bind("-", binop_type);
env.bind("*", binop_type);
env.bind("/", binop_type);
for(auto& def : prog) {
def->typecheck_first(mgr, env);
}
for(auto& def : prog) {
def->typecheck_second(mgr, env);
}
for(auto& pair : env.names) {
std::cout << pair.first << ": ";
pair.second->print(mgr, std::cout);
std::cout << std::endl;
}
for(auto& def : prog) {
def->resolve(mgr);
}
}
// Compiles every definition and, for function definitions, prints the
// generated instructions followed by an empty line.
void compile_program(const std::vector<definition_ptr>& prog) {
    for(const auto& def : prog) {
        def->compile();

        auto* function_def = dynamic_cast<definition_defn*>(def.get());
        if(function_def != nullptr) {
            for(const auto& step : function_def->instructions) {
                step->print(0, std::cout);
            }
            std::cout << std::endl;
        }
    }
}
int main() {
yy::parser parser;
type_mgr mgr;
type_env env;
parser.parse();
for(auto& definition : program) {
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
if(!def) continue;
std::cout << def->name;
for(auto& param : def->params) std::cout << " " << param;
std::cout << ":" << std::endl;
def->body->print(1, std::cout);
}
try {
typecheck_program(program, mgr, env);
compile_program(program);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";
err.left->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
std::cout << " (2) \033[32m";
err.right->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
} catch(type_error& err) {
std::cout << "failed to type check program: " << err.description << std::endl;
}
}

140
code/compiler/07/parser.y Normal file
View File

@@ -0,0 +1,140 @@
%{
// Prologue copied into the generated parser: headers used by the semantic
// actions, the global that receives the parse result, and the lexer entry point.
#include <string>
#include <iostream>
#include "ast.hpp"
#include "parser.hpp"
// Filled in by the `program` rule once the whole input has been parsed.
std::vector<definition_ptr> program;
// Declared here, defined elsewhere (presumably by the generated scanner).
extern yy::parser::symbol_type yylex();
%}
/* Tokens. Only INT, LID and UID carry a semantic value. */
%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
%token <int> INT
%token DEFN
%token DATA
%token CASE
%token OF
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
%token <std::string> LID
%token <std::string> UID
/* Generate a C++ parser whose semantic values are stored in a variant and
   whose tokens are built through constructor functions. */
%language "c++"
%define api.value.type variant
%define api.token.constructor
/* Semantic value types of the nonterminals. */
%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor
%start program
%%
/* Start symbol: stores the parsed definitions in the global `program`. */
program
: definitions { program = std::move($1); }
;
/* One or more definitions, collected in source order. */
definitions
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
;
/* A definition is either a function definition or a data definition. */
definition
: defn { $$ = std::move($1); }
| data { $$ = std::move($1); }
;
/* Function definition: DEFN name params EQUAL OCURLY body CCURLY. */
defn
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
{ $$ = definition_ptr(
new definition_defn(std::move($2), std::move($3), std::move($6))); }
;
/* Possibly empty list of lowercase identifiers, in source order. */
lowercaseParams
: %empty { $$ = std::vector<std::string>(); }
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
/* Possibly empty list of uppercase identifiers, in source order. */
uppercaseParams
: %empty { $$ = std::vector<std::string>(); }
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
/* Additive expressions; left-recursive, so + and - group to the left. */
aAdd
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
| aMul { $$ = std::move($1); }
;
/* Multiplicative expressions; nested below aAdd, so * and / bind tighter than + and -. */
aMul
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
| app { $$ = std::move($1); }
;
/* Function application by juxtaposition; left-recursive, so `f a b` is `(f a) b`. */
app
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
| appBase { $$ = std::move($1); }
;
/* Atomic expressions: literals, identifiers, parenthesized expressions and case. */
appBase
: INT { $$ = ast_ptr(new ast_int($1)); }
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
| OPAREN aAdd CPAREN { $$ = std::move($2); }
| case { $$ = std::move($1); }
;
/* Case expression: CASE scrutinee OF OCURLY branches CCURLY. */
case
: CASE aAdd OF OCURLY branches CCURLY
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
;
/* One or more case branches, collected in source order. */
branches
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
;
/* A single branch: pattern ARROW OCURLY expression CCURLY. */
branch
: pattern ARROW OCURLY aAdd CCURLY
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
;
/* A pattern is a lone variable, or a constructor name followed by variable names. */
pattern
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
| UID lowercaseParams
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
;
/* Data definition: DATA name EQUAL OCURLY constructors CCURLY. */
data
: DATA UID EQUAL OCURLY constructors CCURLY
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
;
/* One or more constructors separated by COMMA, collected in source order. */
constructors
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
| constructor
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
;
/* A constructor: its name followed by the names of its parameter types. */
constructor
: UID uppercaseParams
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
;

159
code/compiler/07/runtime.c Normal file
View File

@@ -0,0 +1,159 @@
#include <stdint.h>
#include <assert.h>
#include <memory.h>
#include "runtime.h"
/* Allocate raw storage for one node. Every node, whatever tag it ends up
 * with, receives sizeof(struct node_app) bytes. Asserts on allocation
 * failure; the contents are left uninitialised. */
struct node_base* alloc_node() {
    void* storage = malloc(sizeof(struct node_app));
    assert(storage != NULL);
    return (struct node_base*) storage;
}
/* Create an application node whose left and right children are l and r. */
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
    struct node_app* app = (struct node_app*) alloc_node();
    app->left = l;
    app->right = r;
    app->base.tag = NODE_APP;
    return app;
}
/* Create a number node holding the 32-bit value n. */
struct node_num* alloc_num(int32_t n) {
    struct node_num* num = (struct node_num*) alloc_node();
    num->value = n;
    num->base.tag = NODE_NUM;
    return num;
}
/* Create a global (function) node: f is the code to run and a is the arity
 * recorded alongside it. */
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a) {
    struct node_global* global = (struct node_global*) alloc_node();
    global->function = f;
    global->arity = a;
    global->base.tag = NODE_GLOBAL;
    return global;
}
/* Create an indirection node that forwards to n (n may be NULL, as
 * stack_alloc does). */
struct node_ind* alloc_ind(struct node_base* n) {
    struct node_ind* indirection = (struct node_ind*) alloc_node();
    indirection->next = n;
    indirection->base.tag = NODE_IND;
    return indirection;
}
/* Prepare an empty stack with room for four entries. Asserts if the backing
 * array cannot be allocated. */
void stack_init(struct stack* s) {
    s->count = 0;
    s->size = 4;
    s->data = malloc(s->size * sizeof(*s->data));
    assert(s->data != NULL);
}
/* Release the stack's backing array. The nodes the entries point to are not
 * freed, and the stack struct itself is left untouched. */
void stack_free(struct stack* s) {
free(s->data);
}
/* Push n on top of the stack, doubling the capacity as often as needed to
 * make room. Asserts if the reallocation fails. */
void stack_push(struct stack* s, struct node_base* n) {
    while(s->size <= s->count) {
        s->size *= 2;
        s->data = realloc(s->data, s->size * sizeof(*s->data));
        assert(s->data != NULL);
    }
    s->data[s->count] = n;
    s->count++;
}
/* Remove and return the top entry. Asserts that the stack is not empty. */
struct node_base* stack_pop(struct stack* s) {
    assert(s->count > 0);
    s->count -= 1;
    return s->data[s->count];
}
/* Return the entry o slots below the top (o == 0 is the top itself) without
 * removing anything. Asserts that such an entry exists. */
struct node_base* stack_peek(struct stack* s, size_t o) {
    assert(s->count > o);
    size_t index = s->count - o - 1;
    return s->data[index];
}
/* Discard the top n entries. Asserts that at least n entries are present. */
void stack_popn(struct stack* s, size_t n) {
    assert(s->count >= n);
    s->count = s->count - n;
}
/* Keep the top entry but drop the n entries directly beneath it. Asserts
 * that the stack holds more than n entries. */
void stack_slide(struct stack* s, size_t n) {
    assert(s->count > n);
    size_t top = s->count - 1;
    s->data[top - n] = s->data[top];
    s->count -= n;
}
/* Pop the top entry and overwrite, in place, the node that sat o + 1 slots
 * below it with an indirection to the popped node. Asserts that the stack
 * holds more than o + 1 entries. */
void stack_update(struct stack* s, size_t o) {
    assert(s->count > o + 1);
    struct node_base* value = s->data[s->count - 1];
    struct node_ind* target = (struct node_ind*) s->data[s->count - o - 2];
    s->count -= 1;
    target->base.tag = NODE_IND;
    target->next = value;
}
/* Push o fresh indirection nodes, each pointing at NULL. */
void stack_alloc(struct stack* s, size_t o) {
    for(size_t pushed = 0; pushed < o; pushed++) {
        stack_push(s, (struct node_base*) alloc_ind(NULL));
    }
}
/* Replace the top n stack entries with a single data node.
 *
 * The n entries are copied, bottom-most first, into a freshly allocated
 * array owned by the new node; t becomes the node's constructor tag.
 * stack_split pushes the array back in the same order.
 *
 * Asserts that the stack holds at least n entries and that allocation
 * succeeds. */
void stack_pack(struct stack* s, size_t n, int8_t t) {
    assert(s->count >= n);

    struct node_base** data = malloc(sizeof(*data) * n);
    /* malloc(0) is allowed to return NULL, so only demand a buffer when
     * there is something to store (n == 0 happens for field-less
     * constructors). */
    assert(n == 0 || data != NULL);
    if(n > 0) {
        /* The top n entries start at index count - n. */
        memcpy(data, &s->data[s->count - n], n * sizeof(*data));
    }

    struct node_data* new_node = (struct node_data*) alloc_node();
    new_node->array = data;
    new_node->base.tag = NODE_DATA;
    new_node->tag = t;

    stack_popn(s, n);
    stack_push(s, (struct node_base*) new_node);
}
/* Pop the top entry, treat it as a data node, and push the first n elements
 * of its array in index order. The node's tag is not checked. */
void stack_split(struct stack* s, size_t n) {
    struct node_data* packed = (struct node_data*) stack_pop(s);
    struct node_base** field = packed->array;
    for(size_t remaining = n; remaining > 0; remaining--) {
        stack_push(s, *field);
        field++;
    }
}
/* Repeatedly inspect the top of the stack until it is neither an
 * application, a global, nor an indirection:
 *   - application: push its left child;
 *   - global: replace the `arity` application nodes below it by their right
 *     children (the arguments), then call the global's function;
 *   - indirection: replace it with the node it points to.
 * Asserts that a global has more than `arity` entries on the stack. */
void unwind(struct stack* s) {
    for(;;) {
        struct node_base* top = stack_peek(s, 0);
        switch(top->tag) {
        case NODE_APP: {
            struct node_app* app = (struct node_app*) top;
            stack_push(s, app->left);
            break;
        }
        case NODE_GLOBAL: {
            struct node_global* global = (struct node_global*) top;
            assert(s->count > global->arity);
            for(size_t i = 1; i <= global->arity; i++) {
                /* Slot count - i receives the argument held by the
                 * application node one slot further down. */
                struct node_app* spine =
                    (struct node_app*) s->data[s->count - i - 1];
                s->data[s->count - i] = spine->right;
            }
            global->function(s);
            break;
        }
        case NODE_IND: {
            struct node_ind* indirection = (struct node_ind*) top;
            stack_pop(s);
            stack_push(s, indirection->next);
            break;
        }
        default:
            return;
        }
    }
}
struct node_base* eval(struct node_base* n) {
struct stack program_stack;
stack_init(&program_stack);
stack_push(&program_stack, n);
unwind(&program_stack);
struct node_base* result = stack_pop(&program_stack);
stack_free(&program_stack);
return result;
}
/* Entry point of the compiled program, supplied by another object file. */
extern void f_main(struct stack* s);

/* Wrap f_main in a zero-arity global node and evaluate it. The result is
 * computed but not printed or otherwise used. */
int main(int argc, char** argv) {
    struct node_base* entry = (struct node_base*) alloc_global(f_main, 0);
    struct node_base* result = eval(entry);
}

View File

@@ -0,0 +1,70 @@
#pragma once
#include <stdlib.h>
struct stack;
/* Discriminator stored in node_base.tag; says which node_* struct a
 * node_base pointer really refers to. */
enum node_tag {
NODE_APP,
NODE_NUM,
NODE_GLOBAL,
NODE_IND,
NODE_DATA
};
/* Common header embedded as the first member of every node struct. */
struct node_base {
enum node_tag tag;
};
/* Application node (NODE_APP) with a left and a right child. alloc_node
 * sizes every allocation after this struct. */
struct node_app {
struct node_base base;
struct node_base* left;
struct node_base* right;
};
/* Number node (NODE_NUM) holding a 32-bit signed value. */
struct node_num {
struct node_base base;
int32_t value;
};
/* Global node (NODE_GLOBAL): a function operating on a stack, plus the
 * arity that unwind checks before calling it. */
struct node_global {
struct node_base base;
int32_t arity;
void (*function)(struct stack*);
};
/* Indirection node (NODE_IND): unwind replaces it with `next`. */
struct node_ind {
struct node_base base;
struct node_base* next;
};
/* Data node (NODE_DATA) produced by stack_pack: `tag` is the value passed to
 * stack_pack and `array` holds the packed node pointers. The element count
 * is not stored here. */
struct node_data {
struct node_base base;
int8_t tag;
struct node_base** array;
};
/* Node constructors. alloc_node returns uninitialised storage; the others
 * call it and fill in the tag and fields for their node kind. */
struct node_base* alloc_node();
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
struct node_num* alloc_num(int32_t n);
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a);
struct node_ind* alloc_ind(struct node_base* n);
/* Growable stack of node pointers: `size` is the capacity of `data`,
 * `count` the number of entries in use (the top is data[count - 1]). */
struct stack {
size_t size;
size_t count;
struct node_base** data;
};
/* Stack operations. Offsets (o) count down from the top, 0 being the top
 * entry; counts (n) are numbers of entries. */
void stack_init(struct stack* s);
void stack_free(struct stack* s);
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o);
void stack_popn(struct stack* s, size_t n);
void stack_slide(struct stack* s, size_t n);
void stack_update(struct stack* s, size_t o);
void stack_alloc(struct stack* s, size_t o);
void stack_pack(struct stack* s, size_t n, int8_t t);
void stack_split(struct stack* s, size_t n);
/* Evaluate a node on a fresh temporary stack and return the result. */
struct node_base* eval(struct node_base* n);

View File

@@ -0,0 +1,34 @@
%option noyywrap
%{
// Scanner for the toy language. Each rule below returns a parser symbol
// built with the matching yy::parser::make_* factory; runs of spaces and
// newlines are matched and dropped. INT carries atoi(yytext); LID and UID
// carry the matched text as a std::string.
#include <iostream>
#include "ast.hpp"
#include "parser.hpp"
// Give yylex() the parser's symbol_type as its return type.
#define YY_DECL yy::parser::symbol_type yylex()
%}
%%
[ \n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
%%

99
code/compiler/07/type.cpp Normal file
View File

@@ -0,0 +1,99 @@
#include "type.hpp"
#include <sstream>
#include <algorithm>
#include "error.hpp"
// Print this type variable. If the manager has a binding for the variable's
// name, the bound type is printed instead; otherwise the bare name is written.
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
    const auto binding = mgr.types.find(name);
    if(binding == mgr.types.end()) {
        to << name;
        return;
    }
    binding->second->print(mgr, to);
}
// Print a base type: just its name. The manager is not consulted.
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
to << name;
}
// Print a function type as `left -> (right)`. Only the right-hand side is
// parenthesised; a left-hand side that is itself an arrow is printed bare.
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
left->print(mgr, to);
to << " -> (";
right->print(mgr, to);
to << ")";
}
// Produce the next unused type-variable name and advance last_id.
// Ids map to lowercase letter strings: 0 -> "a", 25 -> "z", 26 -> "aa", ...
std::string type_mgr::new_type_name() {
    std::string letters;
    // Digits come out least-significant first; the "- 1" makes the scheme
    // bijective so that "aa" follows "z".
    for(int rest = last_id++; rest != -1; rest = rest / 26 - 1) {
        letters += (char) ('a' + (rest % 26));
    }
    std::reverse(letters.begin(), letters.end());
    return letters;
}
// Create a fresh, unbound type variable with a newly generated name.
type_ptr type_mgr::new_type() {
    type_ptr fresh(new type_var(new_type_name()));
    return fresh;
}
// Create a function type whose argument and result are both fresh type
// variables.
type_ptr type_mgr::new_arrow_type() {
    // Sequence the two allocations explicitly: as function arguments their
    // evaluation order is unspecified, which made it compiler-dependent
    // which side received the earlier name.
    type_ptr argument = new_type();
    type_ptr result = new_type();
    return type_ptr(new type_arr(std::move(argument), std::move(result)));
}
// Follow bindings starting at t until reaching either a non-variable type or
// a type variable with no binding.
//
// Returns the type reached. `var` is set to the unbound variable when the
// chain ends in one, and to nullptr otherwise.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;
    for(;;) {
        type_var* as_var = dynamic_cast<type_var*>(t.get());
        if(!as_var) return t;

        auto binding = types.find(as_var->name);
        if(binding == types.end()) {
            var = as_var;
            return t;
        }
        t = binding->second;
    }
}
// Make l and r equal, recording variable bindings as needed.
//
// Both sides are resolved first. An unbound variable on either side (left
// is tried first) is bound to the other side. Two arrows are unified
// component-wise. Two base types unify only when their names match.
// Anything else throws unification_error carrying the two resolved types.
void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* left_var;
    type_var* right_var;
    l = resolve(l, left_var);
    r = resolve(r, right_var);

    if(left_var) {
        bind(left_var->name, r);
        return;
    }
    if(right_var) {
        bind(right_var->name, l);
        return;
    }

    type_arr* left_arrow = dynamic_cast<type_arr*>(l.get());
    type_arr* right_arrow = dynamic_cast<type_arr*>(r.get());
    if(left_arrow && right_arrow) {
        unify(left_arrow->left, right_arrow->left);
        unify(left_arrow->right, right_arrow->right);
        return;
    }

    type_base* left_base = dynamic_cast<type_base*>(l.get());
    type_base* right_base = dynamic_cast<type_base*>(r.get());
    if(left_base && right_base && left_base->name == right_base->name) {
        return;
    }

    throw unification_error(l, r);
}
void type_mgr::bind(const std::string& s, type_ptr t) {
type_var* other = dynamic_cast<type_var*>(t.get());
if(other && other->name == s) return;
types[s] = t;
}

65
code/compiler/07/type.hpp Normal file
View File

@@ -0,0 +1,65 @@
#pragma once
#include <memory>
#include <map>
struct type_mgr;
// Abstract base of all types. print() writes a textual form of the type to
// `to`, using `mgr` to look through bound type variables.
struct type {
virtual ~type() = default;
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
};
// Types are shared between AST nodes, environments and the manager.
using type_ptr = std::shared_ptr<type>;
// A type variable, identified by name. Its binding (if any) lives in
// type_mgr::types, keyed by this name.
struct type_var : public type {
std::string name;
type_var(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
// A named, non-function type. Unification compares base types by name.
struct type_base : public type {
std::string name;
type_base(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
// A base type that additionally records its constructors: a map from
// constructor name to the integer tag assigned to it.
struct type_data : public type_base {
struct constructor {
int tag;
};
std::map<std::string, constructor> constructors;
type_data(std::string n)
: type_base(std::move(n)) {}
};
// A function ("arrow") type from `left` to `right`.
struct type_arr : public type {
type_ptr left;
type_ptr right;
type_arr(type_ptr l, type_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
// Owns type-variable state during inference: a counter for generating
// fresh names and the map from variable name to the type it is bound to.
struct type_mgr {
// Id used for the next generated variable name.
int last_id = 0;
// Bindings established by unify()/bind().
std::map<std::string, type_ptr> types;
std::string new_type_name();
type_ptr new_type();
type_ptr new_arrow_type();
// Throws unification_error when l and r cannot be made equal.
void unify(type_ptr l, type_ptr r);
// Follows bindings from t; sets var to the final unbound variable or nullptr.
type_ptr resolve(type_ptr t, type_var*& var) const;
void bind(const std::string& s, type_ptr t);
};

View File

@@ -0,0 +1,16 @@
#include "type_env.hpp"
// Find the type bound to `name`, searching this environment first and then
// each enclosing one. Returns nullptr when no scope binds the name.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* current = this; current; current = current->parent) {
        auto found = current->names.find(name);
        if(found != current->names.end()) return found->second;
    }
    return nullptr;
}
// Bind `name` to t in this scope, replacing any binding this scope already
// had for it. Enclosing scopes are not modified.
void type_env::bind(const std::string& name, type_ptr t) {
names[name] = t;
}
// Create an empty child environment whose parent pointer refers to this
// one; the child must therefore not outlive this object.
type_env type_env::scope() const {
return type_env(this);
}

View File

@@ -0,0 +1,16 @@
#pragma once
#include <map>
#include "type.hpp"
// A lexical scope mapping names to types, chained to an optional parent
// scope through a raw, non-owning pointer.
struct type_env {
std::map<std::string, type_ptr> names;
type_env const* parent = nullptr;
type_env(type_env const* p)
: parent(p) {}
type_env() : type_env(nullptr) {}
// Returns nullptr when the name is unbound in this scope and all parents.
type_ptr lookup(const std::string& name) const;
void bind(const std::string& name, type_ptr t);
// Returns a new, empty scope whose parent is this environment.
type_env scope() const;
};

View File

@@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.1)
project(compiler)

# Find all the required packages.
# Bison and Flex are REQUIRED: bison_target()/flex_target() below are only
# defined when the packages are found, so failing here gives a clear
# "package not found" message instead of an unknown-command error.
find_package(BISON REQUIRED)
find_package(FLEX REQUIRED)
find_package(LLVM REQUIRED CONFIG)

# Set up the flex and bison targets
bison_target(parser
    ${CMAKE_CURRENT_SOURCE_DIR}/parser.y
    ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
    COMPILE_FLAGS "-d")
flex_target(scanner
    ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
    ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
# The scanner includes the parser's generated header, so build the parser first.
add_flex_bison_dependency(scanner parser)

# Find all the relevant LLVM components
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)

# Create compiler executable
add_executable(compiler
    ast.cpp ast.hpp definition.cpp
    llvm_context.cpp llvm_context.hpp
    type_env.cpp type_env.hpp
    env.cpp env.hpp
    type.cpp type.hpp
    error.cpp error.hpp
    binop.cpp binop.hpp
    instruction.cpp instruction.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
)

# Configure compiler executable.
# The binary dir is on the include path so the generated parser.hpp is found.
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
target_link_libraries(compiler ${LLVM_LIBS})

264
code/compiler/08/ast.cpp Normal file
View File

@@ -0,0 +1,264 @@
#include "ast.hpp"
#include <ostream>
#include "binop.hpp"
#include "error.hpp"
// Write n copies of the indentation unit to `to`.
static void print_indent(int n, std::ostream& to) {
    while(n != 0) {
        to << " ";
        --n;
    }
}
// Run the node-specific typecheck() and remember its result in node_type
// before handing it back to the caller.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    type_ptr inferred = typecheck(mgr, env);
    node_type = inferred;
    return node_type;
}
void ast::resolve_common(const type_mgr& mgr) {
type_var* var;
type_ptr resolved_type = mgr.resolve(node_type, var);
if(var) throw type_error("ambiguously typed program");
resolve(mgr);
node_type = std::move(resolved_type);
}
// Print this literal as an indented "INT: <value>" line.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value;
    to << std::endl;
}
// An integer literal always has the base type "Int".
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr int_type(new type_base("Int"));
    return int_type;
}
// A literal has no children, so there is nothing to resolve.
void ast_int::resolve(const type_mgr& mgr) const {
}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
// Print this identifier as an indented "LID: <name>" line.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id;
    to << std::endl;
}
// The type of a lowercase identifier is whatever the environment binds it to.
//
// Throws type_error when the name is not bound in any enclosing scope,
// rather than handing a null type to the caller, which has no null checks
// (ast_binop::typecheck reports an unknown operator the same way).
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown variable ") + id);
    return found;
}
// An identifier has no children, so there is nothing to resolve.
void ast_lid::resolve(const type_mgr& mgr) const {
}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
}
// Print this identifier as an indented "UID: <name>" line.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id;
    to << std::endl;
}
// The type of an uppercase identifier (a constructor) is whatever the
// environment binds it to.
//
// Throws type_error when the name is not bound in any enclosing scope,
// rather than handing a null type to the caller, which has no null checks
// (pattern_constr::match rejects unknown constructors the same way).
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown constructor ") + id);
    return found;
}
// An identifier has no children, so there is nothing to resolve.
void ast_uid::resolve(const type_mgr& mgr) const {
}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}
void ast_binop::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "BINOP: " << op_name(op) << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
// Type a binary operation by treating the operator as a two-argument
// function looked up under its symbol: the operator's type is unified with
// `left -> right -> result`, and the fresh `result` variable is returned.
//
// Throws type_error when the environment has no entry for the operator.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr left_type = left->typecheck_common(mgr, env);
    type_ptr right_type = right->typecheck_common(mgr, env);

    type_ptr operator_type = env.lookup(op_name(op));
    if(!operator_type) {
        throw type_error(std::string("unknown binary operator ") + op_name(op));
    }

    type_ptr result_type = mgr.new_type();
    type_ptr after_left(new type_arr(right_type, result_type));
    type_ptr expected(new type_arr(left_type, after_left));
    mgr.unify(expected, operator_type);
    return result_type;
}
void ast_binop::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
into.push_back(instruction_ptr(new instruction_mkapp()));
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_app::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "APP:" << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
// Type an application: the function's type must unify with
// `argument -> result`, where `result` is a fresh variable that is returned.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr function_type = left->typecheck_common(mgr, env);
    type_ptr argument_type = right->typecheck_common(mgr, env);

    type_ptr result_type = mgr.new_type();
    type_ptr expected(new type_arr(argument_type, result_type));
    mgr.unify(expected, function_type);
    return result_type;
}
void ast_app::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_case::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "CASE: " << std::endl;
for(auto& branch : branches) {
print_indent(indent + 1, to);
branch->pat->print(to);
to << std::endl;
branch->expr->print(indent + 2, to);
}
}
// Type a case expression. Each branch is checked in its own child scope:
// the pattern is matched against the scrutinee's type (binding pattern
// variables) and the branch body's type is unified with one shared result
// variable, which is returned.
//
// Throws type_error if, after all branches, the scrutinee's type does not
// resolve to a data type.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* ignored;
    type_ptr scrutinee_type = mgr.resolve(of->typecheck_common(mgr, env), ignored);
    type_ptr result_type = mgr.new_type();

    for(const auto& alternative : branches) {
        type_env branch_env = env.scope();
        alternative->pat->match(scrutinee_type, mgr, branch_env);
        type_ptr body_type = alternative->expr->typecheck_common(mgr, branch_env);
        mgr.unify(result_type, body_type);
    }

    scrutinee_type = mgr.resolve(scrutinee_type, ignored);
    const bool is_data_type =
        dynamic_cast<type_data*>(scrutinee_type.get()) != nullptr;
    if(!is_data_type) {
        throw type_error("attempting case analysis of non-data type");
    }
    return result_type;
}
void ast_case::resolve(const type_mgr& mgr) const {
of->resolve_common(mgr);
for(auto& branch : branches) {
branch->expr->resolve_common(mgr);
}
}
// Compile a case expression: evaluate the scrutinee, then emit one jump
// instruction whose branch table is filled in from the source branches.
//
// - A constructor pattern contributes Split(n) + body + Slide(n), where n is
//   the number of pattern parameters, and is mapped from that constructor's
//   tag. Repeating a constructor throws type_error.
// - A variable pattern is a catch-all: its branch is mapped from every
//   constructor tag that no earlier branch has claimed.
//
// Throws type_error("non-total pattern") if some constructor of the
// scrutinee's data type is left without a branch.
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    // The scrutinee's resolved type; typecheck() already required a data type.
    type_data* type = dynamic_cast<type_data*>(of->node_type.get());

    of->compile(env, into);
    into.push_back(instruction_ptr(new instruction_eval()));

    // `into` takes ownership; the raw pointer is kept to fill in the table.
    instruction_jump* jump_instruction = new instruction_jump();
    into.push_back(instruction_ptr(jump_instruction));

    for(auto& branch : branches) {
        std::vector<instruction_ptr> branch_instructions;
        pattern_var* vpat;
        pattern_constr* cpat;

        if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
            // NOTE(review): the body is compiled with a plain one-slot
            // offset, so the pattern variable itself is not added to the
            // compile-time environment here -- confirm this is intended.
            branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);

            for(auto& constr_pair : type->constructors) {
                // Skip tags already claimed by an earlier branch and keep
                // going. Stopping at the first claimed tag (as this loop
                // used to) left later constructors unmapped, because the
                // map is walked in constructor-name order, not tag order.
                if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
                        jump_instruction->tag_mappings.end())
                    continue;

                jump_instruction->tag_mappings[constr_pair.second.tag] =
                    jump_instruction->branches.size();
            }
            jump_instruction->branches.push_back(std::move(branch_instructions));
        } else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
            // Bind parameters last-to-first so the first parameter ends up
            // at offset 0.
            env_ptr new_env = env;
            for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
                new_env = env_ptr(new env_var(*it, new_env));
            }

            branch_instructions.push_back(instruction_ptr(new instruction_split(
                            cpat->params.size())));
            branch->expr->compile(new_env, branch_instructions);
            branch_instructions.push_back(instruction_ptr(new instruction_slide(
                            cpat->params.size())));

            int new_tag = type->constructors[cpat->constr].tag;
            if(jump_instruction->tag_mappings.find(new_tag) !=
                    jump_instruction->tag_mappings.end())
                throw type_error("technically not a type error: duplicate pattern");

            jump_instruction->tag_mappings[new_tag] =
                jump_instruction->branches.size();
            jump_instruction->branches.push_back(std::move(branch_instructions));
        }
    }

    // Every constructor of the data type must be reachable.
    for(auto& constr_pair : type->constructors) {
        if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
                jump_instruction->tag_mappings.end())
            throw type_error("non-total pattern");
    }
}
// Print the pattern: just the variable's name.
void pattern_var::print(std::ostream& to) const {
to << var;
}
// A variable pattern matches any type: bind the variable to t in env.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
env.bind(var, t);
}
// Print the constructor name followed by each parameter, space-separated.
void pattern_constr::print(std::ostream& to) const {
    to << constr;
    for(auto param = params.begin(); param != params.end(); ++param) {
        to << " ";
        to << *param;
    }
}
// Match a constructor pattern against type t.
//
// The constructor's type is looked up in env and peeled one arrow per
// pattern parameter, binding each parameter to the corresponding argument
// type. Whatever remains is unified with t.
//
// Throws type_error if the constructor is unknown or the pattern lists more
// parameters than the constructor's type has arrows.
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr remaining = env.lookup(constr);
    if(!remaining) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    for(const auto& param : params) {
        type_arr* arrow = dynamic_cast<type_arr*>(remaining.get());
        if(!arrow) throw type_error("too many parameters in constructor pattern");

        env.bind(param, arrow->left);
        remaining = arrow->right;
    }

    mgr.unify(t, remaining);
}

141
code/compiler/08/ast.hpp Normal file
View File

@@ -0,0 +1,141 @@
#pragma once
#include <memory>
#include <vector>
#include "type.hpp"
#include "type_env.hpp"
#include "binop.hpp"
#include "instruction.hpp"
#include "env.hpp"
// Base class of all expression nodes.
//
// Nodes implement print / typecheck / resolve / compile; the *_common
// wrappers additionally maintain node_type and are what callers use when
// recursing into children.
struct ast {
// Set by typecheck_common, replaced by its resolved form in resolve_common.
type_ptr node_type;
virtual ~ast() = default;
virtual void print(int indent, std::ostream& to) const = 0;
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) const = 0;
// Appends the instructions for this node to `into`.
virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
void resolve_common(const type_mgr& mgr);
};
using ast_ptr = std::unique_ptr<ast>;
// Base class of case-branch patterns. match() checks the pattern against
// type t and adds the pattern's variables to env.
struct pattern {
virtual ~pattern() = default;
virtual void print(std::ostream& to) const = 0;
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
};
using pattern_ptr = std::unique_ptr<pattern>;
// One alternative of a case expression: a pattern and the body to evaluate
// when it matches.
struct branch {
pattern_ptr pat;
ast_ptr expr;
branch(pattern_ptr p, ast_ptr a)
: pat(std::move(p)), expr(std::move(a)) {}
};
using branch_ptr = std::unique_ptr<branch>;
// Integer literal.
struct ast_int : public ast {
int value;
explicit ast_int(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Reference to a lowercase identifier (a local variable or a global).
struct ast_lid : public ast {
std::string id;
explicit ast_lid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Reference to an uppercase identifier; always compiled as a global.
struct ast_uid : public ast {
std::string id;
explicit ast_uid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Binary arithmetic operation on two sub-expressions.
struct ast_binop : public ast {
binop op;
ast_ptr left;
ast_ptr right;
ast_binop(binop o, ast_ptr l, ast_ptr r)
: op(o), left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Function application: `left` is the function, `right` the argument.
struct ast_app : public ast {
ast_ptr left;
ast_ptr right;
ast_app(ast_ptr l, ast_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Case expression: `of` is the scrutinee, `branches` the alternatives in
// source order.
struct ast_case : public ast {
ast_ptr of;
std::vector<branch_ptr> branches;
ast_case(ast_ptr o, std::vector<branch_ptr> b)
: of(std::move(o)), branches(std::move(b)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
// Pattern consisting of a single variable; matches a value of any type.
struct pattern_var : public pattern {
std::string var;
pattern_var(std::string v)
: var(std::move(v)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
};
// Pattern naming a constructor and one variable per constructor argument.
struct pattern_constr : public pattern {
std::string constr;
std::vector<std::string> params;
pattern_constr(std::string c, std::vector<std::string> p)
: constr(std::move(c)), params(std::move(p)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr&, type_env& env) const;
};

View File

@@ -0,0 +1,21 @@
#include "binop.hpp"
// Source-level symbol for a binary operator; "??" for an unrecognised value.
std::string op_name(binop op) {
    if(op == PLUS) return "+";
    if(op == MINUS) return "-";
    if(op == TIMES) return "*";
    if(op == DIVIDE) return "/";
    return "??";
}
// Name of the global that implements a binary operator (used when
// compiling ast_binop); "??" for an unrecognised value.
std::string op_action(binop op) {
    if(op == PLUS) return "plus";
    if(op == MINUS) return "minus";
    if(op == TIMES) return "times";
    if(op == DIVIDE) return "divide";
    return "??";
}

View File

@@ -0,0 +1,12 @@
#pragma once
#include <string>
// The binary arithmetic operators of the language.
enum binop {
PLUS,
MINUS,
TIMES,
DIVIDE
};
// Operator symbol as written in source ("+", "-", "*", "/").
std::string op_name(binop op);
// Word form of the operator ("plus", "minus", "times", "divide").
std::string op_action(binop op);

View File

@@ -0,0 +1,116 @@
#include "definition.hpp"
#include "error.hpp"
#include "ast.hpp"
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>
// First pass: give the function a provisional type built entirely from
// fresh variables and bind its name in env, so other definitions can refer
// to it before its body is checked.
//
// param_types is filled last-parameter-first (typecheck_second reads it in
// reverse).
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();
    type_ptr signature = return_type;

    // One arrow per parameter, wrapped from the innermost (last) outwards.
    for(size_t remaining = params.size(); remaining > 0; --remaining) {
        type_ptr fresh = mgr.new_type();
        signature = type_ptr(new type_arr(fresh, signature));
        param_types.push_back(fresh);
    }

    env.bind(name, signature);
}
// Second pass: check the body in a child scope where every parameter is
// bound to the type variable created for it in typecheck_first, then unify
// the body's type with the declared return type.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env body_env = env.scope();

    // param_types is stored in reverse parameter order, hence rbegin().
    auto type_it = param_types.rbegin();
    for(auto name_it = params.begin();
            name_it != params.end() && type_it != param_types.rend();
            ++name_it, ++type_it) {
        body_env.bind(*name_it, *type_it);
    }

    type_ptr inferred_body = body->typecheck_common(mgr, body_env);
    mgr.unify(return_type, inferred_body);
}
void definition_defn::resolve(const type_mgr& mgr) {
type_var* var;
body->resolve_common(mgr);
return_type = mgr.resolve(return_type, var);
if(var) throw type_error("ambiguously typed program");
for(auto& param_type : param_types) {
param_type = mgr.resolve(param_type, var);
if(var) throw type_error("ambiguously typed program");
}
}
void definition_defn::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
// First LLVM pass: create the function for this definition (passing its
// name and parameter count) and remember it; the body is emitted later by
// gen_llvm_second.
void definition_defn::gen_llvm_first(llvm_context& ctx) {
generated_function = ctx.create_custom_function(name, params.size());
}
// Second LLVM pass: emit every compiled instruction into the entry block
// of the function created by gen_llvm_first, then return void.
void definition_defn::gen_llvm_second(llvm_context& ctx) {
    ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
    for(auto step = instructions.begin(); step != instructions.end(); ++step) {
        (*step)->gen_llvm(ctx, generated_function);
    }
    ctx.builder.CreateRetVoid();
}
// First pass for a data declaration: create the data type, number its
// constructors 0, 1, 2, ... in declaration order, and bind every
// constructor name in env to a function type from its field types to the
// data type. Field types are created as type_base from their names.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* data_type = new type_data(name);
    type_ptr result_type(data_type);

    int tag = 0;
    for(auto& ctor : constructors) {
        ctor->tag = tag;
        data_type->constructors[ctor->name] = { tag };
        ++tag;

        // Wrap arrows from the last field outwards.
        type_ptr ctor_type = result_type;
        for(auto field = ctor->types.rbegin(); field != ctor->types.rend(); ++field) {
            type_ptr field_type(new type_base(*field));
            ctor_type = type_ptr(new type_arr(field_type, ctor_type));
        }
        env.bind(ctor->name, ctor_type);
    }
}
// Data declarations have no body to check; all work is done in
// typecheck_first.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
// Nothing
}
// Data declarations hold no type variables, so there is nothing to resolve.
void definition_data::resolve(const type_mgr& mgr) {
// Nothing
}
// Intentionally empty: constructor code is emitted directly in
// gen_llvm_first rather than via an instruction list.
void definition_data::compile() {
}
// Emit one function per constructor. Each function packs as many stack
// entries as the constructor has fields, tags the result with the
// constructor's tag, and returns.
void definition_data::gen_llvm_first(llvm_context& ctx) {
    for(auto& ctor : constructors) {
        const auto field_count = ctor->types.size();

        auto ctor_function = ctx.create_custom_function(ctor->name, field_count);
        ctx.builder.SetInsertPoint(&ctor_function->getEntryBlock());
        ctx.create_pack(ctor_function,
                ctx.create_size(field_count),
                ctx.create_i8(ctor->tag));
        ctx.builder.CreateRetVoid();
    }
}
// Constructors are fully generated in gen_llvm_first.
void definition_data::gen_llvm_second(llvm_context& ctx) {
// Nothing
}

View File

@@ -0,0 +1,73 @@
#pragma once
#include <memory>
#include <vector>
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "type_env.hpp"
struct ast;
using ast_ptr = std::unique_ptr<ast>;
// A top-level definition. Processing is split into phases so definitions
// can refer to each other: two type-checking passes, type resolution,
// compilation to instructions, and two LLVM generation passes.
struct definition {
virtual ~definition() = default;
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) = 0;
virtual void compile() = 0;
virtual void gen_llvm_first(llvm_context& ctx) = 0;
virtual void gen_llvm_second(llvm_context& ctx) = 0;
};
using definition_ptr = std::unique_ptr<definition>;
// One constructor of a data declaration: its name and the names of its
// field types.
struct constructor {
std::string name;
std::vector<std::string> types;
// Not set by the constructor below; assigned in
// definition_data::typecheck_first.
int8_t tag;
constructor(std::string n, std::vector<std::string> ts)
: name(std::move(n)), types(std::move(ts)) {}
};
using constructor_ptr = std::unique_ptr<constructor>;
// A function definition: name, parameter names and body expression, plus
// the state accumulated by the later phases.
struct definition_defn : public definition {
std::string name;
std::vector<std::string> params;
ast_ptr body;
// Filled in by typecheck_first; param_types is in reverse parameter order.
type_ptr return_type;
std::vector<type_ptr> param_types;
// Filled in by compile().
std::vector<instruction_ptr> instructions;
// Not initialised here; assigned in gen_llvm_first.
llvm::Function* generated_function;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
void gen_llvm_first(llvm_context& ctx);
void gen_llvm_second(llvm_context& ctx);
};
// A data type declaration: the type's name and its constructors in
// declaration order.
struct definition_data : public definition {
std::string name;
std::vector<constructor_ptr> constructors;
definition_data(std::string n, std::vector<constructor_ptr> cs)
: name(std::move(n)), constructors(std::move(cs)) {}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
void gen_llvm_first(llvm_context& ctx);
void gen_llvm_second(llvm_context& ctx);
};

23
code/compiler/08/env.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include "env.hpp"
// Stack offset of `name`: 0 if this entry is the variable, otherwise one
// more than its offset in the parent. Throws the int 0 when the name is
// not found and there is no parent left to ask.
int env_var::get_offset(const std::string& name) const {
    if(this->name == name) return 0;
    if(!parent) throw 0;
    return 1 + parent->get_offset(name);
}
// True if this entry or any ancestor introduces `name`.
bool env_var::has_variable(const std::string& name) const {
    return this->name == name || (parent && parent->has_variable(name));
}
// Stack offset of `name` as seen through this entry: the parent's answer
// plus this entry's fixed offset. Throws the int 0 when there is no parent.
int env_offset::get_offset(const std::string& name) const {
    if(!parent) throw 0;
    return parent->get_offset(name) + offset;
}
// An offset entry introduces no names itself; defer to the parent, if any.
bool env_offset::has_variable(const std::string& name) const {
    return parent && parent->has_variable(name);
}

34
code/compiler/08/env.hpp Normal file
View File

@@ -0,0 +1,34 @@
#pragma once
#include <memory>
#include <string>
// Compile-time environment: answers where on the stack a named variable
// lives. Implemented as a chain of entries, each pointing to its parent.
struct env {
virtual ~env() = default;
// Offset of `name` from the top of the stack.
virtual int get_offset(const std::string& name) const = 0;
// Whether `name` is a known local variable.
virtual bool has_variable(const std::string& name) const = 0;
};
using env_ptr = std::shared_ptr<env>;
// Environment entry that introduces one named variable occupying one stack
// slot on top of whatever `parent` describes.
struct env_var : public env {
    std::string name;
    env_ptr parent;

    // Takes the name by value. The previous `std::string&` parameter was
    // moved from, which silently emptied the caller's string (for example
    // the elements of a parameter list being iterated). Callers passing an
    // lvalue now keep their string intact; rvalues are accepted too.
    env_var(std::string n, env_ptr p)
        : name(std::move(n)), parent(std::move(p)) {}

    int get_offset(const std::string& name) const;
    bool has_variable(const std::string& name) const;
};
// Environment entry that introduces no name but shifts every offset found
// through `parent` by a fixed amount (for anonymous values on the stack).
struct env_offset : public env {
int offset;
env_ptr parent;
env_offset(int o, env_ptr p)
: offset(o), parent(std::move(p)) {}
int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
};

View File

@@ -0,0 +1,5 @@
#include "error.hpp"
// Always returns the same generic message; the specific text passed to the
// constructor is available through the `description` member instead.
const char* type_error::what() const noexcept {
return "an error occured while checking the types of the program";
}

View File

@@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"
// Exception raised for errors found while type checking or compiling.
// `description` carries the specific message; what() returns a fixed,
// generic one.
struct type_error : std::exception {
std::string description;
type_error(std::string d)
: description(std::move(d)) {}
const char* what() const noexcept override;
};
// Raised by type_mgr::unify when two types cannot be made equal; keeps
// both (resolved) types so the caller can report them.
struct unification_error : public type_error {
    type_ptr left;
    type_ptr right;

    // Initialisers are written in the order they actually run: the base
    // class first, then the members in declaration order.
    unification_error(type_ptr l, type_ptr r)
        : type_error("failed to unify types"),
          left(std::move(l)),
          right(std::move(r)) {}
};

View File

@@ -0,0 +1,2 @@
data Bool = { True, False }
defn main = { 3 + True }

View File

@@ -0,0 +1 @@
defn main = { 1 2 3 4 5 }

View File

@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }
defn head l = {
case l of {
Nil -> { 0 }
Cons x y z -> { x }
}
}

View File

@@ -0,0 +1,31 @@
#include "../runtime.h"
/* Hand-written addition primitive: evaluate the two topmost stack entries,
 * treat both results as numbers, and push a new number node holding their
 * sum. The operands are left on the stack. */
void f_add(struct stack* s) {
    struct node_num* first = (struct node_num*) eval(stack_peek(s, 0));
    struct node_num* second = (struct node_num*) eval(stack_peek(s, 1));
    struct node_num* sum = alloc_num(first->value + second->value);
    stack_push(s, (struct node_base*) sum);
}
/* Hand-written equivalent of a compiled `main`: builds the application
 * graph for adding 320 and 6 and leaves it on top of the stack. */
void f_main(struct stack* s) {
    /* PushInt 320 */
    stack_push(s, (struct node_base*) alloc_num(320));
    /* PushInt 6 */
    stack_push(s, (struct node_base*) alloc_num(6));
    /* PushGlobal f_add (the function for +) */
    stack_push(s, (struct node_base*) alloc_global(f_add, 2));

    /* MkApp, twice: each step pops the function and then its argument and
     * pushes the node applying one to the other. */
    for(int step = 0; step < 2; step++) {
        struct node_base* function = stack_pop(s);
        struct node_base* argument = stack_pop(s);
        stack_push(s, (struct node_base*) alloc_app(function, argument));
    }
}

View File

@@ -0,0 +1,3 @@
defn main = { sum 320 6 }
defn sum x y = { x + y }

View File

@@ -0,0 +1,3 @@
defn add x y = { x + y }
defn double x = { add x x }
defn main = { double 163 }

View File

@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }
defn length l = {
case l of {
Nil -> { 0 }
Cons x xs -> { 1 + length xs }
}
}
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) }

View File

@@ -0,0 +1,16 @@
data List = { Nil, Cons Int List }
defn add x y = { x + y }
defn mul x y = { x * y }
defn foldr f b l = {
case l of {
Nil -> { b }
Cons x xs -> { f x (foldr f b xs) }
}
}
defn main = {
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
}

View File

@@ -0,0 +1,17 @@
data List = { Nil, Cons Int List }
defn sumZip l m = {
case l of {
Nil -> { 0 }
Cons x xs -> {
case m of {
Nil -> { 0 }
Cons y ys -> { x + y + sumZip xs ys }
}
}
}
}
defn ones = { Cons 1 ones }
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }

View File

@@ -0,0 +1,177 @@
#include "instruction.hpp"
#include "llvm_context.hpp"
#include <stdexcept>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Function.h>
using namespace llvm;
// Writes the indentation unit to `to` once per level in `n`.
static void print_indent(int n, std::ostream& to) {
    for(; n != 0; --n) {
        to << " ";
    }
}
// Prints "PushInt(<value>)" on its own line at the given indentation.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(" << value << ")";
    to << std::endl;
}
// Emits IR that builds a number node from `value` and pushes it on the stack.
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
    auto literal = ctx.create_i32(value);
    auto number_node = ctx.create_num(literal);
    ctx.create_push(f, number_node);
}
// Prints "PushGlobal(<name>)" on its own line at the given indentation.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(" << name << ")";
    to << std::endl;
}
// Looks up the custom function registered as "f_<name>" (std::map::at throws
// if it is missing) and emits IR that pushes a global node for it.
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
    const auto& entry = ctx.custom_functions.at("f_" + name);
    auto arity_value = ctx.create_i32(entry->arity);
    auto global_node = ctx.create_global(entry->function, arity_value);
    ctx.create_push(f, global_node);
}
// Prints "Push(<offset>)" on its own line at the given indentation.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(" << offset << ")";
    to << std::endl;
}
// Emits IR that peeks the node `offset` slots below the top and pushes it.
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
    auto slot = ctx.create_size(offset);
    auto peeked = ctx.create_peek(f, slot);
    ctx.create_push(f, peeked);
}
// Prints "Pop(<count>)" on its own line at the given indentation.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(" << count << ")";
    to << std::endl;
}
// Emits a call that pops `count` entries off the stack.
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto how_many = ctx.create_size(count);
    ctx.create_popn(f, how_many);
}
// Prints "MkApp()" on its own line at the given indentation.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
// Emits IR that pops two nodes (first pop is the left child, second the
// right child), builds an application node and pushes it.
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
    auto function_node = ctx.create_pop(f);
    auto argument_node = ctx.create_pop(f);
    auto application = ctx.create_app(function_node, argument_node);
    ctx.create_push(f, application);
}
// Prints "Update(<offset>)" on its own line at the given indentation.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(" << offset << ")";
    to << std::endl;
}
// Emits a call to the runtime's stack_update with `offset`.
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
    auto slot = ctx.create_size(offset);
    ctx.create_update(f, slot);
}
// Prints "Pack(<tag>, <size>)" on its own line at the given indentation.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag << ", " << size << ")";
    to << std::endl;
}
// Emits a call to the runtime's stack_pack with the field count and the tag.
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
    auto field_count = ctx.create_size(size);
    auto constructor_tag = ctx.create_i8(tag);
    ctx.create_pack(f, field_count, constructor_tag);
}
// Prints "Split()" on its own line at the given indentation (the size is not
// included in the output).
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
// Emits a call to the runtime's stack_split with the field count.
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
    auto field_count = ctx.create_size(size);
    ctx.create_split(f, field_count);
}
void instruction_jump::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Jump(" << std::endl;
for(auto& instruction_set : branches) {
for(auto& instruction : instruction_set) {
instruction->print(indent + 2, to);
}
to << std::endl;
}
print_indent(indent, to);
to << ")" << std::endl;
}
// Emits a switch on the data tag of the node on top of the stack.
//
// One basic block is generated per entry of `branches`; every block ends with
// a branch to the shared "safety" block, which is also the switch's default
// target and the insertion point left active when this function returns.
// `tag_mappings` maps each tag to an index into `branches`.
//
// Throws std::out_of_range if a mapping refers to a branch that does not exist
// (previously an unchecked out-of-bounds read).
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
    auto top_node = ctx.create_peek(f, ctx.create_size(0));
    auto tag = ctx.unwrap_data_tag(top_node);
    auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
    auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());

    std::vector<BasicBlock*> blocks;
    blocks.reserve(branches.size());
    for(auto& branch : branches) {
        auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
        ctx.builder.SetInsertPoint(branch_block);
        for(auto& instruction : branch) {
            instruction->gen_llvm(ctx, f);
        }
        ctx.builder.CreateBr(safety_block);
        blocks.push_back(branch_block);
    }

    for(auto& mapping : tag_mappings) {
        // at() validates the branch index instead of indexing blindly.
        switch_op->addCase(ctx.create_i8(mapping.first), blocks.at(mapping.second));
    }

    ctx.builder.SetInsertPoint(safety_block);
}
// Prints "Slide(<offset>)" on its own line at the given indentation.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(" << offset << ")";
    to << std::endl;
}
// Emits a call to the runtime's stack_slide with `offset`.
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
    auto distance = ctx.create_size(offset);
    ctx.create_slide(f, distance);
}
// Prints "BinOp(<action>)" on its own line, where <action> is op_action(op).
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(" << op_action(op) << ")";
    to << std::endl;
}
// Emits IR that pops two number nodes, unwraps their integers, applies the
// operator and pushes a new number node with the result. The first value
// popped is the left operand.
//
// Throws std::logic_error if `op` is not one of the four handled operators;
// previously `result` was left uninitialized in that case.
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto left_int = ctx.unwrap_num(ctx.create_pop(f));
    auto right_int = ctx.unwrap_num(ctx.create_pop(f));

    llvm::Value* result = nullptr;
    // No default label, so the compiler can still warn about unhandled enumerators.
    switch(op) {
        case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
        case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
        case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
        case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
    }
    if(!result) {
        throw std::logic_error("instruction_binop: unsupported binary operator");
    }

    ctx.create_push(f, ctx.create_num(result));
}
// Prints "Eval()" on its own line at the given indentation.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
// Emits IR that pops the top node, passes it to the runtime's eval and pushes
// the returned node.
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
    auto unevaluated = ctx.create_pop(f);
    auto evaluated = ctx.create_eval(unevaluated);
    ctx.create_push(f, evaluated);
}
// Prints "Alloc(<amount>)" on its own line at the given indentation.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(" << amount << ")";
    to << std::endl;
}
// Emits a call to the runtime's stack_alloc with `amount`.
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
    auto node_count = ctx.create_size(amount);
    ctx.create_alloc(f, node_count);
}
// Prints "Unwind()" on its own line at the given indentation.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}
// Generates no LLVM IR: the body is empty, so `ctx` and `f` are unused.
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
// Nothing
}

View File

@@ -0,0 +1,142 @@
#pragma once
#include <llvm/IR/Function.h>
#include <string>
#include <memory>
#include <vector>
#include <map>
#include <ostream>
#include "binop.hpp"
#include "llvm_context.hpp"
// Abstract base of every instruction: each one can print itself and emit
// LLVM IR for itself.
struct instruction {
    virtual ~instruction() = default;

    // Writes a textual form of the instruction to `to` at indentation `indent`.
    virtual void print(int indent, std::ostream& to) const = 0;

    // Emits this instruction's IR through `ctx`; `f` is the function being built.
    virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
};
using instruction_ptr = std::unique_ptr<instruction>;
struct instruction_pushint : public instruction {
int value;
instruction_pushint(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_pushglobal : public instruction {
std::string name;
instruction_pushglobal(std::string n)
: name(std::move(n)) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_push : public instruction {
int offset;
instruction_push(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_pop : public instruction {
int count;
instruction_pop(int c)
: count(c) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_mkapp : public instruction {
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_update : public instruction {
int offset;
instruction_update(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_pack : public instruction {
int tag;
int size;
instruction_pack(int t, int s)
: tag(t), size(s) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_split : public instruction {
int size;
instruction_split(int s)
: size(s) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_jump : public instruction {
std::vector<std::vector<instruction_ptr>> branches;
std::map<int, int> tag_mappings;
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_slide : public instruction {
int offset;
instruction_slide(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_binop : public instruction {
binop op;
instruction_binop(binop o)
: op(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_eval : public instruction {
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_alloc : public instruction {
int amount;
instruction_alloc(int a)
: amount(a) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_unwind : public instruction {
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

View File

@@ -0,0 +1,252 @@
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>
using namespace llvm;
// Creates the LLVM types that mirror the C runtime's data structures.
//
// `stack` stays an opaque struct: generated code only passes pointers to it.
// The node structs are created by name first, so that `node_ptr_type` exists
// before their bodies are filled in.
//
// Fix: the body of `node_global` now matches `struct node_global` in the
// runtime (base, 32-bit arity, function pointer). It previously omitted the
// arity and used a bare function type, which is not a valid struct element.
void llvm_context::create_types() {
    stack_type = StructType::create(ctx, "stack");
    stack_ptr_type = PointerType::getUnqual(stack_type);
    tag_type = IntegerType::getInt8Ty(ctx);

    struct_types["node_base"] = StructType::create(ctx, "node_base");
    struct_types["node_app"] = StructType::create(ctx, "node_app");
    struct_types["node_num"] = StructType::create(ctx, "node_num");
    struct_types["node_global"] = StructType::create(ctx, "node_global");
    struct_types["node_ind"] = StructType::create(ctx, "node_ind");
    struct_types["node_data"] = StructType::create(ctx, "node_data");

    node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
    function_type = FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false);

    // node_base: just the node kind tag.
    struct_types.at("node_base")->setBody(
        IntegerType::getInt32Ty(ctx)
    );
    // node_app: base, left child, right child.
    struct_types.at("node_app")->setBody(
        struct_types.at("node_base"),
        node_ptr_type,
        node_ptr_type
    );
    // node_num: base, 32-bit value.
    struct_types.at("node_num")->setBody(
        struct_types.at("node_base"),
        IntegerType::getInt32Ty(ctx)
    );
    // node_global: base, 32-bit arity, pointer to the function.
    struct_types.at("node_global")->setBody(
        struct_types.at("node_base"),
        IntegerType::getInt32Ty(ctx),
        PointerType::getUnqual(function_type)
    );
    // node_ind: base, pointer to the next node.
    struct_types.at("node_ind")->setBody(
        struct_types.at("node_base"),
        node_ptr_type
    );
    // node_data: base, 8-bit constructor tag, pointer to an array of node pointers.
    struct_types.at("node_data")->setBody(
        struct_types.at("node_base"),
        IntegerType::getInt8Ty(ctx),
        PointerType::getUnqual(node_ptr_type)
    );
}
void llvm_context::create_functions() {
auto void_type = Type::getVoidTy(ctx);
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
functions["stack_init"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_init",
&module
);
functions["stack_free"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_free",
&module
);
functions["stack_push"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_push",
&module
);
functions["stack_pop"] = Function::Create(
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_pop",
&module
);
functions["stack_peek"] = Function::Create(
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_peek",
&module
);
functions["stack_popn"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_popn",
&module
);
functions["stack_slide"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_slide",
&module
);
functions["stack_update"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_update",
&module
);
functions["stack_alloc"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_alloc",
&module
);
functions["stack_pack"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_pack",
&module
);
functions["stack_split"] = Function::Create(
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_split",
&module
);
auto int32_type = IntegerType::getInt32Ty(ctx);
functions["alloc_app"] = Function::Create(
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_app",
&module
);
functions["alloc_num"] = Function::Create(
FunctionType::get(node_ptr_type, { int32_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_num",
&module
);
functions["alloc_global"] = Function::Create(
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_global",
&module
);
functions["alloc_ind"] = Function::Create(
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_ind",
&module
);
functions["eval"] = Function::Create(
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"eval",
&module
);
}
// Builds an 8-bit integer constant from a signed value.
// The value is flagged as signed so that negative inputs are interpreted as
// two's-complement 8-bit values rather than as large unsigned 64-bit numbers
// that do not fit the width.
ConstantInt* llvm_context::create_i8(int8_t i) {
    return ConstantInt::get(ctx, APInt(8, i, /* isSigned = */ true));
}
// Builds a 32-bit integer constant from a signed value.
// The value is flagged as signed so that negative inputs are interpreted as
// two's-complement 32-bit values rather than as large unsigned 64-bit numbers
// that do not fit the width.
ConstantInt* llvm_context::create_i32(int32_t i) {
    return ConstantInt::get(ctx, APInt(32, i, /* isSigned = */ true));
}
// Builds an unsigned constant as wide as the compiler's own size_t.
ConstantInt* llvm_context::create_size(size_t i) {
    const unsigned bit_width = sizeof(size_t) * 8;
    return ConstantInt::get(ctx, APInt(bit_width, i));
}
// Emits a call to stack_pop on `f`'s first argument and returns the popped node.
Value* llvm_context::create_pop(Function* f) {
    Value* stack_arg = f->arg_begin();
    return builder.CreateCall(functions.at("stack_pop"), { stack_arg });
}
// Emits a call to stack_peek(stack, off) and returns the peeked node.
Value* llvm_context::create_peek(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    return builder.CreateCall(functions.at("stack_peek"), { stack_arg, off });
}
// Emits a call to stack_push(stack, v).
void llvm_context::create_push(Function* f, Value* v) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_push"), { stack_arg, v });
}
// Emits a call to stack_popn(stack, off).
void llvm_context::create_popn(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_popn"), { stack_arg, off });
}
// Emits a call to stack_update(stack, off).
void llvm_context::create_update(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_update"), { stack_arg, off });
}
// Emits a call to stack_pack(stack, c, t): `c` is the field count, `t` the tag.
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_pack"), { stack_arg, c, t });
}
// Emits a call to stack_split(stack, c).
void llvm_context::create_split(Function* f, Value* c) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_split"), { stack_arg, c });
}
// Emits a call to stack_slide(stack, off).
void llvm_context::create_slide(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_slide"), { stack_arg, off });
}
// Emits a call to stack_alloc(stack, n).
void llvm_context::create_alloc(Function* f, Value* n) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_alloc"), { stack_arg, n });
}
// Emits a call to the runtime's eval on node `e` and returns the result.
Value* llvm_context::create_eval(Value* e) {
    return builder.CreateCall(functions.at("eval"), { e });
}
// Emits IR that casts `v` to a node_num pointer and loads struct field 1
// (the 32-bit value).
Value* llvm_context::unwrap_num(Value* v) {
    auto node_num_ptr = PointerType::getUnqual(struct_types.at("node_num"));
    auto as_num = builder.CreatePointerCast(v, node_num_ptr);
    auto deref_index = create_i32(0);
    auto field_index = create_i32(1);
    auto value_ptr = builder.CreateGEP(as_num, { deref_index, field_index });
    return builder.CreateLoad(value_ptr);
}
// Emits a call to alloc_num(v) and returns the new node.
Value* llvm_context::create_num(Value* v) {
    return builder.CreateCall(functions.at("alloc_num"), { v });
}
// Emits IR that casts `v` to a node_data pointer and loads struct field 1
// (the 8-bit constructor tag).
Value* llvm_context::unwrap_data_tag(Value* v) {
    auto node_data_ptr = PointerType::getUnqual(struct_types.at("node_data"));
    auto as_data = builder.CreatePointerCast(v, node_data_ptr);
    auto deref_index = create_i32(0);
    auto field_index = create_i32(1);
    auto tag_location = builder.CreateGEP(as_data, { deref_index, field_index });
    return builder.CreateLoad(tag_location);
}
// Emits a call to alloc_global(f, a): `f` is the function, `a` its arity.
Value* llvm_context::create_global(Value* f, Value* a) {
    return builder.CreateCall(functions.at("alloc_global"), { f, a });
}
// Emits a call to alloc_app(l, r) and returns the new application node.
Value* llvm_context::create_app(Value* l, Value* r) {
    return builder.CreateCall(functions.at("alloc_app"), { l, r });
}
// Creates an externally linked function "f_<name>" of type void(stack*),
// gives it an "entry" basic block, and records it with its arity in
// `custom_functions` under the same prefixed name.
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
    const std::string mangled = "f_" + name;

    auto signature = llvm::FunctionType::get(
        llvm::Type::getVoidTy(ctx), { stack_ptr_type }, false);
    auto created = llvm::Function::Create(
        signature,
        llvm::Function::LinkageTypes::ExternalLinkage,
        mangled,
        &module
    );
    // The block is owned by the function; no handle is needed here.
    llvm::BasicBlock::Create(ctx, "entry", created);

    custom_function_ptr record(new custom_function());
    record->arity = arity;
    record->function = created;
    custom_functions[mangled] = std::move(record);

    return created;
}

View File

@@ -0,0 +1,66 @@
#pragma once
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>
#include <map>
// Bundles the LLVM objects, cached types and runtime function declarations
// used while generating code, plus helpers that emit calls into the runtime.
struct llvm_context {
// A generated function together with the arity recorded for it.
struct custom_function {
llvm::Function* function;
int32_t arity;
};
using custom_function_ptr = std::unique_ptr<custom_function>;
// Declaration order matters: `builder` and `module` are constructed from `ctx`.
llvm::LLVMContext ctx;
llvm::IRBuilder<> builder;
llvm::Module module;
// Functions made by create_custom_function, keyed by their "f_"-prefixed name.
std::map<std::string, custom_function_ptr> custom_functions;
// Declarations of the C runtime functions, keyed by name (see create_functions).
std::map<std::string, llvm::Function*> functions;
// Named node struct types, keyed by name (see create_types).
std::map<std::string, llvm::StructType*> struct_types;
// Frequently used types; all of them are assigned by create_types.
llvm::StructType* stack_type;
llvm::PointerType* stack_ptr_type;
llvm::PointerType* node_ptr_type;
llvm::IntegerType* tag_type;
llvm::FunctionType* function_type;
// Sets up the builder and the "bloglang" module, then creates the types
// before the functions that use them.
llvm_context()
: builder(ctx), module("bloglang", ctx) {
create_types();
create_functions();
}
void create_types();
void create_functions();
// Integer constant helpers.
llvm::ConstantInt* create_i8(int8_t);
llvm::ConstantInt* create_i32(int32_t);
llvm::ConstantInt* create_size(size_t);
// Helpers that emit a call to the runtime function of the matching name;
// the llvm::Function* argument supplies the stack (its first argument).
llvm::Value* create_pop(llvm::Function*);
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
void create_push(llvm::Function*, llvm::Value*);
void create_popn(llvm::Function*, llvm::Value*);
void create_update(llvm::Function*, llvm::Value*);
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
void create_split(llvm::Function*, llvm::Value*);
void create_slide(llvm::Function*, llvm::Value*);
void create_alloc(llvm::Function*, llvm::Value*);
llvm::Value* create_eval(llvm::Value*);
// Node construction and field access helpers.
llvm::Value* unwrap_num(llvm::Value*);
llvm::Value* create_num(llvm::Value*);
llvm::Value* unwrap_data_tag(llvm::Value*);
llvm::Value* create_global(llvm::Value*, llvm::Value*);
llvm::Value* create_app(llvm::Value*, llvm::Value*);
// Creates and registers a new function named "f_" + name.
llvm::Function* create_custom_function(std::string name, int32_t arity);
};

174
code/compiler/08/main.cpp Normal file
View File

@@ -0,0 +1,174 @@
#include "ast.hpp"
#include <iostream>
#include "binop.hpp"
#include "definition.hpp"
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetMachine.h"
// Error callback invoked by the generated parser; reports the parser's
// message on standard output.
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occurred: " << msg << std::endl;
}
extern std::vector<definition_ptr> program;
void typecheck_program(
const std::vector<definition_ptr>& prog,
type_mgr& mgr, type_env& env) {
type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr(
int_type,
type_ptr(new type_arr(int_type, int_type))));
env.bind("+", binop_type);
env.bind("-", binop_type);
env.bind("*", binop_type);
env.bind("/", binop_type);
for(auto& def : prog) {
def->typecheck_first(mgr, env);
}
for(auto& def : prog) {
def->typecheck_second(mgr, env);
}
for(auto& pair : env.names) {
std::cout << pair.first << ": ";
pair.second->print(mgr, std::cout);
std::cout << std::endl;
}
for(auto& def : prog) {
def->resolve(mgr);
}
}
// Compiles every definition. For function definitions, the resulting
// instructions are also printed to standard output, followed by a blank line.
void compile_program(const std::vector<definition_ptr>& prog) {
    for(const auto& definition : prog) {
        definition->compile();

        auto* function_def = dynamic_cast<definition_defn*>(definition.get());
        if(function_def) {
            for(const auto& step : function_def->instructions) {
                step->print(0, std::cout);
            }
            std::cout << std::endl;
        }
    }
}
// Generates the two-argument function backing a built-in binary operator:
// Push 1, Eval, Push 1, Eval, BinOp, followed by a void return.
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
    llvm::Function* op_function = ctx.create_custom_function(op_action(op), 2);

    std::vector<instruction_ptr> body;
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    body.push_back(instruction_ptr(new instruction_binop(op)));

    ctx.builder.SetInsertPoint(&op_function->getEntryBlock());
    for(const auto& step : body) {
        step->gen_llvm(ctx, op_function);
    }
    ctx.builder.CreateRetVoid();
}
// Compiles the module in `ctx` to a native object file at `filename`, using
// the default target triple of the machine running the compiler.
//
// If the target cannot be looked up, the registry's error is printed to
// stderr and the function returns without writing anything. Any other failure
// (no target machine, output file cannot be opened, target cannot emit object
// files) prints a diagnostic to stderr and throws an int, as before.
//
// Changes: the target machine is now owned by a unique_ptr (it used to leak),
// a null target machine is detected instead of being dereferenced, and every
// throw is preceded by a message.
void output_llvm(llvm_context& ctx, const std::string& filename) {
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();
    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmParser();
    llvm::InitializeNativeTargetAsmPrinter();

    std::string error;
    const llvm::Target* target =
        llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (!target) {
        std::cerr << error << std::endl;
        return;
    }

    std::string cpu = "generic";
    std::string features = "";
    llvm::TargetOptions options;
    std::unique_ptr<llvm::TargetMachine> targetMachine(
        target->createTargetMachine(targetTriple, cpu, features,
            options, llvm::Optional<llvm::Reloc::Model>()));
    if (!targetMachine) {
        std::cerr << "could not create a target machine for "
                  << targetTriple << std::endl;
        throw 0;
    }

    ctx.module.setDataLayout(targetMachine->createDataLayout());
    ctx.module.setTargetTriple(targetTriple);

    std::error_code ec;
    llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
    if (ec) {
        std::cerr << "could not open " << filename << ": "
                  << ec.message() << std::endl;
        throw 0;
    }

    llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
    llvm::legacy::PassManager pm;
    // addPassesToEmitFile returns true when the target cannot emit this file type.
    if (targetMachine->addPassesToEmitFile(pm, file, nullptr, type)) {
        std::cerr << "the target cannot emit object files" << std::endl;
        throw 0;
    }

    pm.run(ctx.module);
    file.close();
}
void gen_llvm(const std::vector<definition_ptr>& prog) {
llvm_context ctx;
gen_llvm_internal_op(ctx, PLUS);
gen_llvm_internal_op(ctx, MINUS);
gen_llvm_internal_op(ctx, TIMES);
gen_llvm_internal_op(ctx, DIVIDE);
for(auto& definition : prog) {
definition->gen_llvm_first(ctx);
}
for(auto& definition : prog) {
definition->gen_llvm_second(ctx);
}
ctx.module.print(llvm::outs(), nullptr);
output_llvm(ctx, "program.o");
}
int main() {
yy::parser parser;
type_mgr mgr;
type_env env;
parser.parse();
for(auto& definition : program) {
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
if(!def) continue;
std::cout << def->name;
for(auto& param : def->params) std::cout << " " << param;
std::cout << ":" << std::endl;
def->body->print(1, std::cout);
}
try {
typecheck_program(program, mgr, env);
compile_program(program);
gen_llvm(program);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";
err.left->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
std::cout << " (2) \033[32m";
err.right->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
} catch(type_error& err) {
std::cout << "failed to type check program: " << err.description << std::endl;
}
}

141
code/compiler/08/parser.y Normal file
View File

@@ -0,0 +1,141 @@
%{
#include <string>
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"
// Parse result: assigned by the action of the `program` rule.
std::vector<definition_ptr> program;
// Lexer entry point, defined outside this file.
extern yy::parser::symbol_type yylex();
%}
/* Arithmetic operator tokens. */
%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
/* Integer literal carrying its value. */
%token <int> INT
/* Keywords. */
%token DEFN
%token DATA
%token CASE
%token OF
/* Punctuation. */
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
/* Identifiers carrying their text (LID / UID). */
%token <std::string> LID
%token <std::string> UID
/* C++ parser with variant semantic values and token constructor functions. */
%language "c++"
%define api.value.type variant
%define api.token.constructor
/* Semantic value types of the nonterminals. */
%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor
%start program
%%
/* Start symbol: stores the parsed definitions in the global `program`. */
program
: definitions { program = std::move($1); }
;
/* One or more definitions, collected in source order. */
definitions
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
;
/* A definition is either a function definition or a data type definition. */
definition
: defn { $$ = std::move($1); }
| data { $$ = std::move($1); }
;
/* Function definition: name ($2), parameter names ($3) and body expression ($6). */
defn
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
{ $$ = definition_ptr(
new definition_defn(std::move($2), std::move($3), std::move($6))); }
;
/* Possibly empty list of LID tokens, in source order. */
lowercaseParams
: %empty { $$ = std::vector<std::string>(); }
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
/* Possibly empty list of UID tokens, in source order. */
uppercaseParams
: %empty { $$ = std::vector<std::string>(); }
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
/* Additive level: left-recursive chain of PLUS / MINUS over aMul operands. */
aAdd
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
| aMul { $$ = std::move($1); }
;
/* Multiplicative level: left-recursive chain of TIMES / DIVIDE over applications. */
aMul
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
| app { $$ = std::move($1); }
;
/* Function application by juxtaposition; left-recursive, so `f a b` is (f a) b. */
app
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
| appBase { $$ = std::move($1); }
;
/* Atomic expressions: literals, identifiers, parenthesised expressions and case. */
appBase
: INT { $$ = ast_ptr(new ast_int($1)); }
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
| OPAREN aAdd CPAREN { $$ = std::move($2); }
| case { $$ = std::move($1); }
;
/* Case expression: the scrutinee ($2) and its branches ($5). */
case
: CASE aAdd OF OCURLY branches CCURLY
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
;
/* One or more case branches, collected in source order. */
branches
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
;
/* A single branch: pattern ($1) and the expression in braces ($4). */
branch
: pattern ARROW OCURLY aAdd CCURLY
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
;
/* Pattern: a variable (LID), or a constructor name (UID) with LID parameters. */
pattern
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
| UID lowercaseParams
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
;
/* Data type definition: type name ($2) and its constructors ($5). */
data
: DATA UID EQUAL OCURLY constructors CCURLY
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
;
/* One or more constructors separated by COMMA, collected in source order. */
constructors
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
| constructor
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
;
/* Constructor: its name (UID) followed by the UID names of its field types. */
constructor
: UID uppercaseParams
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
;

183
code/compiler/08/runtime.c Normal file
View File

@@ -0,0 +1,183 @@
#include <stdint.h>
#include <assert.h>
#include <memory.h>
#include <stdio.h>
#include "runtime.h"
/* Allocates uninitialised storage for one node. Every node is given
 * sizeof(struct node_app) bytes, whatever kind it ends up being. */
struct node_base* alloc_node() {
    void* storage = malloc(sizeof(struct node_app));
    assert(storage != NULL);
    return storage;
}
/* Allocates an application node with the given left and right children. */
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
    struct node_app* app = (struct node_app*) alloc_node();
    app->left = l;
    app->right = r;
    app->base.tag = NODE_APP;
    return app;
}
/* Allocates a number node holding `n`. */
struct node_num* alloc_num(int32_t n) {
    struct node_num* num = (struct node_num*) alloc_node();
    num->value = n;
    num->base.tag = NODE_NUM;
    return num;
}
/* Allocates a global node for function `f` with arity `a`. */
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a) {
    struct node_global* global = (struct node_global*) alloc_node();
    global->function = f;
    global->arity = a;
    global->base.tag = NODE_GLOBAL;
    return global;
}
/* Allocates an indirection node pointing at `n` (which may be NULL). */
struct node_ind* alloc_ind(struct node_base* n) {
    struct node_ind* ind = (struct node_ind*) alloc_node();
    ind->next = n;
    ind->base.tag = NODE_IND;
    return ind;
}
/* Initialises `s` as an empty stack with room for four entries. */
void stack_init(struct stack* s) {
    s->count = 0;
    s->size = 4;
    s->data = malloc(s->size * sizeof(*s->data));
    assert(s->data != NULL);
}
/* Releases the stack's entry array; the nodes it pointed to are not freed. */
void stack_free(struct stack* s) {
    struct node_base** entries = s->data;
    free(entries);
}
/* Pushes `n`, doubling the capacity first for as long as the stack is full. */
void stack_push(struct stack* s, struct node_base* n) {
    while(s->size <= s->count) {
        s->size *= 2;
        s->data = realloc(s->data, s->size * sizeof(*s->data));
        assert(s->data != NULL);
    }
    s->data[s->count] = n;
    s->count += 1;
}
/* Removes and returns the top entry; the stack must not be empty. */
struct node_base* stack_pop(struct stack* s) {
    assert(s->count > 0);
    s->count -= 1;
    return s->data[s->count];
}
/* Returns the entry `o` slots below the top without removing it (o = 0 is
 * the top itself). */
struct node_base* stack_peek(struct stack* s, size_t o) {
    assert(o < s->count);
    size_t index = s->count - o - 1;
    return s->data[index];
}
/* Discards the top `n` entries; the stack must hold at least `n`. */
void stack_popn(struct stack* s, size_t n) {
    assert(n <= s->count);
    s->count = s->count - n;
}
/* Removes the `n` entries directly below the top while keeping the top:
 * the top is copied `n` slots down and the count is reduced by `n`. */
void stack_slide(struct stack* s, size_t n) {
    assert(n < s->count);
    size_t top = s->count - 1;
    s->data[top - n] = s->data[top];
    s->count -= n;
}
/* Pops the top entry and overwrites, in place, the node that was `o + 1`
 * slots below it with an indirection to the popped entry. */
void stack_update(struct stack* s, size_t o) {
    assert(s->count > o + 1);
    struct node_base* replacement = s->data[s->count - 1];
    struct node_ind* target = (struct node_ind*) s->data[s->count - o - 2];
    s->count -= 1;
    target->base.tag = NODE_IND;
    target->next = replacement;
}
/* Pushes `o` fresh indirection nodes whose target is NULL. */
void stack_alloc(struct stack* s, size_t o) {
    for(size_t remaining = o; remaining > 0; remaining--) {
        stack_push(s, (struct node_base*) alloc_ind(NULL));
    }
}
/* Replaces the top `n` stack entries with a single data node tagged `t`.
 * The entries are copied into a newly allocated array in stack order
 * (deepest entry first), which becomes the node's `array`.
 *
 * Fix: constructors without fields call this with n == 0. malloc(0) is
 * allowed to return NULL, so the allocation check only applies to non-empty
 * requests, and memcpy is skipped when there is nothing to copy. */
void stack_pack(struct stack* s, size_t n, int8_t t) {
    assert(s->count >= n);

    struct node_base** data = malloc(sizeof(*data) * n);
    assert(n == 0 || data != NULL);
    if(n > 0) {
        memcpy(data, &s->data[s->count - n], n * sizeof(*data));
    }

    struct node_data* new_node = (struct node_data*) alloc_node();
    new_node->array = data;
    new_node->base.tag = NODE_DATA;
    new_node->tag = t;

    stack_popn(s, n);
    stack_push(s, (struct node_base*) new_node);
}
/* Pops a node, treats it as a data node and pushes the first `n` entries of
 * its array in array order. */
void stack_split(struct stack* s, size_t n) {
    struct node_data* packed = (struct node_data*) stack_pop(s);
    size_t field = 0;
    while(field < n) {
        stack_push(s, packed->array[field]);
        field++;
    }
}
/* Repeatedly inspects the node on top of the stack until it is neither an
 * application, a global nor an indirection. */
void unwind(struct stack* s) {
while(1) {
struct node_base* peek = stack_peek(s, 0);
if(peek->tag == NODE_APP) {
/* Application: descend into the left child, keeping the application below it. */
struct node_app* n = (struct node_app*) peek;
stack_push(s, n->left);
} else if(peek->tag == NODE_GLOBAL) {
struct node_global* n = (struct node_global*) peek;
/* There must be more entries on the stack than the function's arity. */
assert(s->count > n->arity);
/* Overwrite the top `arity` slots, from the top down, with the right
 * child of the entry just below each slot (read as an application node). */
for(size_t i = 1; i <= n->arity; i++) {
s->data[s->count - i]
= ((struct node_app*) s->data[s->count - i - 1])->right;
}
n->function(s);
} else if(peek->tag == NODE_IND) {
/* Indirection: replace it with the node it points to. */
struct node_ind* n = (struct node_ind*) peek;
stack_pop(s);
stack_push(s, n->next);
} else {
/* Any other node kind ends the loop. */
break;
}
}
}
struct node_base* eval(struct node_base* n) {
struct stack program_stack;
stack_init(&program_stack);
stack_push(&program_stack, n);
unwind(&program_stack);
struct node_base* result = stack_pop(&program_stack);
stack_free(&program_stack);
return result;
}
extern void f_main(struct stack* s);
/* Prints a textual form of `n` to standard output. Applications print both
 * children separated by a space, indirections print their target, and node
 * kinds not listed here print nothing. */
void print_node(struct node_base* n) {
    switch(n->tag) {
        case NODE_APP: {
            struct node_app* app = (struct node_app*) n;
            print_node(app->left);
            putchar(' ');
            print_node(app->right);
            break;
        }
        case NODE_DATA:
            printf("(Packed)");
            break;
        case NODE_GLOBAL: {
            struct node_global* global = (struct node_global*) n;
            printf("(Global: %p)", global->function);
            break;
        }
        case NODE_IND:
            print_node(((struct node_ind*) n)->next);
            break;
        case NODE_NUM: {
            struct node_num* num = (struct node_num*) n;
            printf("%d", num->value);
            break;
        }
    }
}
/* Runtime entry point: evaluates a zero-arity global node for f_main and
 * prints the result after "Result: ". */
int main(int argc, char** argv) {
    struct node_base* entry = (struct node_base*) alloc_global(f_main, 0);
    struct node_base* result = eval(entry);

    printf("Result: ");
    print_node(result);
    putchar('\n');
}

View File

@@ -0,0 +1,70 @@
#pragma once
#include <stdint.h>
#include <stdlib.h>
struct stack;
/* Discriminator stored in every node. Code inspects it before casting a
   node_base pointer to the matching node_* struct. */
enum node_tag {
NODE_APP, /* struct node_app */
NODE_NUM, /* struct node_num */
NODE_GLOBAL, /* struct node_global */
NODE_IND, /* struct node_ind */
NODE_DATA /* struct node_data */
};
/* Common header of all nodes: every node_* struct below embeds it as its
   first member, and nodes are passed around as struct node_base*. */
struct node_base {
enum node_tag tag; /* tells which concrete node struct this is */
};
/* Application node (NODE_APP). */
struct node_app {
struct node_base base;
struct node_base* left; /* the side unwind() descends into */
struct node_base* right; /* the side unwind() hands to globals as an argument */
};
/* Integer node (NODE_NUM). */
struct node_num {
struct node_base base;
int32_t value; /* the number this node holds */
};
/* Global function node (NODE_GLOBAL). */
struct node_global {
struct node_base base;
int32_t arity; /* how many stack slots unwind() replaces with arguments before the call */
void (*function)(struct stack*); /* invoked by unwind() with the current stack */
};
/* Indirection node (NODE_IND): stands in for another node. */
struct node_ind {
struct node_base base;
struct node_base* next; /* node that unwind() and print_node() use instead of this one */
};
/* Packed data node (NODE_DATA), created by stack_pack() and unpacked by
   stack_split(). */
struct node_data {
struct node_base base;
int8_t tag; /* the tag given to stack_pack(); distinct from base.tag */
struct node_base** array; /* malloc'd array of the packed nodes */
};
/* Node allocation helpers; their definitions are not part of this header.
   NOTE(review): parameter meanings below are inferred from the struct
   fields of the same kind - confirm against the implementation. */
struct node_base* alloc_node(); /* node whose fields the caller fills in (see stack_pack) */
struct node_app* alloc_app(struct node_base* l, struct node_base* r); /* presumably left / right */
struct node_num* alloc_num(int32_t n); /* presumably the value */
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a); /* presumably function / arity */
struct node_ind* alloc_ind(struct node_base* n); /* presumably the target */
/* Stack of node pointers; data[count - 1] is the top entry. */
struct stack {
size_t size; /* presumably the allocated capacity of data - TODO confirm */
size_t count; /* number of entries currently on the stack */
struct node_base** data; /* entry storage, bottom of the stack first */
};
/* Stack operations. Only the ones annotated below are described from code
   visible alongside this header; the rest are documented by name only. */
void stack_init(struct stack* s);
void stack_free(struct stack* s);
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o); /* o == 0 is used to read the top entry */
void stack_popn(struct stack* s, size_t n);
void stack_slide(struct stack* s, size_t n);
void stack_update(struct stack* s, size_t o);
void stack_alloc(struct stack* s, size_t o);
/* Replace the top n entries with one node_data holding them, tagged t. */
void stack_pack(struct stack* s, size_t n, int8_t t);
/* Pop a node_data and push its first n fields. */
void stack_split(struct stack* s, size_t n);
/* Unwind n on a temporary stack and return the resulting node. */
struct node_base* eval(struct node_base* n);

View File

@@ -0,0 +1,35 @@
%option noyywrap
%{
/*
 * Scanner for the language: each rule below returns one token built through
 * the yy::parser::make_* factories.
 *
 * flex prefers the longest match and, on a tie, the rule listed first. That
 * is why "->" still wins over "-", and why the keyword rules (defn, data,
 * case, of) must stay above the lowercase-identifier rule.
 */
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"
/* Make the generated yylex() return the parser's symbol type. */
#define YY_DECL yy::parser::symbol_type yylex()
%}
%%
[ \n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
%%

99
code/compiler/08/type.cpp Normal file
View File

@@ -0,0 +1,99 @@
#include "type.hpp"
#include <sstream>
#include <algorithm>
#include "error.hpp"
// Print what this variable is bound to in `mgr`, or its own name when it
// has no binding yet.
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
    const auto bound = mgr.types.find(name);
    if(bound == mgr.types.end()) {
        to << name;
        return;
    }
    bound->second->print(mgr, to);
}
// A base type prints as its name; the manager is not consulted.
void type_base::print(const type_mgr& /* mgr */, std::ostream& to) const {
    to << name;
}
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
left->print(mgr, to);
to << " -> (";
right->print(mgr, to);
to << ")";
}
// Produce the next fresh variable name and advance last_id.
// Ids map to a, b, ..., z, aa, ab, ...; letters are generated least
// significant first, so each one is inserted at the front of the name.
std::string type_mgr::new_type_name() {
    std::string label;
    for(int rest = last_id++; rest != -1; rest = rest / 26 - 1) {
        label.insert(label.begin(), static_cast<char>('a' + (rest % 26)));
    }
    return label;
}
// Create a fresh, unbound type variable with a newly generated name.
type_ptr type_mgr::new_type() {
    std::string fresh_name = new_type_name();
    return std::make_shared<type_var>(std::move(fresh_name));
}
// Create an arrow type whose two sides are fresh type variables.
//
// The variables are created in separate statements on purpose: when both
// new_type() calls are written as arguments of one constructor call, their
// evaluation order is unspecified, so which side receives the earlier name
// would depend on the compiler. Here the left side always gets the earlier
// name.
type_ptr type_mgr::new_arrow_type() {
    type_ptr param = new_type();
    type_ptr result = new_type();
    return type_ptr(new type_arr(std::move(param), std::move(result)));
}
// Follow variable bindings starting at `t` until reaching either a type
// that is not a variable or a variable with no binding.
// Returns the type reached. `var` is set to the unbound variable when the
// walk ends on one, and to nullptr otherwise.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;
    for(;;) {
        type_var* current = dynamic_cast<type_var*>(t.get());
        if(!current) {
            return t;
        }
        auto binding = types.find(current->name);
        if(binding == types.end()) {
            var = current;
            return t;
        }
        t = binding->second;
    }
}
// Make `l` and `r` equal, recording variable bindings as needed.
// Both sides are resolved first. An unbound variable on either side is
// bound to the other side (left is checked first). Two arrows unify
// pairwise, and two base types unify only when their names match.
// Throws unification_error for every other combination.
void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* left_var;
    type_var* right_var;
    l = resolve(l, left_var);
    r = resolve(r, right_var);

    if(left_var) {
        bind(left_var->name, r);
        return;
    }
    if(right_var) {
        bind(right_var->name, l);
        return;
    }

    type_arr* left_arrow = dynamic_cast<type_arr*>(l.get());
    type_arr* right_arrow = dynamic_cast<type_arr*>(r.get());
    if(left_arrow && right_arrow) {
        unify(left_arrow->left, right_arrow->left);
        unify(left_arrow->right, right_arrow->right);
        return;
    }

    type_base* left_base = dynamic_cast<type_base*>(l.get());
    type_base* right_base = dynamic_cast<type_base*>(r.get());
    if(left_base && right_base && left_base->name == right_base->name) {
        return;
    }

    throw unification_error(l, r);
}
void type_mgr::bind(const std::string& s, type_ptr t) {
type_var* other = dynamic_cast<type_var*>(t.get());
if(other && other->name == s) return;
types[s] = t;
}

65
code/compiler/08/type.hpp Normal file
View File

@@ -0,0 +1,65 @@
#pragma once
#include <memory>
#include <map>
struct type_mgr;
// Abstract base of every type representation. The virtual destructor lets
// derived types be destroyed through a base pointer.
struct type {
virtual ~type() = default;
// Write a textual form of this type to `to`; `mgr` supplies the current
// variable bindings.
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
};
// Shared-ownership handle used wherever a type is passed around or stored.
using type_ptr = std::shared_ptr<type>;
struct type_var : public type {
std::string name;
type_var(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_base : public type {
std::string name;
type_base(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
// A base type that additionally carries a table of constructors.
struct type_data : public type_base {
// Per-constructor information.
struct constructor {
int tag; // numeric tag of the constructor
};
// Constructors keyed by a string - presumably the constructor's name.
std::map<std::string, constructor> constructors;
type_data(std::string n)
: type_base(std::move(n)) {}
};
struct type_arr : public type {
type_ptr left;
type_ptr right;
type_arr(type_ptr l, type_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
// Creates fresh type variables and tracks what each variable is bound to.
struct type_mgr {
int last_id = 0; // id the next generated variable name is derived from
std::map<std::string, type_ptr> types; // variable name -> type it is bound to
// Generate the next unused variable name (a, b, ..., z, aa, ...).
std::string new_type_name();
// Create a fresh type variable.
type_ptr new_type();
// Create an arrow type between two fresh type variables.
type_ptr new_arrow_type();
// Make l and r equal, binding variables as needed; throws
// unification_error when that is impossible.
void unify(type_ptr l, type_ptr r);
// Follow bindings from t; var receives the unbound variable reached, or
// nullptr when the result is not an unbound variable.
type_ptr resolve(type_ptr t, type_var*& var) const;
// Bind variable s to t (a variable bound to itself is ignored).
void bind(const std::string& s, type_ptr t);
};

View File

@@ -0,0 +1,16 @@
#include "type_env.hpp"
// Find the type bound to `name`, searching this environment first and then
// each enclosing one in turn. Returns nullptr when no scope binds the name.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* env = this; env; env = env->parent) {
        auto found = env->names.find(name);
        if(found != env->names.end()) {
            return found->second;
        }
    }
    return nullptr;
}
void type_env::bind(const std::string& name, type_ptr t) {
names[name] = t;
}
// Create an empty environment whose parent is this one.
type_env type_env::scope() const {
    type_env child(this);
    return child;
}

View File

@@ -0,0 +1,16 @@
#pragma once
#include <map>
#include "type.hpp"
// One scope of name -> type bindings, chained to an optional enclosing scope.
struct type_env {
std::map<std::string, type_ptr> names; // bindings introduced in this scope
// Enclosing scope, or nullptr for the outermost one. Held as a raw,
// non-owning pointer, so the parent presumably has to outlive this
// environment.
type_env const* parent = nullptr;
type_env(type_env const* p)
: parent(p) {}
type_env() : type_env(nullptr) {}
// Look `name` up here and then in the enclosing scopes; nullptr if absent.
type_ptr lookup(const std::string& name) const;
// Add or replace a binding in this scope only.
void bind(const std::string& name, type_ptr t);
// Create a new scope whose parent is this environment.
type_env scope() const;
};

View File

@@ -2,12 +2,11 @@
title: Compiling a Functional Language Using C++, Part 0 - Intro title: Compiling a Functional Language Using C++, Part 0 - Intro
date: 2019-08-03T01:02:30-07:00 date: 2019-08-03T01:02:30-07:00
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
draft: true
--- ---
During my last academic term, I was enrolled in a compilers course. During my last academic term, I was enrolled in a compilers course.
We had a final project - develop a compiler for a basic Python subset, We had a final project - develop a compiler for a basic Python subset,
using LLVM. It was a little boring - virtually nothing about the compiler using LLVM. It was a little boring - virtually nothing about the compiler
was __not__ covered in class, and it felt more like putting two puzzles was __not__ covered in class, and it felt more like putting two puzzle
pieces together than building a real project. pieces together than building a real project.
Instead, I chose to implement a compiler for a functional programming language, Instead, I chose to implement a compiler for a functional programming language,
@@ -138,3 +137,6 @@ Here are the posts that I've written so far for this series:
* [Typechecking]({{< relref "03_compiler_typechecking.md" >}}) * [Typechecking]({{< relref "03_compiler_typechecking.md" >}})
* [Small Improvements]({{< relref "04_compiler_improvements.md" >}}) * [Small Improvements]({{< relref "04_compiler_improvements.md" >}})
* [Execution]({{< relref "05_compiler_execution.md" >}}) * [Execution]({{< relref "05_compiler_execution.md" >}})
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
* [LLVM]({{< relref "08_compiler_llvm.md" >}})

View File

@@ -2,7 +2,6 @@
title: Compiling a Functional Language Using C++, Part 1 - Tokenizing title: Compiling a Functional Language Using C++, Part 1 - Tokenizing
date: 2019-08-03T01:02:30-07:00 date: 2019-08-03T01:02:30-07:00
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
draft: true
--- ---
It makes sense to build a compiler bit by bit, following the stages we outlined in It makes sense to build a compiler bit by bit, following the stages we outlined in
the first post of the series. This is because these stages are essentially a pipeline, the first post of the series. This is because these stages are essentially a pipeline,
@@ -48,7 +47,7 @@ are fairly simple - one or more digits is an integer, a few letters together
are a variable name. In order to be able to efficiently break text up into are a variable name. In order to be able to efficiently break text up into
such tokens, we restrict ourselves to __regular languages__. A language such tokens, we restrict ourselves to __regular languages__. A language
is defined as a set of strings (potentially infinite), and a regular is defined as a set of strings (potentially infinite), and a regular
language for which we can write a __regular expression__ to check if language is one for which we can write a __regular expression__ to check if
a string is in the set. Regular expressions are a way of representing a string is in the set. Regular expressions are a way of representing
patterns that a string has to match. We define regular expressions patterns that a string has to match. We define regular expressions
as follows: as follows:
@@ -77,7 +76,7 @@ Let's see some examples. An integer, such as 326, can be represented with \\([0-
This means, one or more characters between 0 or 9. Some (most) regex implementations This means, one or more characters between 0 or 9. Some (most) regex implementations
have a special symbol for \\([0-9]\\), written as \\(\\setminus d\\). A variable, have a special symbol for \\([0-9]\\), written as \\(\\setminus d\\). A variable,
starting with a lowercase letter and containing lowercase or uppercase letters after it, starting with a lowercase letter and containing lowercase or uppercase letters after it,
can be written as \\(\[a-z\]([a-z]+)?\\). Again, most regex implementations provide can be written as \\(\[a-z\]([a-zA-Z]+)?\\). Again, most regex implementations provide
a special operator for \\((r_1+)?\\), written as \\(r_1*\\). a special operator for \\((r_1+)?\\), written as \\(r_1*\\).
So how does one go about checking if a regular expression matches a string? An efficient way is to So how does one go about checking if a regular expression matches a string? An efficient way is to
@@ -115,8 +114,8 @@ represent numbers directly into numbers, and do other small tasks.
So, what tokens do we have? From our arithmetic definition, we see that we have integers. So, what tokens do we have? From our arithmetic definition, we see that we have integers.
Let's use the regex `[0-9]+` for those. We also have the operators `+`, `-`, `*`, and `/`. Let's use the regex `[0-9]+` for those. We also have the operators `+`, `-`, `*`, and `/`.
`-` is simple enough: the corresponding regex is `-`. We need to The regex for `-` is simple enough: it's just `-`. However, we need to
preface our `/`, `+` and `*` with a backslash, though, since they happen to also be modifiers preface our `/`, `+` and `*` with a backslash, since they happen to also be modifiers
in flex's regular expressions: `\/`, `\+`, `\*`. in flex's regular expressions: `\/`, `\+`, `\*`.
Let's also represent some reserved keywords. We'll say that `defn`, `data`, `case`, and `of` Let's also represent some reserved keywords. We'll say that `defn`, `data`, `case`, and `of`

View File

@@ -2,7 +2,6 @@
title: Compiling a Functional Language Using C++, Part 2 - Parsing title: Compiling a Functional Language Using C++, Part 2 - Parsing
date: 2019-08-03T01:02:30-07:00 date: 2019-08-03T01:02:30-07:00
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
draft: true
--- ---
In the previous post, we covered tokenizing. We learned how to convert an input string into logical segments, and even wrote up a tokenizer to do it according to the rules of our language. Now, it's time to make sense of the tokens, and parse our language. In the previous post, we covered tokenizing. We learned how to convert an input string into logical segments, and even wrote up a tokenizer to do it according to the rules of our language. Now, it's time to make sense of the tokens, and parse our language.
@@ -38,7 +37,7 @@ $$
In practice, there are many ways of using a CFG to parse a programming language. Various parsing algorithms support various subsets In practice, there are many ways of using a CFG to parse a programming language. Various parsing algorithms support various subsets
of context free languages. For instance, top down parsers follow nearly exactly the structure that we had. They try to parse of context free languages. For instance, top down parsers follow nearly exactly the structure that we had. They try to parse
a nonterminal by trying to match each symbol in its body. In the rule \\(S \\rightarrow \\alpha \\beta \\gamma\\), it will a nonterminal by trying to match each symbol in its body. In the rule \\(S \\rightarrow \\alpha \\beta \\gamma\\), it will
first try to match \\(alpha\\), then \\(beta\\), and so on. If one of the three contains a nonterminal, it will attempt to parse first try to match \\(\\alpha\\), then \\(\\beta\\), and so on. If one of the three contains a nonterminal, it will attempt to parse
that nonterminal following the same strategy. However, this leaves a flaw - For instance, consider the grammar that nonterminal following the same strategy. However, this leaves a flaw - For instance, consider the grammar
$$ $$
\\begin{align} \\begin{align}
@@ -105,7 +104,7 @@ A\_{add} & \\rightarrow A\_{add}-A\_{mult} \\\\\\
A\_{add} & \\rightarrow A\_{mult} A\_{add} & \\rightarrow A\_{mult}
\\end{align} \\end{align}
$$ $$
The first rule matches another addition, added to the result of another addition. We use the addition in the body The first rule matches another addition, added to the result of a multiplication. Similarly, the second rule matches another addition, from which the result of a multiplication is then subtracted. We use the \\(A\_{add}\\) on the left side of \\(+\\) and \\(-\\) in the body
because we want to be able to parse strings like `1+2+3+4`, which we want to view as `((1+2)+3)+4` (mostly because because we want to be able to parse strings like `1+2+3+4`, which we want to view as `((1+2)+3)+4` (mostly because
subtraction is [left-associative](https://en.wikipedia.org/wiki/Operator_associativity)). So, we want the top level subtraction is [left-associative](https://en.wikipedia.org/wiki/Operator_associativity)). So, we want the top level
of the tree to be the rightmost `+` or `-`, since that means it will be the "last" operation. You may be asking, of the tree to be the rightmost `+` or `-`, since that means it will be the "last" operation. You may be asking,
@@ -150,7 +149,7 @@ What's the last \\(C\\)? We also want a "thing" to be a case expression. Here ar
$$ $$
\\begin{align} \\begin{align}
C & \\rightarrow \\text{case} \\; A\_{add} \\; \\text{of} \\; \\{ L\_B\\} \\\\\\ C & \\rightarrow \\text{case} \\; A\_{add} \\; \\text{of} \\; \\{ L\_B\\} \\\\\\
L\_B & \\rightarrow R \\; , \\; L\_B \\\\\\ L\_B & \\rightarrow R \\; L\_B \\\\\\
L\_B & \\rightarrow R \\\\\\ L\_B & \\rightarrow R \\\\\\
R & \\rightarrow N \\; \\text{arrow} \\; \\{ A\_{add} \\} \\\\\\ R & \\rightarrow N \\; \\text{arrow} \\; \\{ A\_{add} \\} \\\\\\
N & \\rightarrow \\text{lowerVar} \\\\\\ N & \\rightarrow \\text{lowerVar} \\\\\\

View File

@@ -1,7 +1,6 @@
--- ---
title: Compiling a Functional Language Using C++, Part 3 - Type Checking title: Compiling a Functional Language Using C++, Part 3 - Type Checking
date: 2019-08-06T14:26:38-07:00 date: 2019-08-06T14:26:38-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
--- ---
I think tokenizing and parsing are boring. The thing is, looking at syntax I think tokenizing and parsing are boring. The thing is, looking at syntax

View File

@@ -1,7 +1,6 @@
--- ---
title: Compiling a Functional Language Using C++, Part 4 - Small Improvements title: Compiling a Functional Language Using C++, Part 4 - Small Improvements
date: 2019-08-06T14:26:38-07:00 date: 2019-08-06T14:26:38-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
--- ---
We've done quite a big push in the previous post. We defined We've done quite a big push in the previous post. We defined
@@ -65,8 +64,8 @@ Finally, just like `ast_case::typecheck` called
We follow the same implementation strategy for patterns, We follow the same implementation strategy for patterns,
but we don't need indentation, or recursion: but we don't need indentation, or recursion:
{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}} {{< codelines "C++" "compiler/04/ast.cpp" 115 117 >}}
{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}} {{< codelines "C++" "compiler/04/ast.cpp" 123 128 >}}
In `main`, let's print the bodies of each function we receive from the parser: In `main`, let's print the bodies of each function we receive from the parser:
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}} {{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
@@ -160,12 +159,12 @@ we simply pass the type of the expression to be case analyzed into
the pattern matching method. However, since we don't want the pattern matching method. However, since we don't want
case analysis on functions, we ensure that the type of the expression case analysis on functions, we ensure that the type of the expression
is `type_base`. If not, we report this: is `type_base`. If not, we report this:
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}} {{< codelines "C++" "compiler/04/ast.cpp" 107 110 >}}
The next exception is in `pattern_constr::match`. It occurs The next exception is in `pattern_constr::match`. It occurs
when the pattern has a constructor we don't recognize, and when the pattern has a constructor we don't recognize, and
that's exactly what we report: that's exactly what we report:
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}} {{< codelines "C++" "compiler/04/ast.cpp" 132 134 >}}
The next exception occurs in a loop, when we bind The next exception occurs in a loop, when we bind
types for each of the constructor pattern's variables. types for each of the constructor pattern's variables.
@@ -174,7 +173,7 @@ constructor type to a `type_arr`. Conceptually,
this means that the pattern wants to apply the this means that the pattern wants to apply the
constructor to more parameters than it actually constructor to more parameters than it actually
takes: takes:
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}} {{< codelines "C++" "compiler/04/ast.cpp" 138 138 >}}
We remove the last throw at the bottom of `pattern_constr::match`. We remove the last throw at the bottom of `pattern_constr::match`.
This is because once unification succeeds, we know This is because once unification succeeds, we know

View File

@@ -1,7 +1,6 @@
--- ---
title: Compiling a Functional Language Using C++, Part 5 - Execution title: Compiling a Functional Language Using C++, Part 5 - Execution
date: 2019-08-06T14:26:38-07:00 date: 2019-08-06T14:26:38-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
--- ---
{{< gmachine_css >}} {{< gmachine_css >}}
@@ -47,7 +46,7 @@ defn snd p = {
P x y -> { y } P x y -> { y }
} }
} }
defn slow x = { returns x after waiting for 4 seconds } defn slow x = { returns x after waiting for 1 second }
defn main = { fst (P (slow 320) (slow 6)) } defn main = { fst (P (slow 320) (slow 6)) }
``` ```
@@ -559,7 +558,9 @@ rule to Unwind:
{{< /gmachine_inner >}} {{< /gmachine_inner >}}
{{< /gmachine >}} {{< /gmachine >}}
Just one more! Sometimes, it's possible for a tree node to reference itself. Just a couple more special-purpose instructions, and we're done!
Sometimes, it's possible for a tree node to reference itself.
For instance, Haskell defines the For instance, Haskell defines the
[fixpoint combinator](https://en.wikipedia.org/wiki/Fixed-point_combinator) [fixpoint combinator](https://en.wikipedia.org/wiki/Fixed-point_combinator)
as follows: as follows:
@@ -587,9 +588,27 @@ We can allocate an indirection on the stack, and call Update on it when
we've constructed a node. While we're constructing the tree, we can we've constructed a node. While we're constructing the tree, we can
refer to the indirection when a self-reference is required. refer to the indirection when a self-reference is required.
Lastly, we also define a Pop instruction, which just removes
some number of nodes from the stack. We want this because
calling Update at the end of a function modifies a node further up the stack,
leaving anything on top of the stack after that node as scratch work. We get
rid of that scratch work simply by popping it.
{{< gmachine "Pop" >}}
{{< gmachine_inner "Before">}}
\( \text{Pop} \; n : i \quad a_1, a_2, ..., a_n : s \quad d \quad h \quad m \)
{{< /gmachine_inner >}}
{{< gmachine_inner "After" >}}
\( i \quad s \quad d \quad h \quad m \)
{{< /gmachine_inner >}}
{{< gmachine_inner "Description" >}}
Pop \(n\) nodes from the stack.
{{< /gmachine_inner >}}
{{< /gmachine >}}
That's it for the instructions. Knowing them, however, doesn't That's it for the instructions. Knowing them, however, doesn't
tell us what to do with our `ast` structs. We'll need to define tell us what to do with our `ast` structs. We'll need to define
rules to translate trees into these instructions, and I've already rules to translate trees into these instructions, and I've already
alluded to this when we went over `double 326`. alluded to this when we went over `double 326`.
However, this has already gotten pretty long, However, this has already gotten pretty long,
so we'll do it in the next post: (link me!) so we'll do it in the next post: [Part 6 - Compilation]({{< relref "06_compiler_compilation.md" >}}).

View File

@@ -0,0 +1,504 @@
---
title: Compiling a Functional Language Using C++, Part 6 - Compilation
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
---
In the previous post, we defined a machine for graph reduction,
called a G-machine. However, this machine is still not particularly
connected to __our__ language. In this post, we will give
meanings to programs in our language in the context of
this G-machine. We will define a __compilation scheme__,
which will be a set of rules that tell us how to
translate programs in our language into G-machine instructions.
To mirror _Implementing Functional Languages: a tutorial_, we'll
call this compilation scheme \\(\\mathcal{C}\\), and write it
as \\(\\mathcal{C} ⟦e⟧ = i\\), meaning "the expression \\(e\\)
compiles to the instructions \\(i\\)".
To follow our route from the typechecking, let's start
with compiling expressions that are numbers. It's pretty easy:
$$
\\mathcal{C} ⟦n⟧ = [\\text{PushInt} \\; n]
$$
Here, we compiled a number expression to a list of
instructions with only one element - PushInt.
Just like when we did typechecking, let's
move on to compiling function applications. As
we informally stated in the previous chapter, since
the thing we're applying has to be on top,
we want to compile it last:
$$
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ = \\mathcal{C} ⟦e\_2⟧ ⧺ \\mathcal{C} ⟦e\_1⟧ ⧺ [\\text{MkApp}]
$$
Here, we used the \\(⧺\\) operator to represent the concatenation of two
lists. Otherwise, this should be pretty intuitive - we first run the instructions
to create the parameter, then we run the instructions to create the function,
and finally, we combine them using MkApp.
It's variables that once again force us to adjust our strategy. If our
program is well-typed, we know our variable will be on the stack:
our definition of Unwind makes it so for functions, and we will
define our case expression compilation scheme to match. However,
we still need to know __where__ on the stack each variable is,
and this changes as the stack is modified.
To accommodate for this, we define an environment, \\(\\rho\\),
to be a partial function mapping variable names to their
offsets on the stack. We write \\(\\rho = [x \\rightarrow n, y \\rightarrow m]\\)
to say "the environment \\(\\rho\\) maps variable \\(x\\) to stack offset \\(n\\),
and variable \\(y\\) to stack offset \\(m\\)". We also write \\(\\rho \\; x\\) to
say "look up \\(x\\) in \\(\\rho\\)", since \\(\\rho\\) is a function. Finally,
to help with the ever-changing stack, we define an augmented environment
\\(\\rho^{+n}\\), such that \\(\\rho^{+n} \\; x = \\rho \\; x + n\\). In words,
this basically means "\\(\\rho^{+n}\\) has all the variables from \\(\\rho\\),
but their addresses are incremented by \\(n\\)". We now pass \\(\\rho\\)
in to \\(\\mathcal{C}\\) together with the expression \\(e\\). Let's
rewrite our first two rules. For numbers:
$$
\\mathcal{C} ⟦n⟧ \\; \\rho = [\\text{PushInt} \\; n]
$$
For function application:
$$
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ \\; \\rho = \\mathcal{C} ⟦e\_2⟧ \\; \\rho ⧺ \\mathcal{C} ⟦e\_1⟧ \\; \\rho^{+1} ⧺ [\\text{MkApp}]
$$
Notice how in that last rule, we passed in \\(\\rho^{+1}\\) when compiling the function's expression. This is because
the result of running the instructions for \\(e\_2\\) will have left on the stack the function's parameter. Whatever
was at the top of the stack (and thus, had index 0), is now the second element from the top (address 1). The
same is true for all other things that were on the stack. So, we increment the environment accordingly.
With the environment, the variable rule is simple:
$$
\\mathcal{C} ⟦x⟧ \\; \\rho = [\\text{Push} \\; (\\rho \\; x)]
$$
One more thing. If we run across a function name, we want to
use PushGlobal rather than Push. Defining \\(f\\) to be a name
of a global function, we capture this using the following rule:
$$
\\mathcal{C} ⟦f⟧ \\; \\rho = [\\text{PushGlobal} \\; f]
$$
Now it's time for us to compile case expressions, but there's a bit of
an issue - our case expression branches don't map one-to-one with
the \\(t \\rightarrow i\_t\\) format of the Jump instruction.
This is because we allow for name patterns in the form \\(x\\),
which can possibly match more than one tag. Consider this
rather useless example:
```
data Bool = { True, False }
defn weird b = { case b of { b -> { False } } }
```
We only have one branch, but we have two tags that should
lead to it! Not only that, but variable patterns are
location-dependent: if a variable pattern comes
before a constructor pattern, then the constructor
pattern will never be reached. On the other hand,
if a constructor pattern comes before a variable
pattern, it will be tried before the variable pattern,
and thus is reachable.
We will ignore this problem for now - we will define our semantics
as though each case expression branch can match exactly one tag.
In our C++ code, we will write a conversion function that will
figure out which tag goes to which sequence of instructions.
Effectively, we'll be performing [desugaring](https://en.wikipedia.org/wiki/Syntactic_sugar).
Now, on to defining the compilation rules for case expressions.
It's helpful to define compiling a single branch of a case expression
separately. For a branch in the form \\(t \\; x\_1 \\; x\_2 \\; ... \\; x\_n \\rightarrow \text{body}\\),
we define a compilation scheme \\(\\mathcal{A}\\) as follows:
$$
\\begin{align}
\\mathcal{A} ⟦t \\; x\_1 \\; ... \\; x\_n \\rightarrow \text{body}⟧ \\; \\rho & =
t \\rightarrow [\\text{Split} \\; n] \\; ⧺ \\; \\mathcal{C}⟦\\text{body}⟧ \\; \\rho' \\; ⧺ \\; [\\text{Slide} \\; n] \\\\\\
\text{where} \\; \\rho' &= \\rho^{+n}[x\_1 \\rightarrow 0, ..., x\_n \\rightarrow n - 1]
\\end{align}
$$
First, we run Split - the node on the top of the stack is a packed constructor,
and we want access to its member variables, since they can be referenced by
the branch's body via \\(x\_i\\). For the same reason, we must make sure to include
\\(x\_1\\) through \\(x\_n\\) in our environment. Furthermore, since the split values now occupy the stack,
we have to offset our environment by \\(n\\) before adding bindings to our new variables.
Doing all these things gives us \\(\\rho'\\), which we use to compile the body, placing
the resulting instructions after Split. This leaves us with the desired graph on top of
the stack - the only thing left to do is to clean up the stack of the unpacked values,
which we do using Slide.
Notice that we didn't just create instructions - we created a mapping from the tag \\(t\\)
to the instructions that correspond to it.
Now, it's time for compiling the whole case expression. We first want
to construct the graph for the expression we want to perform case analysis on.
Next, we want to evaluate it (since we need a packed value, not a graph,
to read the tag). Finally, we perform a jump depending on the tag. This
is captured by the following rule:
$$
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}\_1 ... \\text{alt}\_n⟧ \\; \\rho =
\\mathcal{C} ⟦e⟧ \\; \\rho \\; ⧺ [\\text{Eval}, \\text{Jump} \\; [\\mathcal{A} ⟦\\text{alt}\_1⟧ \\; \\rho, ..., \\mathcal{A} ⟦\\text{alt}\_n⟧ \\; \\rho]]
$$
This works because \\(\\mathcal{A}\\) creates not only instructions,
but also a tag mapping. We simply populate our Jump instruction with such mappings
resulting from compiling each branch.
You may have noticed that we didn't add rules for binary operators. Just like
with type checking, we treat them as function calls. However, rather than constructing
graphs when we have to instantiate those functions, we simply
evaluate the arguments and perform the relevant arithmetic operation using BinOp.
We will do a similar thing for constructors.
### Implementation
With that out of the way, we can get around to writing some code. Let's
first define C++ structs for the instructions of the G-machine:
{{< codeblock "C++" "compiler/06/instruction.hpp" >}}
I omit the implementation of the various (trivial) `print` methods in this post;
as always, you can look at the full project source code, which is
freely available for each post in the series.
We can now envision a method on the `ast` struct that takes an environment
(just like our compilation scheme takes the environment \\(\\rho\\\)),
and compiles the `ast`. Rather than returning a vector
of instructions (which involves copying, unless we get some optimization kicking in),
we'll pass a reference to a vector to our method. The method will then place the generated
instructions into the vector.
There's one more thing to be considered. How do we tell apart a "global"
from a variable? A naive solution would be to take a list or map of
global functions as a third parameter to our `compile` method.
But there's an easier way! We know that the program passed type checking.
This means that every referenced variable exists. From then, the situation is easy -
if actual variable names are kept in the environment, \\(\\rho\\), then whenever
we see a variable that __isn't__ in the current environment, it must be a function name.
Having finished contemplating our method, it's time to define a signature:
```C++
virtual void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
```
Ah, but now we have to define "environment". Let's do that. Here's our header:
{{< codeblock "C++" "compiler/06/env.hpp" >}}
And here's the source file:
{{< codeblock "C++" "compiler/06/env.cpp" >}}
There's not that much to see here, but let's go through it anyway.
We define an environment as a linked list, kind of like
we did with the type environment. This time, though,
we use shared pointers instead of raw pointers to reference the parent.
I decided on this because we will need to be using virtual methods
(since we have two subclasses of `env`), and thus will need to
be passing the `env` by pointer. At that point, we might as well
use the "proper" way!
I implemented the environment as a linked list because it is, in essence,
a stack. However, not every "offset" in a stack is introduced by
binding variables - for instance, when we create an application node,
we first build the argument value on the stack, and then,
with that value still on the stack, build the left hand side of the application.
Thus, all the variable positions are offset by the presence of the argument
on the stack, and we must account for that. Similarly, in cases when we will
allocate space on the stack (we will run into these cases later), we will
need to account for that change. Thus, since we can increment
the offset by two ways (binding a variable and building something on the stack),
we allow for two types of nodes in our `env` stack.
During recursion we will be tweaking the return value of `get_offset` to
calculate the final location of a variable on the stack (if the
parent of a node returned offset `1`, but the node itself is a variable
node and thus introduces another offset, we need to return `2`). Because
of this, we cannot reasonably return a constant like `-1` (it will quickly
be made positive on a long list), and thus we throw an exception. To
allow for a safe way to check for an offset, without try-catch,
we also add a `has_variable` method which checks if the lookup will succeed.
A better approach would be to use `std::optional`, but it's C++17, so
we'll shy away from it.
It will also help to move some of the functions on the `binop` enum
into a separate file. The new header is pretty small:
{{< codeblock "C++" "compiler/06/binop.hpp" >}}
The new source file is not much longer:
{{< codeblock "C++" "compiler/06/binop.cpp" >}}
And now, we begin our implementation. Let's start with the easy ones:
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
the integer into the stack:
{{< codelines "C++" "compiler/06/ast.cpp" 36 38 >}}
The code for `ast_lid` needs to check if the variable is global or local,
just like we discussed:
{{< codelines "C++" "compiler/06/ast.cpp" 53 58 >}}
We do not have to do this for `ast_uid`:
{{< codelines "C++" "compiler/06/ast.cpp" 73 75 >}}
On to `ast_binop`! This is the first time we have to change our environment.
As we said earlier, once we build the right operand on the stack, every offset that we counted
from the top of the stack will have been shifted by 1 (we see this
in our compilation scheme for function application). So,
we create a new environment with `env_offset`, and use that
when we compile the left child:
{{< codelines "C++" "compiler/06/ast.cpp" 103 110 >}}
`ast_binop` performs two applications: `(+) lhs rhs`.
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
twice. In `ast_app`, we only need to perform one application,
`lhs rhs`:
{{< codelines "C++" "compiler/06/ast.cpp" 134 138 >}}
Note that we also extend our environment in this one,
for the exact same reason as before.
Case expressions are the only thing left on the agenda. This
is the time during which we have to perform desugaring. Here,
though, we run into an issue: we don't have tags assigned to constructors!
We need to adjust our code to keep track of the tags of the various
constructors of a type. To do this, we add a subclass for the `type_base`
struct, called `type_data`:
{{< codelines "C++" "compiler/06/type.hpp" 33 42 >}}
When we create types from `definition_data`, we tag the corresponding constructors:
{{< codelines "C++" "compiler/06/definition.cpp" 54 71 >}}
Ah, but adding constructor info to the type doesn't solve the problem.
Once we performed type checking, we don't keep
the types that we computed for an AST node, in the node. And obviously, we don't want
to go looking for them again. Furthermore, we can't just look up a constructor
in the environment, since we can well have patterns that don't have __any__ constructors:
```
match l {
l -> { 0 }
}
```
So, we want each `ast` node to store its type (well, in practice we only need this for
`ast_case`, but we might as well store it for all nodes). We can add it, no problem.
To add to that, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
since naming is hard). This method will call `typecheck`, and store the output into
the `node_type` field.
The signature is identical to `typecheck`, except it's neither virtual nor const:
```
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
```
And the implementation is as simple as you think:
{{< codelines "C++" "compiler/06/ast.cpp" 9 12 >}}
In client code (`definition_defn::typecheck_first` for instance), we should now
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
However, we're still missing something: most likely, the initial type assigned to any
node is a `type_var`, or a type variable. In this case, `type_var` __needs__ the information
from `type_mgr`, which we will not be keeping around. Besides, it's cleaner to keep the actual type
as a member of the node, not a variable type that references it. In order
to address this, we write two conversion functions that call `resolve` on all
types in an AST, given a type manager. After this is done, the type manager can be thrown away.
The signatures of the functions are as follows:
```
void resolve_common(const type_mgr& mgr);
virtual void resolve(const type_mgr& mgr) const = 0;
```
We also add the `resolve` method to `definition`, so that we can call it
without having to run `dynamic_cast`. The implementation for `ast::resolve_common`
just resolves the type:
{{< codelines "C++" "compiler/06/ast.cpp" 14 21 >}}
The virtual `ast::resolve` just calls `ast::resolve_common` on all `ast` children
of a node. Here's a sample implementation from `ast_binop`:
{{< codelines "C++" "compiler/06/ast.cpp" 98 101 >}}
And here's the implementation of `definition::resolve` on `definition_defn`:
{{< codelines "C++" "compiler/06/definition.cpp" 32 42 >}}
Finally, we call `resolve` at the end of `typecheck_program` in `main.cpp`:
{{< codelines "C++" "compiler/06/main.cpp" 40 42 >}}
At last, we're ready to implement the code for compiling `ast_case`.
Here it is, in all its glory:
{{< codelines "C++" "compiler/06/ast.cpp" 178 230 >}}
There's a lot to unpack here. First of all, just like we said in the compilation
scheme, we want to build and evaluate the expression that's being analyzed.
Once that's done, however, things get more tricky. We know that each
branch of a case expression will correspond to a vector of instructions -
in fact, our jump instruction contains a mapping from tags to instructions.
As we also discussed above, each list of instructions can be mapped to
by multiple tags. We don't want to recompile the same sequence of instructions
multiple times (or indeed, generate machine code for it). So, we keep
a mapping of tags to their corresponding sequences of instructions. We implement
this by having a vector of vectors of instructions (in which each inner vector
represents the code for a branch), and a map of tag number to index
in the vector containing all the branches. This way, multiple tags
can point to the same instruction set without duplicating information.
We also don't allow a tag to be mapped to more than one sequence of instructions.
This is handled differently depending on whether a variable pattern or a
constructor pattern is encountered. Variable patterns map all
tags that haven't been mapped yet, so no error can occur. Constructor patterns,
though, can explicitly try to map the same tag twice, and we don't want that.
I implied in the previous paragraph the implementation of our case expression
compilation algorithm, but let's go through it. Once we've compiled
the expression to be analyzed, and evaluated it (just like in our definitions
above), we proceed to look at all the branches specified in the case expression.
If a branch has a variable pattern, we must map to the result of the compilation
all the remaining, unmapped tags. We also aren't going to be taking apart
our value, so we don't need to use Split, but we do need to add 1 to the
environment offset to account for the presence of that value. So,
we compile the branch body with that offset, and iterate through
all the constructors of our data type. We skip a constructor
if it's been mapped, and if it hasn't been, we map it to the index
that this branch body will have in our list. Finally,
we push the newly compiled instruction sequence into the list of branch
bodies.
If a branch is a constructor pattern, on the other hand, we lead our compilation
output with a Split. This takes off the value from the stack, but pushes on
all the parameters of the constructor. We account for this by incrementing the
environment with the offset given by the number of arguments (just like we did
in our definitions of our compilation scheme). Before we map the tag,
we ensure that it hasn't already been mapped (and throw an exception, currently
in the form of a type error due to the growing length of this post),
and finally map it and insert the new branch code into the list of branches.
After we're done with all the branches, we also check for non-exhaustive patterns,
since otherwise we could run into runtime errors. With this, the case expression,
and the last of the AST nodes, can be compiled.
We also add a `compile` method to definitions, since they contain
our AST nodes. The method is empty for `definition_data`, and
looks as follows for `definition_defn`:
{{< codelines "C++" "compiler/06/definition.cpp" 44 52 >}}
Notice that we terminate the function with Update and Pop. This
will turn the `ast_app` node that served as the "root"
of the application into an indirection to the value that we have computed.
Doing so will also remove all "scratch work" from the stack.
In essence, this is how we can lazily evaluate expressions.
Finally, we make a function in our `main.cpp` file to compile
all the definitions:
{{< codelines "C++" "compiler/06/main.cpp" 45 56 >}}
In this method, we also include some extra
output to help us see the result of our compilation. Since
at the moment, only the `definition_defn` program has to
be compiled, we try to cast all definitions to it, and if
we succeed, we print them out.
Let's try it all out! For the below sample program:
{{< rawblock "compiler/06/examples/works1.txt" >}}
Our compiler produces the following new output:
```
PushInt(6)
PushInt(320)
PushGlobal(plus)
MkApp()
MkApp()
Update(0)
Pop(0)
Push(1)
Push(1)
PushGlobal(plus)
MkApp()
MkApp()
Update(2)
Pop(2)
```
The first sequence of instructions is clearly `main`. It creates
an application of `plus` to `320`, and then applies that to
`6`, which results in `plus 320 6`, which is correct. The
second sequence of instructions pushes the parameter that
sits on offset 1 from the top of the stack (`y`). It then
pushes a parameter from the same offset again, but this time,
since `y` was previously pushed on the stack, `x` is now
in that position, so `x` is pushed onto the stack.
Finally, `+` is pushed, and the application
`(+) x y` is created, which is equivalent to `x+y`.
Let's also take a look at a case expression program:
{{< rawblock "compiler/06/examples/works3.txt" >}}
The result of the compilation is as follows:
```
Push(0)
Eval()
Jump(
Split()
PushInt(0)
Slide(0)
Split()
Push(1)
PushGlobal(length)
MkApp()
PushInt(1)
PushGlobal(plus)
MkApp()
MkApp()
Slide(2)
)
Update(1)
Pop(1)
```
We push the first (and only) parameter onto the stack. We then make
sure it's evaluated, and perform case analysis: if the list
is `Nil`, we simply push the number 0 onto the stack. If it's
a concatenation of some `x` and another list `xs`, we
push `xs` and `length` onto the stack, make the application
(`length xs`), push the 1, and finally apply `+` to the result.
This all makes sense!
With this, we've been able to compile our expressions and functions
into G-machine code. We're not done, however - our computers
aren't G-machines. We'll need to compile our G-machine code to
__machine code__ (we will use LLVM for this), implement the
__runtime__, and develop a __garbage collector__. We'll
tackle the first of these in the next post - [Part 7 - Runtime]({{< relref "07_compiler_runtime.md" >}}).

View File

@@ -1,195 +0,0 @@
---
title: Compiling a Functional Language Using C++, Part 6 - Compilation
date: 2019-08-06T14:26:38-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"]
---
In the previous post, we defined a machine for graph reduction,
called a G-machine. However, this machine is still not particularly
connected to __our__ language. In this post, we will give
meanings to programs in our language in the context of
this G-machine. We will define a __compilation scheme__,
which will be a set of rules that tell us how to
translate programs in our language into G-machine instructions.
To mirror _Implementing Functional Languages: a tutorial_, we'll
call this compilation scheme \\(\\mathcal{C}\\), and write it
as \\(\\mathcal{C} ⟦e⟧ = i\\), meaning "the expression \\(e\\)
compiles to the instructions \\(i\\)".
To follow our route from the typechecking, let's start
with compiling expressions that are numbers. It's pretty easy:
$$
\\mathcal{C} ⟦n⟧ = [\\text{PushInt} \\; n]
$$
Here, we compiled a number expression to a list of
instructions with only one element - PushInt.
Just like when we did typechecking, let's
move on to compiling function applications. As
we informally stated in the previous chapter, since
the thing we're applying has to be on top,
we want to compile it last:
$$
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ = \\mathcal{C} ⟦e\_2⟧ ⧺ \\mathcal{C} ⟦e\_1⟧ ⧺ [\\text{MkApp}]
$$
Here, we used the \\(⧺\\) operator to represent the concatenation of two
lists. Otherwise, this should be pretty intuitive - we first run the instructions
to create the parameter, then we run the instructions to create the function,
and finally, we combine them using MkApp.
It's variables that once again force us to adjust our strategy. If our
program is well-typed, we know our variable will be on the stack:
our definition of Unwind makes it so for functions, and we will
define our case expression compilation scheme to match. However,
we still need to know __where__ on the stack each variable is,
and this changes as the stack is modified.
To accommodate for this, we define an environment, \\(\\rho\\),
to be a partial function mapping variable names to their
offsets on the stack. We write \\(\\rho = [x \\rightarrow n, y \\rightarrow m]\\)
to say "the environment \\(\\rho\\) maps variable \\(x\\) to stack offset \\(n\\),
and variable \\(y\\) to stack offset \\(m\\)". We also write \\(\\rho \\; x\\) to
say "look up \\(x\\) in \\(\\rho\\)", since \\(\\rho\\) is a function. Finally,
to help with the ever-changing stack, we define an augmented environment
\\(\\rho^{+n}\\), such that \\(\\rho^{+n} \\; x = \\rho \\; x + n\\). In words,
this basically means "\\(\\rho^{+n}\\) has all the variables from \\(\\rho\\),
but their addresses are incremented by \\(n\\)". We now pass \\(\\rho\\)
in to \\(\\mathcal{C}\\) together with the expression \\(e\\). Let's
rewrite our first two rules. For numbers:
$$
\\mathcal{C} ⟦n⟧ \\; \\rho = [\\text{PushInt} \\; n]
$$
For function application:
$$
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ \\; \\rho = \\mathcal{C} ⟦e\_2⟧ \\; \\rho ⧺ \\mathcal{C} ⟦e\_1⟧ \\; \\rho^{+1} ⧺ [\\text{MkApp}]
$$
Notice how in that last rule, we passed in \\(\\rho^{+1}\\) when compiling the function's expression. This is because
the result of running the instructions for \\(e\_2\\) will have left on the stack the function's parameter. Whatever
was at the top of the stack (and thus, had index 0), is now the second element from the top (address 1). The
same is true for all other things that were on the stack. So, we increment the environment accordingly.
With the environment, the variable rule is simple:
$$
\\mathcal{C} ⟦x⟧ \\; \\rho = [\\text{Push} \\; (\\rho \\; x)]
$$
One more thing. If we run across a function name, we want to
use PushGlobal rather than Push. Defining \\(f\\) to be a name
of a global function, we capture this using the following rule:
$$
\\mathcal{C} ⟦f⟧ \\; \\rho = [\\text{PushGlobal} \\; f]
$$
Now it's time for us to compile case expressions, but there's a bit of
an issue - our case expression branches don't map one-to-one with
the \\(t \\rightarrow i\_t\\) format of the Jump instruction.
This is because we allow for name patterns in the form \\(x\\),
which can possibly match more than one tag. Consider this
rather useless example:
```
data Bool = { True, False }
defn weird b = { case b of { b -> { False } } }
```
We only have one branch, but we have two tags that should
lead to it! Not only that, but variable patterns are
location-dependent: if a variable pattern comes
before a constructor pattern, then the constructor
pattern will never be reached. On the other hand,
if a constructor pattern comes before a variable
pattern, it will be tried before the variable pattern,
and thus is reachable.
We will ignore this problem for now - we will define our semantics
as though each case expression branch can match exactly one tag.
In our C++ code, we will write a conversion function that will
figure out which tag goes to which sequence of instructions.
Effectively, we'll be performing [desugaring](https://en.wikipedia.org/wiki/Syntactic_sugar).
Now, on to defining the compilation rules for case expressions.
It's helpful to define compiling a single branch of a case expression
separately. For a branch in the form \\(t \\; x\_1 \\; x\_2 \\; ... \\; x\_n \\rightarrow \text{body}\\),
we define a compilation scheme \\(\\mathcal{A}\\) as follows:
$$
\\begin{align}
\\mathcal{A} ⟦t \\; x\_1 \\; ... \\; x\_n \\rightarrow \text{body}⟧ \\; \\rho & =
t \\rightarrow [\\text{Split} \\; n] \\; ⧺ \\; \\mathcal{C}⟦\\text{body}⟧ \\; \\rho' \\; ⧺ \\; [\\text{Slide} \\; n] \\\\\\
\text{where} \\; \\rho' &= \\rho^{+n}[x\_1 \\rightarrow 0, ..., x\_n \\rightarrow n - 1]
\\end{align}
$$
First, we run Split - the node on the top of the stack is a packed constructor,
and we want access to its member variables, since they can be referenced by
the branch's body via \\(x\_i\\). For the same reason, we must make sure to include
\\(x\_1\\) through \\(x\_n\\) in our environment. Furthermore, since the split values now occupy the stack,
we have to offset our environment by \\(n\\) before adding bindings to our new variables.
Doing all these things gives us \\(\\rho'\\), which we use to compile the body, placing
the resulting instructions after Split. This leaves us with the desired graph on top of
the stack - the only thing left to do is to clean up the stack of the unpacked values,
which we do using Slide.
Notice that we didn't just create instructions - we created a mapping from the tag \\(t\\)
to the instructions that correspond to it.
Now, it's time for compiling the whole case expression. We first want
to construct the graph for the expression we want to perform case analysis on.
Next, we want to evaluate it (since we need a packed value, not a graph,
to read the tag). Finally, we perform a jump depending on the tag. This
is captured by the following rule:
$$
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
\\mathcal{C} ⟦e⟧ \\; \\rho \\; ⧺ [\\text{Eval}, \\text{Jump} \\; [\\mathcal{A} ⟦\\text{alt}_1⟧ \\; \\rho, ..., \\mathcal{A} ⟦\\text{alt}_n⟧ \\; \\rho]]
$$
This works because \\(\\mathcal{A}\\) creates not only instructions,
but also a tag mapping. We simply populate our Jump instruction with such mappings
resulting from compiling each branch.
You may have noticed that we didn't add rules for binary operators. Just like
with type checking, we treat them as function calls. However, rather than constructing
graphs when we have to instantiate those functions, we simply
evaluate the arguments and perform the relevant arithmetic operation using BinOp.
We will do a similar thing for constructors.
### Implementation
With that out of the way, we can get around to writing some code. Let's
first define C++ structs for the instructions of the G-machine:
{{< codeblock "C++" "compiler/06/instruction.hpp" >}}
We can now envision a method on the `ast` struct that takes an environment
(just like our compilation scheme takes the environment \\(\\rho\\)),
and compiles the `ast`. Rather than returning a vector
of instructions (which involves copying, unless we get some optimization kicking in),
we'll pass a reference to a vector to our method. The method will then place the generated
instructions into the vector.
There's one more thing to be considered. How do we tell apart a "global"
from a variable? A naive solution would be to take a list or map of
global functions as a third parameter to our `compile` method.
But there's an easier way! We know that the program passed type checking.
This means that every referenced variable exists. From then, the situation is easy -
if actual variable names are kept in the environment, \\(\\rho\\), then whenever
we see a variable that __isn't__ in the current environment, it must be a function name.
Having finished contemplating our method, it's time to define a signature:
```C++
virtual void compile(const env_ptr env, std::vector<instruction>& into) const;
```
Ah, but now we have to define "environment". Let's do that:
{{< codeblock "C++" "compiler/06/env.hpp" >}}
And now, we begin our implementation.

View File

@@ -0,0 +1,160 @@
---
title: Compiling a Functional Language Using C++, Part 7 - Runtime
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
---
Wikipedia has the following definition for a __runtime__:
> A [runtime] primarily implements portions of an execution model.
We know what our execution model is! We talked about it in Part 5 - it's the
lazy graph reduction we've specified. Creating and manipulating
graph nodes is slightly above hardware level, and all programs in our
functional language will rely on such manipulation (it's how they run!). Furthermore,
most G-machine instructions are also above hardware level (especially Unwind!).
Push and Slide and other instructions are pretty complex.
Most computers aren't stack machines. We'll have to implement
our own stack, and whenever a graph-building function will want to modify
the stack, it will have to call library routines for our stack implementation:
```C
void stack_push(struct stack* s, struct node_s* n);
struct node_s* stack_slide(struct stack* s, size_t c);
/* other stack operations */
```
Furthermore, we observe that Unwind does a lot of the heavy lifting in our
G-machine definition. After we build the graph,
Unwind is what picks it apart and performs function calls. Furthermore,
Unwind pushes Unwind back on the stack: once you've hit it,
you're continuing to Unwind until you reach a function call. This
effectively means we can implement Unwind as a loop:
```C
while(1) {
// Check for Unwind's first rule
// Check for Unwind's second rule
// ...
}
```
In this implementation, Unwind is in charge. We won't need to insert
the Unwind operations at the end of our generated functions, and you
may have noticed we've already been following this strategy in our
implementation of the G-machine compilation.
We can start working on an implementation of the runtime right now,
beginning with the nodes:
{{< codelines "C++" "compiler/07/runtime.h" 4 50 >}}
We have a variety of different nodes that can be on the stack, but without
the magic of C++'s `vtable` and RTTI, we have to take care of the bookkeeping
ourselves. We add an enum, `node_tag`, which we will use to indicate what
type of node we're looking at. We also add a "base class" `node_base`, which
contains the fields that all nodes must contain (only `tag` at the moment).
We then add to the beginning of each node struct a member of type
`node_base`. With this, a pointer to a node struct can be interpreted as a pointer
to `node_base`, which is our lowest common denominator. To go back, we
check the `tag` of `node_base`, and cast the pointer appropriately. This way,
we mimic inheritance, in a very basic manner.
We also add an `alloc_node`, which allocates a region of memory big enough
to be any node. We do this because we sometimes mutate nodes (replacing
expressions with the results of their evaluation), changing their type.
We then want to be able to change a node without reallocating memory.
Since the biggest node we have is `node_app`, that's the one we choose.
Finally, to make it easier to create nodes from our generated code,
we add helper functions like `alloc_num`, which allocate a given
node type, and set its tag and member fields appropriately. We
don't include such a function for `node_data`, since this
node will be created only in one possible way.
Here's the implementation:
{{< codelines "C" "compiler/07/runtime.c" 6 40 >}}
We now move on to implement some stack operations. Let's list them:
* `stack_init` and `stack_free` - one allocates memory for the stack,
the other releases it.
* `stack_push`, `stack_pop` and `stack_peek` - the classic stack operations.
We have `_peek` to take an offset, so we can peek relative to the top of the stack.
* `stack_popn` - pop off some number of nodes instead of one.
* `stack_slide` - the slide we specified in the semantics. Keeps the top, deletes the
next several nodes.
* `stack_update` - turns the node at the offset into an indirection to the result,
which we will use for lazy evaluation (modifying expressions with their reduced forms).
* `stack_alloc` - allocate indirection nodes on the stack. We will use this later.
* `stack_pack` and `stack_split` - Wrap and unwrap constructors on the stack.
We declare these in a header:
{{< codelines "C" "compiler/07/runtime.h" 52 68 >}}
And implement them as follows:
{{< codelines "C" "compiler/07/runtime.c" 42 116 >}}
Let's now talk about how this will connect to the code we generate. To get
a quick example, consider the `node_global` struct that we have declared above.
It has a member `function`, which is a __function pointer__ to a function
that takes a stack and returns void.
When we finally generate machine code for each of the functions
we have in our program, it will be made up of sequences of G-machine
operations expressed using assembly instructions. These instructions will still
have to manipulate the G-machine stack (they still represent G-machine operations!),
and thus, the resulting assembly subroutine will take as parameter a stack. It will
then construct the function's graph on that stack, as we've already seen. Thus,
we express a compiled top-level function as a subroutine that takes a stack,
and returns void. A global node holds in it the pointer to the function that it will call.
When our program starts, it will assume that there exists a top-level
function `f_main` that takes 0 parameters. It will take that function, call it
to produce the initial graph, and then let the unwind loop take care of the evaluation.
Thus, our program will initially look like this:
{{< codelines "C" "compiler/07/runtime.c" 154 159 >}}
As we said, we expect an externally-declared subroutine `f_main`. We construct
a global node for `f_main` with arity 0, and then start the execution using a function `eval`.
What's `eval`, though? It's the function that will take care of creating
a new stack, and evaluating the node that is passed to it using
our unwind loop. `eval` itself is pretty terse:
{{< codelines "C" "compiler/07/runtime.c" 144 152 >}}
We create a fresh program stack, start it off with whatever node
we want to evaluate, and have `unwind` take care of the rest.
`unwind` is a direct implementation of the rules from Part 5:
{{< codelines "C" "compiler/07/runtime.c" 118 142 >}}
We can now come up with some simple programs. Let's try
writing out, by hand, `main = { 320 + 6 }`. We end up with:
{{< codeblock "C" "compiler/07/examples/runtime1.c" >}}
If we add to the bottom of our `main` the following code:
```C
printf("%d\n", ((struct node_num*) result)->value);
```
And compile and run our code:
```
gcc runtime.c examples/runtime1.c
./a.out
```
We get the output `326`, which is exactly correct!
We now have a common set of functions and declarations
that serve to support the code we generate from our compiler.
Although this time, we wrote out `f_main` by hand, we will soon
use LLVM to generate code for `f_main` and more. Once we get
that going, we will be able to compile our code!
Next time, we will start work on converting our G-machine instructions
into machine code. We will set up LLVM and get our very first
fully functional compiled programs in [Part 8 - LLVM]({{< relref "08_compiler_llvm.md" >}}).

View File

@@ -0,0 +1,578 @@
---
title: Compiling a Functional Language Using C++, Part 8 - LLVM
date: 2019-10-30T22:16:22-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
---
We don't want a compiler that can only generate code for a single
platform. Our language should work on macOS, Windows, and Linux,
on x86\_64, ARM, and maybe some other architectures. We also
don't want to manually implement the compiler for each platform,
dealing with the specifics of each architecture and operating
system.
This is where LLVM comes in. LLVM (which stands for __Low Level Virtual Machine__),
is a project which presents us with a kind of generic assembly language,
an __Intermediate Representation__ (IR). It also provides tooling to compile the
IR into platform-specific instructions, as well as to apply a host of various
optimizations. We can thus translate our G-machine instructions to LLVM,
and then use LLVM to generate machine code, which gets us to our ultimate
goal of compiling our language.
We start with adding LLVM to our CMake project.
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 7 7 >}}
LLVM is a huge project, and has many components. We don't need
most of them. We do need the core libraries, the x86 assembly
generator, and x86 assembly parser. I'm
not sure why we need the last one, but I ran into linking
errors without it. We find the required link targets
for these components using this CMake command:
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 19 20 >}}
Finally, we add the new include directories, link targets,
and definitions to our compiler executable:
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 39 41 >}}
Great, we have the infrastructure updated to work with LLVM. It's
now time to start using the LLVM API to compile our G-machine instructions
into assembly. We start with `LLVMContext`. The LLVM documentation states:
> This is an important class for using LLVM in a threaded context.
> It (opaquely) owns and manages the core "global" data of LLVM's core infrastructure, including the type and constant uniquing tables.
We will have exactly one instance of such a class in our program.
Additionally, we want an `IRBuilder`, which will help us generate IR instructions,
placing them into basic blocks (more on that in a bit). Also, we want
a `Module` object, which represents some collection of code and declarations
(perhaps like a C++ source file). Let's keep these things in our own
`llvm_context` class. Here's what that looks like:
{{< codeblock "C++" "compiler/08/llvm_context.hpp" >}}
We include the LLVM context, builder, and module as members
of the context struct. Since the builder and the module need
the context, we initialize them in the constructor, where they
can safely reference it.
Besides these fields, we added
a few others, namely the `functions` and `struct_types` maps,
and the various `llvm::Type` subclasses such as `stack_type`.
We did this because we want to be able to call our runtime
functions (and use our runtime structs) from LLVM. To generate
a function call from LLVM, we need to have access to an
`llvm::Function` object. We thus want to have an `llvm::Function`
object for each runtime function we want to call. We could declare
a member variable in our `llvm_context` for each runtime function,
but it's easier to leave this to be an implementation
detail, and only have a dynamically created map between runtime
function names and their corresponding `llvm::Function` objects.
We populate the maps and other type-related variables in the
two methods, `create_functions()` and `create_types()`. To
create an `llvm::Function`, we must provide an `llvm::FunctionType`,
an `llvm::LinkageType`, the name of the function, and the module
in which the function is declared. Since we only have one
module (the one we initialized in the constructor) that's
the module we pass in. The name of the function is the same
as its name in the runtime. The linkage type is a little
more complicated - it tells LLVM the "visibility" of a function.
"Private" or "Internal" would hide this function from the linker
(like `static` functions in C). However, we want to do the opposite: our
generated functions should be accessible from other code.
Thus, our linkage type is "External".
The only remaining parameter is the `llvm::FunctionType`, which
is created using code like:
```C++
llvm::FunctionType::get(return_type, {param_type_1, param_type_2, ...}, is_variadic)
```
Declaring all the functions and types in our runtime is mostly
just tedious. Here are a few lines from `create_functions()`, which
give a very good idea of the rest of that method:
{{< codelines "C++" "compiler/08/llvm_context.cpp" 47 60 >}}
Similarly, here are a few lines from `create_types()`, from
which you can extrapolate the rest:
{{< codelines "C++" "compiler/08/llvm_context.cpp" 7 11 >}}
We also tell LLVM the contents of our structs, so that
we may later reference specific fields. This is just like
forward declaration - we can forward declare a struct
in C/C++, but unless we also declare its contents,
we can't access what's inside. Below is the code
for specifying the body of `node_base` and `node_app`.
{{< codelines "C++" "compiler/08/llvm_context.cpp" 19 26 >}}
There's still more functionality packed into `llvm_context`.
Let's next take a look into `custom_function`, and
the `create_custom_function` method. Why do we need
these? To highlight the need for the custom class,
let's take a look at `instruction_pushglobal` which
occurs at the G-machine level, and then at `alloc_global`,
which will be a function call generated as part of
the PushGlobal instruction. `instruction_pushglobal`'s
only member variable is `name`, which stands for
the name of the global function it's referencing. However,
`alloc_global` requires an arity argument! We can
try to get this information from the `llvm::Function`
corresponding to the global we're trying to reference,
but this doesn't get us anywhere: as far as LLVM
is concerned, any global function only takes one
parameter, the stack. The rest of the parameters
are given through that stack, and their number cannot
be easily deduced from the function alone.
Instead, we decide to store global functions together
with their arity. We thus create a class to combine
these two things (`custom_function`), define
a map from global function names to instances
of `custom_function`, and add a convenience method
(`create_custom_function`) that takes care of
constructing an `llvm::Function` object, creating
a `custom_function`, and storing it in the map.
The implementation for `create_custom_function` is
straightforward:
{{< codelines "C++" "compiler/08/llvm_context.cpp" 234 252 >}}
We create a function type, then a function, and finally
initialize a `custom_function`. There's one thing
we haven't seen yet in this function, which is the
`BasicBlock` class. We'll get to what basic blocks
are shortly, but for now it's sufficient to
know that the basic block gives us a place to
insert code.
This isn't the end of our `llvm_context` class: it also
has a variety of other `create_*` methods! Let's take a look
at their signatures. Most return either `void`,
`llvm::ConstantInt*`, or `llvm::Value*`. Since
`llvm::ConstantInt*` is a subclass of `llvm::Value*`, let's
just treat it as simply an `llvm::Value*` while trying
to understand these methods.
So, what is `llvm::Value`? To answer this question, let's
first understand how the LLVM IR works.
### LLVM IR
An important property of LLVM IR is that it is in __Static Single Assignment__
(SSA) form. This means that each variable can only be assigned to once. For instance,
if we use `<-` to represent assignment, the following program is valid:
```
x <- 1
y <- 2
z <- x + y
```
However, the following program is __not__ valid:
```
x <- 1
x <- x + 1
```
But what if we __do__ want to modify a variable `x`?
We can declare another "version" of `x` every time we modify it.
For instance, if we wanted to increment `x` twice, we'd do this:
```
x <- 1
x1 <- x + 1
x2 <- x1 + 1
```
In practice, LLVM's C++ API can take care of versioning variables on its own, by
auto-incrementing numbers associated with each variable we use.
Assigned to each variable is `llvm::Value`. The LLVM documentation states:
> It is the base class of all values computed by a program that may be used as operands to other values.
It's important to understand that `llvm::Value` __does not store the result of the computation__.
It rather represents how something may be computed. 1 is a value because it is computed by
just returning 1. `x + 1` is a value because it is computed by adding the value inside of
`x` to 1. Since we cannot modify a variable once we've declared it, we will
keep assigning intermediate results to new variables, constructing new values
out of values that we've already specified.
This somewhat elucidates what the `create_*` functions do: `create_i8` creates an 8-bit integer
value, and `create_pop` creates a value that is computed by calling
our runtime `stack_pop` function.
Before we move on to look at the implementations of these functions,
we need to understand another concept from the world of compiler design:
__basic blocks__. A basic block is a sequence of instructions that
are guaranteed to be executed one after another. This means that a
basic block cannot have an if/else, jump, or any other type of control flow anywhere
except at the end. If control flow could appear inside the basic block,
there would be opportunity for execution of some, but not all,
instructions in the block, violating the definition. Every time
we add an IR instruction in LLVM, we add it to a basic block.
Writing control flow involves creating several blocks, with each
block serving as the destination of a potential jump. We will
see this used to compile the Jump instruction.
### Generating LLVM IR
Now that we understand what `llvm::Value` is, and have a vague
understanding of how LLVM is structured, let's take a look at
the implementations of the `create_*` functions. The simplest
is `create_i8`:
{{< codelines "C++" "compiler/08/llvm_context.cpp" 150 152 >}}
Not much to see here. We create an instance of the `llvm::ConstantInt` class,
from the actual integer given to the method. As we said before,
`llvm::ConstantInt` is a subclass of `llvm::Value`. Next up, let's look
at `create_pop`:
{{< codelines "C++" "compiler/08/llvm_context.cpp" 160 163 >}}
We first retrieve an `llvm::Function` associated with `stack_pop`
from our map, and then use `llvm::IRBuilder::CreateCall` to insert
a value that represents a function call into the currently
selected basic block (the builder's state is what
dictates what the "selected basic block" is). `CreateCall`
takes as parameters the function we want to call (`stack_pop`,
which we store into the `pop_f` variable), as well as the arguments
to the function (for which we pass `f->arg_begin()`).
Hold on. What the heck is `arg_begin()`? Why do we take a function
as a parameter to this method? The answer is fairly simple: this
method is used when we are
generating a function with signature `void f_(struct stack* s)`
(we discussed the signature in the previous post). The
parameter that we give to `create_pop` is this function we're
generating, and `arg_begin()` gets the value that represents
the first parameter to our function - `s`! Since `stack_pop`
takes a stack, we need to give it the stack we're working on,
and so we use `f->arg_begin()` to access it.
Most of the other functions follow this exact pattern, with small
deviations. However, another function uses a more complicated LLVM
instruction:
{{< codelines "C++" "compiler/08/llvm_context.cpp" 202 209 >}}
`unwrap_num` is used to cast a given node pointer to a pointer
to a number node, and then return the integer value from
that number node. It starts fairly innocently: we ask
LLVM for the type of a pointer to a `node_num` struct,
and then use `CreatePointerCast` to create a value
that is the same node pointer we're given, but now interpreted
as a number node pointer. We now have to access
the `value` field of our node. `CreateGEP` helps us with
this: given a pointer to a node, and two offsets
`n` and `k`, it effectively performs the following:
```C++
&(num_pointer[n].kth_field)
```
The first offset, then, gives an index into the "array"
represented by the pointer, while the second offset
gives the index of the field we want to access. We
want to dereference the pointer (`num_pointer[0]`),
and we want the second field (`1`, when counting from 0).
Thus, we call `CreateGEP` with these offsets and our pointers.
This still leaves us with a pointer to a number, rather
than the number itself. To dereference the pointer, we use
`CreateLoad`. This gives us the value of the number node,
which we promptly return.
This concludes our implementation of the `llvm_context` -
it's time to move on to the G-machine instructions.
### G-machine Instructions to LLVM IR
Let's now envision a `gen_llvm` method on the `instruction` struct,
which will turn the still-abstract G-machine instruction
into tangible, close-to-metal LLVM IR. As we've seen
in our implementation of `llvm_context`, to access the stack, we need access to the first
argument of the function we're generating. Thus, we need this method
to accept the function whose instructions are
being converted to LLVM. We also pass in the
`llvm_context`, since it contains the LLVM builder,
context, module, and a map of globally declared functions.
With these things in mind, here's the signature for `gen_llvm`:
```C++
virtual void gen_llvm(llvm_context&, llvm::Function*) const;
```
Let's get right to it! `instruction_pushint` gives us an easy
start:
{{< codelines "C++" "compiler/08/instruction.cpp" 17 19 >}}
We create an LLVM integer constant with the value of
our integer, and push it onto the stack.
`instruction_push` is equally terse:
{{< codelines "C++" "compiler/08/instruction.cpp" 37 39 >}}
We simply peek at the value of the stack at the given
offset (an integer of the same size as `size_t`, which
we create using `create_size`). Once we have the
result of the peek, we push it onto the stack.
`instruction_pushglobal` is more involved. Let's take a look:
{{< codelines "C++" "compiler/08/instruction.cpp" 26 30 >}}
First, we retrieve the `custom_function` associated with
the given global name. We then create an LLVM integer
constant representing the arity of the function,
and then push onto the stack the result of `alloc_global`,
giving it the function and arity just like it expects.
`instruction_pop` is also short, and doesn't require much
further explanation:
{{< codelines "C++" "compiler/08/instruction.cpp" 46 48 >}}
Some other instructions, such as `instruction_update`,
`instruction_pack`, `instruction_split`, `instruction_slide`,
`instruction_alloc` and `instruction_eval` are equally as simple,
and we omit them for the purpose of brevity.
What remains are two "meaty" functions, `instruction_jump` and
`instruction_binop`. Let's start with the former:
{{< codelines "C++" "compiler/08/instruction.cpp" 101 123 >}}
This is the one and only function in which we have to take
care of control flow. Conceptually, depending on the tag
of the `node_data` at the top of the stack, we want
to pick one of many branches and jump to it.
As we discussed, a basic block has to be executed in
its entirety; since the branches of a case expression
are mutually exclusive (only one of them is executed in any given case),
we have to create a separate basic block for each branch.
Given these blocks, we then want to branch to the correct one
using the tag of the node on top of the stack.
This is exactly what we do in this function. We first peek
at the node on top of the stack, and use `CreateGEP` through
`unwrap_data_tag` to get access to its tag. What we then
need is LLVM's switch instruction, created using `CreateSwitch`.
We must provide the switch with a "default" case in case
the tag value is something we don't recognize. To do this,
we create a "safety" `BasicBlock`. With this new safety
block in hand, we're able to call `CreateSwitch`, giving it
the tag value to switch on, the safety block to default to,
and the expected number of branches (to optimize memory allocation).
Next, we create a vector of blocks, and for each branch,
we append to it a corresponding block `branch_block`, into
which we insert the LLVM IR corresponding to the
instructions of the branch. No matter the branch we take,
we eventually want to come back to the same basic block,
which will perform the usual function cleanup via Update and Slide.
We re-use the safety block for this, and use `CreateBr` at the
end of each `branch_block` to perform an unconditional jump.
After we create each of the blocks, we use the `tag_mappings`
to add cases to the switch instruction, using `addCase`. Finally,
we set the builder's insertion point to the safety block,
meaning that the next instructions will insert their
LLVM IR into that block. Since we have all branches
jump to the safety block at the end, this means that
no matter which branch we take in the case expression,
we will still execute the subsequent instructions as expected.
Let's now look at `instruction_binop`:
{{< codelines "C++" "compiler/08/instruction.cpp" 139 150 >}}
In this instruction, we pop and unwrap two integers from
the stack (assuming they are integers). Depending on
the type of operation the instruction is set to, we
then push the result of the corresponding LLVM
instruction. `PLUS` calls LLVM's `CreateAdd` to insert
addition, `MINUS` calls `CreateSub`, and so on. No matter
what the operation was, we push the result onto the stack.
That's all for our instructions! We're so very close now. Let's
move on to compiling definitions.
### Definitions to LLVM IR
As with typechecking, to allow for mutually recursive functions,
we need to be able to reference each global function from any other function.
We then take the same approach as before, going in two passes.
This leads to two new methods for `definition`:
```C++
virtual void gen_llvm_first(llvm_context& ctx) = 0;
virtual void gen_llvm_second(llvm_context& ctx) = 0;
```
The first pass is intended to register all functions into
the `llvm_context`, making them visible to other functions.
The second pass is used to actually generate the code for
each function, now having access to all the other global
functions. Let's see the implementation for `gen_llvm_first`
for `definition_defn`:
{{< codelines "C++" "compiler/08/definition.cpp" 58 60 >}}
Since `create_custom_function` already creates a function
__and__ registers it with `llvm_context`, this is
all we need. Note that we created a new member variable
for `definition_defn` which stores this newly created
function. In the second pass, we will populate this
function with LLVM IR from the definition's instructions.
We actually create functions for each of the constructors
of data types, but they're quite special: all they do is
pack their arguments! Since they don't need access to
the other global functions, we might as well create
their bodies then and there:
{{< codelines "C++" "compiler/08/definition.cpp" 101 112 >}}
Like in `definition_defn`, we use `create_custom_function`.
However, we then use `SetInsertPoint` to configure our builder to insert code into
the newly created function (which already has a `BasicBlock`,
thanks to that one previously unexplained line in `create_custom_function`!).
Since we decided to only include the Pack instruction, we generate
a call to it directly using `create_pack`. We follow this
up with `CreateRetVoid`, which tells LLVM that this is
the end of the function, and that it is now safe to return
from it.
Great! We now implement the second pass of `gen_llvm`. In
the case of `definition_defn`, we do almost exactly
what we did in the first pass of `definition_data`:
{{< codelines "C++" "compiler/08/definition.cpp" 62 68 >}}
As for `definition_data`, we have nothing to do in the
second pass. We're done!
### Getting Results
We're almost there. Two things remain. The first: our implementation
of `ast_binop` implements each binary operation as simply a function call:
`+` calls `f_plus`, and so on. But so far, we have not implemented
`f_plus`, or any other binary operator function. We do this
in `main.cpp`, creating a function `gen_llvm_internal_op`:
{{< codelines "C++" "compiler/08/main.cpp" 70 83 >}}
We create a simple function body. We then append G-machine
instructions that take each argument, evaluate it,
and then perform the corresponding binary operation.
With these instructions in the body, we insert
them into a new function, just like we did in our code
for `definition_defn` and `definition_data`.
Finally, we write our `gen_llvm` function that we will
call from `main`:
{{< codelines "C++" "compiler/08/main.cpp" 125 141 >}}
It first creates the functions for
`+`, `-`, `*`, and `/`. Then, it calls the first
pass of `gen_llvm` on all definitions, followed
by the second pass. Lastly, it uses LLVM's built-in
functionality to print out the generated IR in
our module, and then uses a function `output_llvm`
to create an object file ready for linking.
To be very honest, I took the `output_llvm` function
almost entirely from instructional material for my university's
compilers course. The gist of it, though, is: we determine
the target architecture and platform, specify a "generic" CPU,
create a default set of options, and then generate an object file.
Here it is:
{{< codelines "C++" "compiler/08/main.cpp" 85 123 >}}
We now add a `gen_llvm` call to `main`.
Are we there?
Let's try to compile our first example, `works1.txt`. The
file:
{{< rawblock "compiler/08/examples/works1.txt" >}}
We run the following commands in our build directory:
```
./compiler < ../examples/works1.txt
gcc -no-pie ../runtime.c program.o
./a.out
```
Nothing happens. How anticlimactic! Our runtime has no way of
printing out the result of the evaluation. Let's change that:
{{< codelines "C++" "compiler/08/runtime.c" 157 183 >}}
Rerunning our commands, we get:
```
Result: 326
```
The correct result! Let's try it with `works2.txt`:
{{< rawblock "compiler/08/examples/works2.txt" >}}
And again, we get the right answer:
```
Result: 326
```
This is child's play, though. Let's try with something
more complicated, like `works3.txt`:
{{< rawblock "compiler/08/examples/works3.txt" >}}
Once again, our program does exactly what we intended:
```
Result: 3
```
Alright, this is neat, but we haven't yet confirmed that
lazy evaluation works. How about we try it with
`works5.txt`:
{{< rawblock "compiler/08/examples/works5.txt" >}}
Yet again, the program works:
```
Result: 9
```
At last, we have a working compiler!
While this is a major victory, we are not yet
finished with the compiler altogether. While
we allocate nodes whenever we need them, we
have not once uttered the phrase `free` in our
runtime. Our language works, but we have no way
of comparing numbers, no lambdas, no `let/in`.
In the next several posts, we will improve
our compiler to properly free unused memory
using a __garbage collector__, implement
lambda functions using __lambda lifting__,
and use our Alloc instruction to implement `let/in` expressions. See
you there!

View File

@@ -0,0 +1,102 @@
---
title: "Thoughts on Better Explanations"
date: 2019-10-12T00:33:02-07:00
tags: ["Language Server Protocol"]
---
How do you explain how to write a program?
Instructional material is becoming more and more popular on the web, with
thousands of programming tutorials for languages, frameworks,
and technologies created on YouTube, Medium, and people's
personal sites. And yet, there seems to be little standardization or
progress towards an "effective" way. Everyone is pasting code
examples, showing gists, or even sharing whole projects on GitHub.
When I was writing the earliest posts on this site, I did the same.
Write some code, copy paste it, be done. Write some code, link it,
be done. If I'm feeling fancy, write some code, gist it, be done.
It's not unlikely for code presented in this way
to become outdated and dysfunctional.
I discovered a whole new perspective when going through
[Software Foundations](https://softwarefoundations.cis.upenn.edu/). What's
different about that book is that the line between source code and instructional
text is blurred - the HTML is generated from the comments in the Coq file, and
code from the Coq file is included as snippets in the book. Rather than
having readers piece together the snippets from the HTML, it simply directed
them to the Coq file from which the page was generated. It maintained
both the benefits of a live code example, and of a textbook written to teach,
not to simply explain what the code does.
This is reminiscent of [Literate Programming](https://en.wikipedia.org/wiki/Literate_programming),
a style of programming in which the explanation of the program, in human-oriented order, is presented,
with code as supporting material. Tools such as CWEB implement Literate Programming, allowing
users to write files that are then converted into C source, and can be compiled as usual. I was intrigued
by the idea, but in all honesty, found it lacking.
For one, there is the problem of an extra processing step. Compilers are written to compile C, and not
CWEB files. Thus, a program must take CWEB source, convert it to C, and then a compiler must
convert the C code to machine language. This doesn't feel elegant - you're effectively
stripping the CWEB source files of the text you added to them. In technical terms, it's not really
that big of an issue - software build systems already have support for multiple processing steps,
and it would be hard to CWEB a piece of software large enough that the intermediate step will cause problems.
Another issue is the lack of universality. CWEB is specialized for C. WEB, the original literate programming
tool, is specialized for Pascal. There's tools that are language agnostic, of course, such as noweb. But
the [Wikipedia page for noweb](https://en.wikipedia.org/wiki/Noweb) drops this bomb:
> noweb defines a specific file format and a file is likely to interleave three different formats
> (noweb, latex and the language used for the software). This is not recognised by other software development
> tools and consequently using noweb excludes the use of UML or code documentation tools.
This may be the worst trade deal in the history of trade deals, maybe ever! By trying to explain how our
code works, __we sacrifice all other tooling__. Worse, because Literate Programming encourages presenting
code in fragments and out of order, it is particularly difficult to reason about programs in an automated
setting.
When I present code to a reader, I want to write it with the use of existing tooling. I want my syntax
highlighting. I want my linting. I want my build system. And in the same way, a user who is reading
my code wants to be able to view it, change it, experiment with it. Furthermore, though, I want
to be able to guide the reader's attention. Text-in-comments works great for Coq, but other languages like
C++, in which the order of declarations matters, may not be as suited for such an approach.
In essence, I want:
* The power of language-specific tooling, without having to extend the tooling itself
* A universal way of describing a program in any language
* A way of maintaining synchrony between the explanation and the source
I have an idea of a piece of software that can do such a thing.
### A Language Server Based Tool
It is a well known problem that various editors support different languages
with mixed success. The idea of the Language Server Protocol is to allow
for a program (the server) to be in charge of making sense of the code, and then
communicate the results to an editor. The editor, in that case,
doesn't have to do as much heavy lifting, and instead just queries
the language server when it needs information.
While this technology is used for text editors, I think it can
be adapted to educational texts that reference a particular
codebase. I envision the following workflow:
1. An author writes their tutorial/book/blog post
in their markup language of choice (Markdown).
2. They reference a fragment of code (a function, a variable)
through a specialized syntax.
3. When the HTML/LaTeX output is created, a language server
is started. The language server uses information from
the references in step 2 to insert code fragments into
the generated output.
After each "conversion" of source text to HTML/LaTeX, the
code in the generated snippets will be in sync with the codebase.
At the same time, changing the source text will not require changing
the source files. Finally, since language servers exist for most
established languages, this system can work nearly out of the box,
and even be added to established projects with no changes to the projects
themselves.
Of course, this is just a rough idea. I'm not sure how plausible it is
to include snippets with the use of Language Server Protocol. But
I certainly would like to try!

178
content/blog/sidenotes.md Normal file
View File

@@ -0,0 +1,178 @@
---
title: JavaScript-Free Sidenotes in Hugo
date: 2019-12-07T00:23:34-08:00
tags: ["Website", "Hugo", "CSS"]
---
A friend recently showed me a website, the design of which I really liked:
Gwern Branwen's [personal website](https://www.gwern.net/index). In particular,
I found that __sidenotes__ were a feature that I didn't even know I needed.
A lot of my writing seems to use small parenthesized remarks (like this), which,
although it doesn't break the flow in a grammatical sense, lengthens the
sentence, and makes it harder to follow. Since I do my best to write content
to help explain stuff (like the [compiler series]({{< relref "00_compiler_intro.md" >}})),
making sentences __more__ difficult to understand is a no-go.
So, what do they look like?
{{< sidenote "right" "example-note" "Here's an example sidenote." >}}
This is this example note's content.
{{< /sidenote >}}
If you're on a mobile device, the content is hidden by default: there's no
"side" on which the note fits. In this case, you can click or tap the underlined
portion of the text, which is the part to which the sidenote is related or refers to.
Otherwise, the example sidenote should be visible
{{< sidenote "left" "left-note" "on the right side of the screen." >}}
Sidenotes can also appear on the left of the screen, to help prevent situations
in which there are too many sidenotes and not enough space.
{{< /sidenote >}}
A major goal of mine in implementing these sidenotes was to avoid the use of JavaScript.
This is driven by my recent installation of uMatrix. uMatrix is an extension
that blocks JavaScript loaded from domains other than the one you're visiting.
To my frustration, a lot of the websites I commonly visit ended up broken:
[Canvas](https://github.com/instructure/canvas-lms), Discord, YouTube, and
Disqus all fail catastrophically when they aren't allowed to load dozens of scripts
from various sources. Out of spite, I want my site to work without any JavaScript,
and these notes are no exception.
### Implementation
Some of this work has been inspired by
[this article](https://www.kooslooijesteijn.net/blog/semantic-sidenotes).
The first concern was not having to write raw HTML to add the side notes,
but this is fairly simple with Hugo's shortcodes: I write the HTML once in
a new `sidenote` shortcode, then call the shortcode from my posts. The next
issue is a matter of HTML standards. Markdown rendering generates `<p>` tags.
According to the spec, `<p>` tags cannot have a block element inside
them. When you _try_ to put a block element, such as `<div>` inside `<p>`,
the browser will automatically close the `<p>` tag, breaking the rest of the page.
So, even though conceptually (and visually) the content of the sidenote is a block,
{{< sidenote "right" "markdown-note" "it has to be inside an inline element." >}}
There's another consequence to this. Hugo implements Markdown inside shortcodes
by rendering the "inner" part of the shortcode, substituting the result into the
shortcode's definition, and then finally placing that into the final output. Since
the Markdown renderer wraps text in paragraphs, which are block elements, the
inside of the shortcode ends up with block elements. The same tag-closing issue
manifests, and the view ends up broken. So, Markdown cannot be used inside sidenotes.
{{< /sidenote >}}
That's not too bad, overall. We end up with a shortcode definition as follows:
```HTML
<span class="sidenote">
<label class="sidenote-label" for="{{ .Get 1 }}">{{ .Get 2 }}</label>
<input class="sidenote-checkbox" type="checkbox" id="{{ .Get 1 }}"></input>
<span class="sidenote-content sidenote-{{ .Get 0 }}">
{{ .Inner }}
</span>
</span>
```
As Koos points out, "label" works as a semantic tag for the text that references
the sidenote. It also helps us with the checkbox
`<input>`, which we will examine later. Since it will receive its own style,
the inner content of the sidenote is wrapped in another `<span>`. Let's
get started on styling the parts of a sidenote, beginning with the content:
```SCSS
.sidenote-content {
display: block;
position: absolute;
width: $sidenote-width;
box-sizing: border-box;
margin-top: -1.5em;
&.sidenote-right {
right: 0;
margin-right: -($sidenote-width + $sidenote-offset);
}
// ...
}
```
As you can see from the sidenotes on this page, they are displayed as a block.
We start with that, then switch the sidenotes to be positioned absolutely, so that
we can place them exactly to the right of the content, and then some. We also
make sure that the box is sized __exactly__ the amount in `$sidenote-width`, by
ensuring that the border and padding are included in the size calculation
using `border-box`. We also hide the checkbox:
```SCSS
.sidenote-checkbox {
display: none;
}
```
Finally, let's make one more adjustment to the sidenote and its label: when
you hover over one of them, the other will change its appearance slightly,
so that you can tell which note refers to which label. We can do so by detecting
hover of the parent element:
```SCSS
.sidenote {
&:hover {
.sidenote-label { /* style for the label */ }
.sidenote-content { /* style for the sidenote */ }
}
}
```
### Hiding and Showing
So far, it's hard to imagine where JavaScript would come in. If you were
always looking at the page from a wide-screen machine, it wouldn't at all.
Unfortunately phones don't leave a lot of room for margins and sidenotes, so to
make sure that these notes are visible to mobile users, we want to show
them inline. Since the entire idea of sidenotes is to present more information
__without__ interrupting the main text, we don't want to plop something down
in the middle of the screen by default. So we hide sidenotes, and show them only
when their label is clicked.
Gwern's site doesn't show the notes on mobile at all (when simulated using Firefox's
responsive design mode), and Koos uses JavaScript to toggle the sidenote text. We will
go another route.
This is where the checkbox `<input>` comes in. When the `<input>` checkbox is
checked, we show the sidenote text, as a block, in the middle of the page. When
it is not checked, we keep it hidden. Of course, keeping a checkbox in the middle
of the page is not pretty, so we keep it hidden. Rather than clicking the checkbox
directly,
{{< sidenote "right" "accessibility-note" "the users can click the text that refers to the sidenote," >}}
I'm not sure about the accessibility of such an arrangement. The label is semantic, sure, but
the checkbox is more sketchy. Take this design with a grain of salt.
{{< /sidenote >}} which happens
to also be a label for the checkbox input. Clicking the label toggles the checkbox,
and with it the display of the sidenote. We can use the following CSS to
get that to work:
```SCSS
.sidenote-content {
// ...
@media screen and
(max-width: $container-width + 2 * ($sidenote-width + 2 * $sidenote-offset)) {
position: static;
margin-top: 10px;
margin-bottom: 10px;
width: 100%;
display: none;
.sidenote-checkbox:checked ~ & {
display: block;
}
&.sidenote-right {
margin-right: 0px;
}
// ...
}
// ...
}
```
We put the position back to `static`, and add margins on the top and bottom of the note.
We keep `display` set to `none`, unless the checkbox contained in the sidenote span
is checked. Finally, we reset the margin we created earlier, since we're not moving this
note anywhere.
### Conclusion
Here, we've implemented sidenotes in Hugo with zero JavaScript. They work well on
both mobile and desktop devices, though their accessibility is, at present,
somewhat uncertain.

View File

@@ -1,7 +1,6 @@
--- ---
title: Switching to a Static Site Generator title: Switching to a Static Site Generator
date: 2019-08-05T01:13:58-07:00 date: 2019-08-05T01:13:58-07:00
draft: true
tags: ["Website"] tags: ["Website"]
--- ---
A long time ago, I decided to try out Jekyll for my website. However, it all felt too A long time ago, I decided to try out Jekyll for my website. However, it all felt too

View File

@@ -0,0 +1,70 @@
@import "style.scss";
// Width of a sidenote box. Also feeds the negative side margin that pushes the
// note out of the text column, and the media-query breakpoint below.
$sidenote-width: 350px;
// Extra horizontal distance added on top of the width when offsetting a note
// into the margin.
$sidenote-offset: 15px;
// Hovering anywhere inside a sidenote wrapper (label or note body) highlights
// both parts, so the reader can tell which label belongs to which note.
.sidenote:hover {
  .sidenote-label {
    color: white;
    background-color: $primary-color;
  }

  .sidenote-content {
    // Thicker dashed border with 1px less padding; presumably chosen so that
    // border + padding stays the same as in the non-hovered note and the box
    // does not change size on hover.
    padding: 9px;
    border: 2px dashed $primary-color;
  }
}
// The inline text a sidenote is attached to; marked with an underline in the
// primary color.
.sidenote-label {
border-bottom: 2px solid $primary-color;
}
// The checkbox is never rendered; only its :checked state is used, via the
// `.sidenote-checkbox:checked ~ &` selector inside .sidenote-content.
.sidenote-checkbox {
display: none;
}
// The body of a sidenote.
//
// Wide screens: the note is absolutely positioned and pushed past the left or
// right edge by a negative margin of ($sidenote-width + $sidenote-offset).
//
// Narrow screens (see the media query): the note goes back into normal flow,
// is hidden by default, and is shown as a full-width block only while the
// preceding .sidenote-checkbox sibling is checked.
//
// All plain declarations are kept ahead of the nested rules. Declarations that
// follow nested rules are deprecated in Sass ("mixed declarations") and are
// emitted in a different position by newer compilers.
.sidenote-content {
  display: block;
  position: absolute;
  width: $sidenote-width;
  margin-top: -1.5em;
  @include bordered-block;
  padding: 10px;
  box-sizing: border-box;
  text-align: left;

  &.sidenote-right {
    right: 0;
    margin-right: -($sidenote-width + $sidenote-offset);
  }

  &.sidenote-left {
    left: 0;
    margin-left: -($sidenote-width + $sidenote-offset);
  }

  // Breakpoint: the container plus, on each side, one sidenote width and
  // twice the offset. Below this the notes are rendered inline.
  @media screen and
    (max-width: $container-width + 2 * ($sidenote-width + 2 * $sidenote-offset)) {
    position: static;
    margin-top: 10px;
    margin-bottom: 10px;
    width: 100%;
    display: none;

    // Show the note only while its checkbox (toggled through the label) is checked.
    .sidenote-checkbox:checked ~ & {
      display: block;
    }

    // Undo the negative side margins; the note is no longer moved sideways.
    &.sidenote-left {
      margin-left: 0px;
    }

    &.sidenote-right {
      margin-right: 0px;
    }
  }
}

View File

@@ -1,15 +1,29 @@
$container-width: 700px; $container-width: 800px;
$primary-color: #36e281; $primary-color: #36e281;
$primary-color-dark: #1dc868; $primary-color-dark: darken($primary-color, 10%);
$code-color: #f0f0f0;
$code-color-dark: darken($code-color, 10%);
$border-color: #bfbfbf;
$font-heading: "Lora", serif; $font-heading: "Lora", serif;
$font-body: "Raleway", serif; $font-body: "Raleway", serif;
$font-code: "Inconsolata", monospace; $font-code: "Inconsolata", monospace;
$standard-border: 1px solid $border-color;
@mixin bordered-block {
border: $standard-border;
border-radius: 2px;
}
body { body {
font-family: $font-body; font-family: $font-body;
font-size: 1.0em; font-size: 1.0em;
line-height: 1.5; line-height: 1.5;
margin-bottom: 1em; margin-bottom: 1em;
text-align: justify;
}
main {
position: relative;
} }
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
@@ -17,6 +31,7 @@ h1, h2, h3, h4, h5, h6 {
margin-top: .5em; margin-top: .5em;
font-family: $font-heading; font-family: $font-heading;
font-weight: normal; font-weight: normal;
text-align: left;
a { a {
color: black; color: black;
@@ -29,14 +44,14 @@ h1, h2, h3, h4, h5, h6 {
code { code {
font-family: $font-code; font-family: $font-code;
background-color: #f0f0f0; background-color: $code-color;
} }
pre code { pre code {
display: block; display: block;
padding: 0.5em; padding: 0.5em;
overflow-x: auto; overflow-x: auto;
background-color: #f0f0f0; background-color: $code-color;
} }
.container { .container {
@@ -57,6 +72,7 @@ pre code {
color: white; color: white;
transition: color 0.25s; transition: color 0.25s;
transition: background-color 0.25s; transition: background-color 0.25s;
text-align: left;
&:focus { &:focus {
outline: none; outline: none;
@@ -126,18 +142,6 @@ a {
text-decoration: none; text-decoration: none;
} }
input[type="text"], input[type="password"], textarea {
padding: 0.5em 0em 0.5em 0em;
margin: 0.5em 0.5em 0.5em 0em;
border: none;
border-bottom: solid 0.2em $primary-color-dark;
transition: border 0.25s;
&:focus {
outline: none;
border-bottom: solid 0.2em white;
}
}
img { img {
max-width: 100% max-width: 100%
} }

View File

@@ -1,5 +1,5 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html lang="{{ .Site.Language.Lang }}">
{{- partial "head.html" . -}} {{- partial "head.html" . -}}
<body> <body>
{{- partial "header.html" . -}} {{- partial "header.html" . -}}

View File

@@ -6,7 +6,9 @@
<link href="https://fonts.googleapis.com/css?family=Inconsolata|Lora|Raleway" rel="stylesheet"> <link href="https://fonts.googleapis.com/css?family=Inconsolata|Lora|Raleway" rel="stylesheet">
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css"> <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css">
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }} {{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
{{ $sidenotes:= resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
<link rel="stylesheet" href="{{ $style.Permalink }}"> <link rel="stylesheet" href="{{ $style.Permalink }}">
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}">
<script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML' async></script> <script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML' async></script>
{{ template "_internal/google_analytics.html" . }} {{ template "_internal/google_analytics.html" . }}

View File

@@ -0,0 +1,7 @@
{{- /*
  Sidenote shortcode. Positional parameters:
    0: side, appended to the "sidenote-" class ("left" or "right")
    1: id shared by the checkbox and the label's "for" attribute
    2: label text shown inline in the paragraph
  .Inner is rendered as the note body.

  <input> is a void element, so it is written without a closing tag.
*/ -}}
<span class="sidenote">
  <label class="sidenote-label" for="{{ .Get 1 }}">{{ .Get 2 }}</label>
  <input class="sidenote-checkbox" type="checkbox" id="{{ .Get 1 }}">
  <span class="sidenote-content sidenote-{{ .Get 0 }}">
    {{ .Inner }}
  </span>
</span>