Compare commits
103 Commits
8bf67c7dc3
...
donations
| Author | SHA1 | Date | |
|---|---|---|---|
| d5f478b3c6 | |||
| 0f96b93532 | |||
| 5449affbc8 | |||
| 2cf19900db | |||
| efe5d08430 | |||
| 994e9ed8d2 | |||
| 72af5cb7f0 | |||
| 308ee34025 | |||
| 9839befdf1 | |||
| d688df6c92 | |||
| 24eef25984 | |||
| 77ae0be899 | |||
| ca939da28e | |||
| 5d0920cb6d | |||
| d1ea7b5364 | |||
| ebdb986e2a | |||
| 4bb6695c2e | |||
| a6c5a42c1d | |||
| c44c718d06 | |||
| 5e4097453b | |||
| bfeae89ab5 | |||
| 755364c0df | |||
| dcb1e9a736 | |||
| c8543961af | |||
| cbad3b76eb | |||
| b3ff2fe135 | |||
| 6a6f25547e | |||
| 43dfee56cc | |||
| 6f9a2ce092 | |||
| 06014eade9 | |||
| 6f92a50c83 | |||
| 60eb50737d | |||
| 250746e686 | |||
| 3bac151b08 | |||
| c61d9ccb99 | |||
| 56ad03b833 | |||
| 2f9e6278ba | |||
| 17e0fbc6fb | |||
| 7ee7feadf3 | |||
| b36ea558a3 | |||
| 17d6a75465 | |||
| d5541bc985 | |||
| 98a46e9fd4 | |||
| 2e3074df00 | |||
| b3dc3e690b | |||
| b1943ede2f | |||
| 0467e4e12f | |||
| 8164624cee | |||
| e0451d026c | |||
| 1f1345477f | |||
| 44529e872f | |||
| a10996954e | |||
| 4d1dfb5f66 | |||
| f97b624688 | |||
| 8215c59122 | |||
| eb97bd9c3e | |||
| d2e100fe4b | |||
| de09a1f6bd | |||
| c40672e762 | |||
| 565d4a6955 | |||
| 8f0f2eb35e | |||
| 234b795157 | |||
| e317c56c99 | |||
| 29d12a9914 | |||
| b459e9cbfe | |||
| 52abe73ef7 | |||
| f0fe481bcf | |||
| 222446a937 | |||
| e7edd43034 | |||
| 2bc2c282e1 | |||
| 5cc92d3a9d | |||
| 4be8a25699 | |||
| d3421733e1 | |||
| 4c099a54e8 | |||
| 9f77f07ed2 | |||
| 04ab1a137c | |||
| 53744ac772 | |||
| 50a1c33adb | |||
| d153af5212 | |||
| a336b27b6c | |||
| 97eb4b6e3e | |||
| 430768eac5 | |||
| 5db864881a | |||
| d3b1047d37 | |||
| 98cac103c4 | |||
| 7226d66f67 | |||
| 8a352ed3ea | |||
| 02f8306c7b | |||
| cf6f353f20 | |||
| 7a631b3557 | |||
| 5e13047846 | |||
| c17d532802 | |||
| 55e4e61906 | |||
| f2f88ab9ca | |||
| ba418d357f | |||
| 0e3f16139d | |||
| 55486d511f | |||
| 6080094c41 | |||
| 6b8d3b0f8a | |||
| 725958137a | |||
| 1f6b4bef74 | |||
| fe1e0a6de0 | |||
| 1f3c42fc44 |
9
.gitmodules
vendored
Normal file
9
.gitmodules
vendored
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
[submodule "code/aoc-2020"]
|
||||||
|
path = code/aoc-2020
|
||||||
|
url = https://dev.danilafe.com/Advent-of-Code/AdventOfCode-2020.git
|
||||||
|
[submodule "code/libabacus"]
|
||||||
|
path = code/libabacus
|
||||||
|
url = https://dev.danilafe.com/Experiments/libabacus
|
||||||
|
[submodule "themes/vanilla"]
|
||||||
|
path = themes/vanilla
|
||||||
|
url = https://dev.danilafe.com/Web-Projects/vanilla-hugo.git
|
||||||
36
assets/scss/donate.scss
Normal file
36
assets/scss/donate.scss
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
@import "../../themes/vanilla/assets/scss/mixins.scss";
|
||||||
|
|
||||||
|
.donation-methods {
|
||||||
|
padding: 0;
|
||||||
|
border: none;
|
||||||
|
border-spacing: 0 0.5rem;
|
||||||
|
|
||||||
|
td {
|
||||||
|
padding: 0;
|
||||||
|
overflow: hidden;
|
||||||
|
|
||||||
|
&:first-child {
|
||||||
|
@include bordered-block;
|
||||||
|
text-align: right;
|
||||||
|
border-right: none;
|
||||||
|
border-top-right-radius: 0;
|
||||||
|
border-bottom-right-radius: 0;
|
||||||
|
padding-left: 0.5em;
|
||||||
|
padding-right: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
&:last-child {
|
||||||
|
@include bordered-block;
|
||||||
|
border-top-left-radius: 0;
|
||||||
|
border-bottom-left-radius: 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
code {
|
||||||
|
width: 100%;
|
||||||
|
box-sizing: border-box;
|
||||||
|
border: none;
|
||||||
|
display: inline-block;
|
||||||
|
padding: 0.25rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
11
assets/scss/gametheory.scss
Normal file
11
assets/scss/gametheory.scss
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
@import "variables.scss";
|
||||||
|
@import "mixins.scss";
|
||||||
|
|
||||||
|
.assumption-number {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.assumption {
|
||||||
|
@include bordered-block;
|
||||||
|
padding: 0.8rem;
|
||||||
|
}
|
||||||
1
code/aoc-2020
Submodule
1
code/aoc-2020
Submodule
Submodule code/aoc-2020 added at 7a8503c3fe
@@ -37,6 +37,9 @@ add_executable(compiler
|
|||||||
instruction.cpp instruction.hpp
|
instruction.cpp instruction.hpp
|
||||||
graph.cpp graph.hpp
|
graph.cpp graph.hpp
|
||||||
global_scope.cpp global_scope.hpp
|
global_scope.cpp global_scope.hpp
|
||||||
|
parse_driver.cpp parse_driver.hpp
|
||||||
|
mangler.cpp mangler.hpp
|
||||||
|
compiler.cpp compiler.hpp
|
||||||
${BISON_parser_OUTPUTS}
|
${BISON_parser_OUTPUTS}
|
||||||
${FLEX_scanner_OUTPUTS}
|
${FLEX_scanner_OUTPUTS}
|
||||||
main.cpp
|
main.cpp
|
||||||
|
|||||||
@@ -3,6 +3,8 @@
|
|||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include "binop.hpp"
|
#include "binop.hpp"
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
#include "env.hpp"
|
#include "env.hpp"
|
||||||
|
|
||||||
@@ -45,7 +47,7 @@ type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
|||||||
this->env = env;
|
this->env = env;
|
||||||
type_scheme_ptr lid_type = env->lookup(id);
|
type_scheme_ptr lid_type = env->lookup(id);
|
||||||
if(!lid_type)
|
if(!lid_type)
|
||||||
throw type_error(std::string("unknown identifier ") + id, loc);
|
throw type_error("unknown identifier " + id, loc);
|
||||||
return lid_type->instantiate(mgr);
|
return lid_type->instantiate(mgr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -54,15 +56,10 @@ void ast_lid::translate(global_scope& scope) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
auto mangled_name = this->env->get_mangled_name(id);
|
|
||||||
|
|
||||||
// Local names shouldn't need mangling.
|
|
||||||
assert(!(mangled_name != id && !this->env->is_global(id)));
|
|
||||||
|
|
||||||
into.push_back(instruction_ptr(
|
into.push_back(instruction_ptr(
|
||||||
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
|
(this->env->is_global(id)) ?
|
||||||
(instruction*) new instruction_push(env->get_offset(id)) :
|
(instruction*) new instruction_pushglobal(this->env->get_mangled_name(id)) :
|
||||||
(instruction*) new instruction_pushglobal(mangled_name)));
|
(instruction*) new instruction_push(env->get_offset(id))));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_uid::print(int indent, std::ostream& to) const {
|
void ast_uid::print(int indent, std::ostream& to) const {
|
||||||
@@ -78,7 +75,7 @@ type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
|||||||
this->env = env;
|
this->env = env;
|
||||||
type_scheme_ptr uid_type = env->lookup(id);
|
type_scheme_ptr uid_type = env->lookup(id);
|
||||||
if(!uid_type)
|
if(!uid_type)
|
||||||
throw type_error(std::string("unknown constructor ") + id, loc);
|
throw type_error("unknown constructor " + id, loc);
|
||||||
return uid_type->instantiate(mgr);
|
return uid_type->instantiate(mgr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -108,7 +105,7 @@ type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
|||||||
type_ptr ltype = left->typecheck(mgr, env);
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
type_ptr rtype = right->typecheck(mgr, env);
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
||||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op), loc);
|
if(!ftype) throw type_error("unknown binary operator " + op_name(op), loc);
|
||||||
|
|
||||||
// For better type errors, we first require binary function,
|
// For better type errors, we first require binary function,
|
||||||
// and only then unify each argument. This way, we can
|
// and only then unify each argument. This way, we can
|
||||||
@@ -215,6 +212,10 @@ type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
|||||||
|
|
||||||
input_type = mgr.resolve(case_type, var);
|
input_type = mgr.resolve(case_type, var);
|
||||||
type_app* app_type;
|
type_app* app_type;
|
||||||
|
if(!(app_type = dynamic_cast<type_app*>(input_type.get())) ||
|
||||||
|
!dynamic_cast<type_data*>(app_type->constructor.get())) {
|
||||||
|
throw type_error("attempting case analysis of non-data type");
|
||||||
|
}
|
||||||
|
|
||||||
return branch_type;
|
return branch_type;
|
||||||
}
|
}
|
||||||
@@ -226,209 +227,60 @@ void ast_case::translate(global_scope& scope) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct case_mappings {
|
|
||||||
using tag_type = typename T::tag_type;
|
|
||||||
std::map<tag_type, std::vector<instruction_ptr>> defined_cases;
|
|
||||||
std::optional<std::vector<instruction_ptr>> default_case;
|
|
||||||
|
|
||||||
std::vector<instruction_ptr>& make_case_for(tag_type tag) {
|
|
||||||
if(default_case)
|
|
||||||
throw type_error("attempted pattern match after catch-all");
|
|
||||||
return defined_cases[tag];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<instruction_ptr>& make_default_case() {
|
|
||||||
if(default_case)
|
|
||||||
throw type_error("attempted repeated use of catch-all");
|
|
||||||
default_case.emplace(std::vector<instruction_ptr>());
|
|
||||||
return *default_case;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<instruction_ptr>& get_specific_case_for(tag_type tag) {
|
|
||||||
auto existing_case = defined_cases.find(tag);
|
|
||||||
assert(existing_case != defined_cases.end());
|
|
||||||
return existing_case->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<instruction_ptr>& get_default_case() {
|
|
||||||
assert(default_case);
|
|
||||||
return *default_case;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<instruction_ptr>& get_case_for(tag_type tag) {
|
|
||||||
if(case_defined_for(tag)) return get_specific_case_for(tag);
|
|
||||||
return get_default_case();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool case_defined_for(tag_type tag) {
|
|
||||||
return defined_cases.find(tag) != defined_cases.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool default_case_defined() { return default_case.has_value(); }
|
|
||||||
|
|
||||||
size_t defined_cases_count() { return defined_cases.size(); }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
struct case_strategy_bool {
|
|
||||||
using tag_type = bool;
|
|
||||||
using repr_type = bool;
|
|
||||||
|
|
||||||
tag_type tag_from_repr(repr_type b) { return b; }
|
|
||||||
|
|
||||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
|
||||||
pattern_constr* cpat;
|
|
||||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) ||
|
|
||||||
(cpat->constr != "True" && cpat->constr != "False") ||
|
|
||||||
cpat->params.size() != 0)
|
|
||||||
throw type_error(
|
|
||||||
"pattern cannot be converted to a boolean",
|
|
||||||
pt->loc);
|
|
||||||
return cpat->constr == "True";
|
|
||||||
}
|
|
||||||
|
|
||||||
void compile_branch(
|
|
||||||
const branch_ptr& branch,
|
|
||||||
const env_ptr& env,
|
|
||||||
repr_type repr,
|
|
||||||
std::vector<instruction_ptr>& into) {
|
|
||||||
branch->expr->compile(env_ptr(new env_offset(1, env)), into);
|
|
||||||
into.push_back(instruction_ptr(new instruction_slide(1)));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t case_count(const type* type) {
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
void into_instructions(
|
|
||||||
const type* type,
|
|
||||||
case_mappings<case_strategy_bool>& ms,
|
|
||||||
std::vector<instruction_ptr>& into) {
|
|
||||||
if(ms.defined_cases_count() == 0) {
|
|
||||||
for(auto& instruction : ms.get_default_case())
|
|
||||||
into.push_back(std::move(instruction));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
into.push_back(instruction_ptr(new instruction_if(
|
|
||||||
std::move(ms.get_case_for(true)),
|
|
||||||
std::move(ms.get_case_for(false)))));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct case_strategy_data {
|
|
||||||
using tag_type = int;
|
|
||||||
using repr_type = std::pair<const type_data::constructor*, const std::vector<std::string>*>;
|
|
||||||
|
|
||||||
tag_type tag_from_repr(const repr_type& repr) { return repr.first->tag; }
|
|
||||||
|
|
||||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
|
||||||
pattern_constr* cpat;
|
|
||||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())))
|
|
||||||
throw type_error(
|
|
||||||
"pattern cannot be interpreted as constructor.",
|
|
||||||
pt->loc);
|
|
||||||
return std::make_pair(
|
|
||||||
&static_cast<const type_data*>(type)->constructors.find(cpat->constr)->second,
|
|
||||||
&cpat->params);
|
|
||||||
}
|
|
||||||
|
|
||||||
void compile_branch(
|
|
||||||
const branch_ptr& branch,
|
|
||||||
const env_ptr& env,
|
|
||||||
const repr_type& repr,
|
|
||||||
std::vector<instruction_ptr>& into) {
|
|
||||||
env_ptr new_env = env;
|
|
||||||
for(auto it = repr.second->rbegin(); it != repr.second->rend(); it++) {
|
|
||||||
new_env = env_ptr(new env_var(*it, new_env));
|
|
||||||
}
|
|
||||||
|
|
||||||
into.push_back(instruction_ptr(new instruction_split(repr.second->size())));
|
|
||||||
branch->expr->compile(new_env, into);
|
|
||||||
into.push_back(instruction_ptr(new instruction_slide(repr.second->size())));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t case_count(const type* type) {
|
|
||||||
return static_cast<const type_data*>(type)->constructors.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
void into_instructions(
|
|
||||||
const type* type,
|
|
||||||
case_mappings<case_strategy_data>& ms,
|
|
||||||
std::vector<instruction_ptr>& into) {
|
|
||||||
instruction_jump* jump_instruction = new instruction_jump();
|
|
||||||
instruction_ptr inst(jump_instruction);
|
|
||||||
|
|
||||||
auto data_type = static_cast<const type_data*>(type);
|
|
||||||
for(auto& constr : data_type->constructors) {
|
|
||||||
if(!ms.case_defined_for(constr.second.tag)) continue;
|
|
||||||
jump_instruction->branches.push_back(
|
|
||||||
std::move(ms.get_specific_case_for(constr.second.tag)));
|
|
||||||
jump_instruction->tag_mappings[constr.second.tag] =
|
|
||||||
jump_instruction->branches.size() - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(ms.default_case_defined()) {
|
|
||||||
jump_instruction->branches.push_back(
|
|
||||||
std::move(ms.get_default_case()));
|
|
||||||
for(auto& constr : data_type->constructors) {
|
|
||||||
if(ms.case_defined_for(constr.second.tag)) continue;
|
|
||||||
jump_instruction->tag_mappings[constr.second.tag] =
|
|
||||||
jump_instruction->branches.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
into.push_back(std::move(inst));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void compile_case(const ast_case& node, const env_ptr& env, const type* type, std::vector<instruction_ptr>& into) {
|
|
||||||
T strategy;
|
|
||||||
case_mappings<T> cases;
|
|
||||||
for(auto& branch : node.branches) {
|
|
||||||
pattern_var* vpat;
|
|
||||||
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
|
||||||
if(cases.defined_cases_count() == strategy.case_count(type))
|
|
||||||
throw type_error("redundant catch-all pattern", branch->pat->loc);
|
|
||||||
auto& branch_into = cases.make_default_case();
|
|
||||||
env_ptr new_env(new env_var(vpat->var, env));
|
|
||||||
branch->expr->compile(new_env, branch_into);
|
|
||||||
branch_into.push_back(instruction_ptr(new instruction_slide(1)));
|
|
||||||
} else {
|
|
||||||
auto repr = strategy.from_typed_pattern(branch->pat, type);
|
|
||||||
auto& branch_into = cases.make_case_for(strategy.tag_from_repr(repr));
|
|
||||||
strategy.compile_branch(branch, env, repr, branch_into);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!(cases.defined_cases_count() == strategy.case_count(type) ||
|
|
||||||
cases.default_case_defined()))
|
|
||||||
throw type_error("incomplete patterns", node.loc);
|
|
||||||
|
|
||||||
strategy.into_instructions(type, cases, into);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||||
type_data* data;
|
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
||||||
type_internal* internal;
|
|
||||||
|
|
||||||
of->compile(env, into);
|
of->compile(env, into);
|
||||||
into.push_back(instruction_ptr(new instruction_eval()));
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
if(app_type && (data = dynamic_cast<type_data*>(app_type->constructor.get()))) {
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
compile_case<case_strategy_data>(*this, env, data, into);
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
return;
|
for(auto& branch : branches) {
|
||||||
} else if(app_type && (internal = dynamic_cast<type_internal*>(app_type->constructor.get()))) {
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
if(internal->name == "Bool") {
|
pattern_var* vpat;
|
||||||
compile_case<case_strategy_bool>(*this, env, data, into);
|
pattern_constr* cpat;
|
||||||
return;
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||||
|
cpat->params.size())));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw type_error("attempting unsupported case analysis", of->loc);
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_let::print(int indent, std::ostream& to) const {
|
void ast_let::print(int indent, std::ostream& to) const {
|
||||||
@@ -586,7 +438,7 @@ void pattern_constr::find_variables(std::set<std::string>& into) const {
|
|||||||
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||||
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
|
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
|
||||||
if(!constructor_type_scheme) {
|
if(!constructor_type_scheme) {
|
||||||
throw type_error(std::string("pattern using unknown constructor ") + constr, loc);
|
throw type_error("pattern using unknown constructor " + constr, loc);
|
||||||
}
|
}
|
||||||
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
|
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
|
||||||
|
|
||||||
|
|||||||
@@ -6,9 +6,6 @@ std::string op_name(binop op) {
|
|||||||
case MINUS: return "-";
|
case MINUS: return "-";
|
||||||
case TIMES: return "*";
|
case TIMES: return "*";
|
||||||
case DIVIDE: return "/";
|
case DIVIDE: return "/";
|
||||||
case MODULO: return "%";
|
|
||||||
case EQUALS: return "==";
|
|
||||||
case LESS_EQUALS: return "<=";
|
|
||||||
}
|
}
|
||||||
return "??";
|
return "??";
|
||||||
}
|
}
|
||||||
@@ -19,9 +16,6 @@ std::string op_action(binop op) {
|
|||||||
case MINUS: return "minus";
|
case MINUS: return "minus";
|
||||||
case TIMES: return "times";
|
case TIMES: return "times";
|
||||||
case DIVIDE: return "divide";
|
case DIVIDE: return "divide";
|
||||||
case MODULO: return "modulo";
|
|
||||||
case EQUALS: return "equals";
|
|
||||||
case LESS_EQUALS: return "less_equals";
|
|
||||||
}
|
}
|
||||||
return "??";
|
return "??";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,16 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
#include <array>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
enum binop {
|
enum binop {
|
||||||
PLUS,
|
PLUS,
|
||||||
MINUS,
|
MINUS,
|
||||||
TIMES,
|
TIMES,
|
||||||
DIVIDE,
|
DIVIDE
|
||||||
MODULO,
|
};
|
||||||
EQUALS,
|
|
||||||
LESS_EQUALS,
|
constexpr binop all_binops[] = {
|
||||||
|
PLUS, MINUS, TIMES, DIVIDE
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string op_name(binop op);
|
std::string op_name(binop op);
|
||||||
|
|||||||
153
code/compiler/13/compiler.cpp
Normal file
153
code/compiler/13/compiler.cpp
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
#include "compiler.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "global_scope.hpp"
|
||||||
|
#include "parse_driver.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
|
#include "llvm/IR/Verifier.h"
|
||||||
|
#include "llvm/Support/TargetSelect.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/Support/FileSystem.h"
|
||||||
|
#include "llvm/Target/TargetOptions.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
void compiler::add_default_types() {
|
||||||
|
global_env->bind_type("Int", type_ptr(new type_base("Int")));
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::add_binop_type(binop op, type_ptr type) {
|
||||||
|
auto name = mng.new_mangled_name(op_action(op));
|
||||||
|
global_env->bind(op_name(op), std::move(type), visibility::global);
|
||||||
|
global_env->set_mangled_name(op_name(op), name);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::add_default_function_types() {
|
||||||
|
type_ptr int_type = global_env->lookup_type("Int");
|
||||||
|
assert(int_type != nullptr);
|
||||||
|
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
||||||
|
|
||||||
|
type_ptr closed_int_op_type(
|
||||||
|
new type_arr(int_type_app, type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||||
|
|
||||||
|
constexpr binop closed_ops[] = { PLUS, MINUS, TIMES, DIVIDE };
|
||||||
|
for(auto& op : closed_ops) add_binop_type(op, closed_int_op_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::parse() {
|
||||||
|
if(!driver())
|
||||||
|
throw compiler_error("failed to open file");
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::typecheck() {
|
||||||
|
std::set<std::string> free_variables;
|
||||||
|
global_defs.find_free(free_variables);
|
||||||
|
global_defs.typecheck(type_m, global_env);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::translate() {
|
||||||
|
for(auto& data : global_defs.defs_data) {
|
||||||
|
data.second->into_globals(global_scp);
|
||||||
|
}
|
||||||
|
for(auto& defn : global_defs.defs_defn) {
|
||||||
|
auto& function = defn.second->into_global(global_scp);
|
||||||
|
defn.second->env->set_mangled_name(defn.first, function.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::compile() {
|
||||||
|
global_scp.compile();
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::create_llvm_binop(binop op) {
|
||||||
|
auto new_function =
|
||||||
|
ctx.create_custom_function(global_env->get_mangled_name(op_name(op)), 2);
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(2)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(2)));
|
||||||
|
ctx.get_builder().SetInsertPoint(&new_function->getEntryBlock());
|
||||||
|
for(auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, new_function);
|
||||||
|
}
|
||||||
|
ctx.get_builder().CreateRetVoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::generate_llvm() {
|
||||||
|
for(auto op : all_binops) {
|
||||||
|
create_llvm_binop(op);
|
||||||
|
}
|
||||||
|
|
||||||
|
global_scp.generate_llvm(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::output_llvm(const std::string& into) {
|
||||||
|
std::string targetTriple = llvm::sys::getDefaultTargetTriple();
|
||||||
|
|
||||||
|
llvm::InitializeNativeTarget();
|
||||||
|
llvm::InitializeNativeTargetAsmParser();
|
||||||
|
llvm::InitializeNativeTargetAsmPrinter();
|
||||||
|
|
||||||
|
std::string error;
|
||||||
|
const llvm::Target* target =
|
||||||
|
llvm::TargetRegistry::lookupTarget(targetTriple, error);
|
||||||
|
if (!target) {
|
||||||
|
std::cerr << error << std::endl;
|
||||||
|
} else {
|
||||||
|
std::string cpu = "generic";
|
||||||
|
std::string features = "";
|
||||||
|
llvm::TargetOptions options;
|
||||||
|
std::unique_ptr<llvm::TargetMachine> targetMachine(
|
||||||
|
target->createTargetMachine(targetTriple, cpu, features,
|
||||||
|
options, llvm::Optional<llvm::Reloc::Model>()));
|
||||||
|
|
||||||
|
ctx.get_module().setDataLayout(targetMachine->createDataLayout());
|
||||||
|
ctx.get_module().setTargetTriple(targetTriple);
|
||||||
|
|
||||||
|
std::error_code ec;
|
||||||
|
llvm::raw_fd_ostream file(into, ec, llvm::sys::fs::F_None);
|
||||||
|
if (ec) {
|
||||||
|
throw compiler_error("failed to open object file for writing");
|
||||||
|
} else {
|
||||||
|
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||||
|
llvm::legacy::PassManager pm;
|
||||||
|
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||||
|
throw compiler_error("failed to add passes to pass manager");
|
||||||
|
} else {
|
||||||
|
pm.run(ctx.get_module());
|
||||||
|
file.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
compiler::compiler(const std::string& filename)
|
||||||
|
: file_m(), global_defs(), driver(file_m, global_defs, filename),
|
||||||
|
global_env(new type_env), type_m(), mng(), global_scp(mng), ctx() {
|
||||||
|
add_default_types();
|
||||||
|
add_default_function_types();
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler::operator()(const std::string& into) {
|
||||||
|
parse();
|
||||||
|
typecheck();
|
||||||
|
translate();
|
||||||
|
compile();
|
||||||
|
generate_llvm();
|
||||||
|
output_llvm(into);
|
||||||
|
}
|
||||||
|
|
||||||
|
file_mgr& compiler::get_file_manager() {
|
||||||
|
return file_m;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_mgr& compiler::get_type_manager() {
|
||||||
|
return type_m;
|
||||||
|
}
|
||||||
37
code/compiler/13/compiler.hpp
Normal file
37
code/compiler/13/compiler.hpp
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "parse_driver.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "global_scope.hpp"
|
||||||
|
#include "mangler.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
|
||||||
|
class compiler {
|
||||||
|
private:
|
||||||
|
file_mgr file_m;
|
||||||
|
definition_group global_defs;
|
||||||
|
parse_driver driver;
|
||||||
|
type_env_ptr global_env;
|
||||||
|
type_mgr type_m;
|
||||||
|
mangler mng;
|
||||||
|
global_scope global_scp;
|
||||||
|
llvm_context ctx;
|
||||||
|
|
||||||
|
void add_default_types();
|
||||||
|
void add_binop_type(binop op, type_ptr type);
|
||||||
|
void add_default_function_types();
|
||||||
|
void parse();
|
||||||
|
void typecheck();
|
||||||
|
void translate();
|
||||||
|
void compile();
|
||||||
|
void create_llvm_binop(binop op);
|
||||||
|
void generate_llvm();
|
||||||
|
void output_llvm(const std::string& into);
|
||||||
|
public:
|
||||||
|
compiler(const std::string& filename);
|
||||||
|
void operator()(const std::string& into);
|
||||||
|
file_mgr& get_file_manager();
|
||||||
|
type_mgr& get_type_manager();
|
||||||
|
};
|
||||||
@@ -64,9 +64,9 @@ void definition_data::insert_constructors() const {
|
|||||||
type_ptr return_type(return_app);
|
type_ptr return_type(return_app);
|
||||||
for(auto& var : vars) {
|
for(auto& var : vars) {
|
||||||
if(var_set.find(var) != var_set.end())
|
if(var_set.find(var) != var_set.end())
|
||||||
throw std::runtime_error(
|
throw compiler_error(
|
||||||
std::string("type variable ") +
|
"type variable " + var +
|
||||||
var + std::string(" used twice in data type definition."));
|
" used twice in data type definition.", loc);
|
||||||
var_set.insert(var);
|
var_set.insert(var);
|
||||||
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||||
}
|
}
|
||||||
@@ -83,7 +83,7 @@ void definition_data::insert_constructors() const {
|
|||||||
|
|
||||||
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
||||||
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
||||||
env->bind(constructor->name, full_scheme);
|
env->bind(constructor->name, full_scheme, visibility::global);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
int env_var::get_offset(const std::string& name) const {
|
int env_var::get_offset(const std::string& name) const {
|
||||||
if(name == this->name) return 0;
|
if(name == this->name) return 0;
|
||||||
assert(parent);
|
assert(parent != nullptr);
|
||||||
return parent->get_offset(name) + 1;
|
return parent->get_offset(name) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -14,7 +14,7 @@ bool env_var::has_variable(const std::string& name) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int env_offset::get_offset(const std::string& name) const {
|
int env_offset::get_offset(const std::string& name) const {
|
||||||
assert(parent);
|
assert(parent != nullptr);
|
||||||
return parent->get_offset(name) + offset;
|
return parent->get_offset(name) + offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,33 +2,38 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
struct env {
|
class env {
|
||||||
virtual ~env() = default;
|
public:
|
||||||
|
virtual ~env() = default;
|
||||||
|
|
||||||
virtual int get_offset(const std::string& name) const = 0;
|
virtual int get_offset(const std::string& name) const = 0;
|
||||||
virtual bool has_variable(const std::string& name) const = 0;
|
virtual bool has_variable(const std::string& name) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
using env_ptr = std::shared_ptr<env>;
|
using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
struct env_var : public env {
|
class env_var : public env {
|
||||||
std::string name;
|
private:
|
||||||
env_ptr parent;
|
std::string name;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
env_var(std::string n, env_ptr p)
|
public:
|
||||||
: name(std::move(n)), parent(std::move(p)) {}
|
env_var(std::string n, env_ptr p)
|
||||||
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
int get_offset(const std::string& name) const;
|
int get_offset(const std::string& name) const;
|
||||||
bool has_variable(const std::string& name) const;
|
bool has_variable(const std::string& name) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct env_offset : public env {
|
class env_offset : public env {
|
||||||
int offset;
|
private:
|
||||||
env_ptr parent;
|
int offset;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
env_offset(int o, env_ptr p)
|
public:
|
||||||
: offset(o), parent(std::move(p)) {}
|
env_offset(int o, env_ptr p)
|
||||||
|
: offset(o), parent(std::move(p)) {}
|
||||||
|
|
||||||
int get_offset(const std::string& name) const;
|
int get_offset(const std::string& name) const;
|
||||||
bool has_variable(const std::string& name) const;
|
bool has_variable(const std::string& name) const;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,23 +1,36 @@
|
|||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
|
||||||
|
const char* compiler_error::what() const noexcept {
|
||||||
|
return "an error occured while compiling the program";
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler_error::print_about(std::ostream& to) {
|
||||||
|
to << what() << ": ";
|
||||||
|
to << description << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler_error::print_location(std::ostream& to, file_mgr& fm, bool highlight) {
|
||||||
|
if(!loc) return;
|
||||||
|
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
||||||
|
fm.print_location(to, *loc, highlight);
|
||||||
|
}
|
||||||
|
|
||||||
|
void compiler_error::pretty_print(std::ostream& to, file_mgr& fm) {
|
||||||
|
print_about(to);
|
||||||
|
print_location(to, fm);
|
||||||
|
}
|
||||||
|
|
||||||
const char* type_error::what() const noexcept {
|
const char* type_error::what() const noexcept {
|
||||||
return "an error occured while checking the types of the program";
|
return "an error occured while checking the types of the program";
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
void type_error::pretty_print(std::ostream& to, file_mgr& fm) {
|
||||||
to << "encountered error while typechecking program: ";
|
print_about(to);
|
||||||
to << description << std::endl;
|
print_location(to, fm, true);
|
||||||
|
|
||||||
if(loc) {
|
|
||||||
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
|
||||||
to << std::endl << "```" << std::endl;
|
|
||||||
drv.print_highlighted_location(to, *loc);
|
|
||||||
to << "```" << std::endl << std::endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) {
|
void unification_error::pretty_print(std::ostream& to, file_mgr& fm, type_mgr& mgr) {
|
||||||
type_error::pretty_print(to, drv);
|
type_error::pretty_print(to, fm);
|
||||||
to << "the expected type was:" << std::endl;
|
to << "the expected type was:" << std::endl;
|
||||||
to << " \033[34m";
|
to << " \033[34m";
|
||||||
left->print(mgr, to);
|
left->print(mgr, to);
|
||||||
|
|||||||
@@ -7,24 +7,43 @@
|
|||||||
|
|
||||||
using maybe_location = std::optional<yy::location>;
|
using maybe_location = std::optional<yy::location>;
|
||||||
|
|
||||||
struct type_error : std::exception {
|
class compiler_error : std::exception {
|
||||||
std::string description;
|
private:
|
||||||
std::optional<yy::location> loc;
|
std::string description;
|
||||||
|
maybe_location loc;
|
||||||
|
|
||||||
type_error(std::string d, maybe_location l = std::nullopt)
|
public:
|
||||||
: description(std::move(d)), loc(std::move(l)) {}
|
compiler_error(std::string d, maybe_location l = std::nullopt)
|
||||||
|
: description(std::move(d)), loc(std::move(l)) {}
|
||||||
|
|
||||||
const char* what() const noexcept override;
|
const char* what() const noexcept override;
|
||||||
void pretty_print(std::ostream& to, parse_driver& drv);
|
|
||||||
|
void print_about(std::ostream& to);
|
||||||
|
void print_location(std::ostream& to, file_mgr& fm, bool highlight = false);
|
||||||
|
|
||||||
|
void pretty_print(std::ostream& to, file_mgr& fm);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct unification_error : public type_error {
|
class type_error : compiler_error {
|
||||||
type_ptr left;
|
private:
|
||||||
type_ptr right;
|
|
||||||
|
|
||||||
unification_error(type_ptr l, type_ptr r, maybe_location loc = std::nullopt)
|
public:
|
||||||
: left(std::move(l)), right(std::move(r)),
|
type_error(std::string d, maybe_location l = std::nullopt)
|
||||||
|
: compiler_error(std::move(d), std::move(l)) {}
|
||||||
|
|
||||||
|
const char* what() const noexcept override;
|
||||||
|
void pretty_print(std::ostream& to, file_mgr& fm);
|
||||||
|
};
|
||||||
|
|
||||||
|
class unification_error : public type_error {
|
||||||
|
private:
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
public:
|
||||||
|
unification_error(type_ptr l, type_ptr r, maybe_location loc = std::nullopt)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
type_error("failed to unify types", std::move(loc)) {}
|
type_error("failed to unify types", std::move(loc)) {}
|
||||||
|
|
||||||
void pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr);
|
void pretty_print(std::ostream& to, file_mgr& fm, type_mgr& mgr);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -16,11 +16,11 @@ void global_function::declare_llvm(llvm_context& ctx) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void global_function::generate_llvm(llvm_context& ctx) {
|
void global_function::generate_llvm(llvm_context& ctx) {
|
||||||
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
ctx.get_builder().SetInsertPoint(&generated_function->getEntryBlock());
|
||||||
for(auto& instruction : instructions) {
|
for(auto& instruction : instructions) {
|
||||||
instruction->gen_llvm(ctx, generated_function);
|
instruction->gen_llvm(ctx, generated_function);
|
||||||
}
|
}
|
||||||
ctx.builder.CreateRetVoid();
|
ctx.get_builder().CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
void global_constructor::generate_llvm(llvm_context& ctx) {
|
void global_constructor::generate_llvm(llvm_context& ctx) {
|
||||||
@@ -29,21 +29,30 @@ void global_constructor::generate_llvm(llvm_context& ctx) {
|
|||||||
std::vector<instruction_ptr> instructions;
|
std::vector<instruction_ptr> instructions;
|
||||||
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
|
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
|
||||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
ctx.get_builder().SetInsertPoint(&new_function->getEntryBlock());
|
||||||
for (auto& instruction : instructions) {
|
for (auto& instruction : instructions) {
|
||||||
instruction->gen_llvm(ctx, new_function);
|
instruction->gen_llvm(ctx, new_function);
|
||||||
}
|
}
|
||||||
ctx.builder.CreateRetVoid();
|
ctx.get_builder().CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
|
global_function& global_scope::add_function(
|
||||||
global_function* new_function = new global_function(mangle_name(n), std::move(ps), std::move(b));
|
const std::string& n,
|
||||||
|
std::vector<std::string> ps,
|
||||||
|
ast_ptr b) {
|
||||||
|
auto name = mng->new_mangled_name(n);
|
||||||
|
global_function* new_function =
|
||||||
|
new global_function(std::move(name), std::move(ps), std::move(b));
|
||||||
functions.push_back(global_function_ptr(new_function));
|
functions.push_back(global_function_ptr(new_function));
|
||||||
return *new_function;
|
return *new_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
|
global_constructor& global_scope::add_constructor(
|
||||||
global_constructor* new_constructor = new global_constructor(mangle_name(n), t, a);
|
const std::string& n,
|
||||||
|
int8_t t,
|
||||||
|
size_t a) {
|
||||||
|
auto name = mng->new_mangled_name(n);
|
||||||
|
global_constructor* new_constructor = new global_constructor(name, t, a);
|
||||||
constructors.push_back(global_constructor_ptr(new_constructor));
|
constructors.push_back(global_constructor_ptr(new_constructor));
|
||||||
return *new_constructor;
|
return *new_constructor;
|
||||||
}
|
}
|
||||||
@@ -65,19 +74,3 @@ void global_scope::generate_llvm(llvm_context& ctx) {
|
|||||||
function->generate_llvm(ctx);
|
function->generate_llvm(ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string global_scope::mangle_name(const std::string& n) {
|
|
||||||
auto occurence_it = occurence_count.find(n);
|
|
||||||
int occurence = 0;
|
|
||||||
if(occurence_it != occurence_count.end()) {
|
|
||||||
occurence = occurence_it->second + 1;
|
|
||||||
}
|
|
||||||
occurence_count[n] = occurence;
|
|
||||||
|
|
||||||
std::string final_name = n;
|
|
||||||
if (occurence != 0) {
|
|
||||||
final_name += "_";
|
|
||||||
final_name += std::to_string(occurence);
|
|
||||||
}
|
|
||||||
return final_name;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <llvm/IR/Function.h>
|
#include <llvm/IR/Function.h>
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
|
#include "mangler.hpp"
|
||||||
|
|
||||||
struct ast;
|
struct ast;
|
||||||
using ast_ptr = std::unique_ptr<ast>;
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
@@ -39,17 +40,21 @@ struct global_constructor {
|
|||||||
|
|
||||||
using global_constructor_ptr = std::unique_ptr<global_constructor>;
|
using global_constructor_ptr = std::unique_ptr<global_constructor>;
|
||||||
|
|
||||||
struct global_scope {
|
class global_scope {
|
||||||
std::map<std::string, int> occurence_count;
|
|
||||||
std::vector<global_function_ptr> functions;
|
|
||||||
std::vector<global_constructor_ptr> constructors;
|
|
||||||
|
|
||||||
global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
|
|
||||||
global_constructor& add_constructor(std::string n, int8_t t, size_t a);
|
|
||||||
|
|
||||||
void compile();
|
|
||||||
void generate_llvm(llvm_context& ctx);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string mangle_name(const std::string& n);
|
std::vector<global_function_ptr> functions;
|
||||||
|
std::vector<global_constructor_ptr> constructors;
|
||||||
|
mangler* mng;
|
||||||
|
|
||||||
|
public:
|
||||||
|
global_scope(mangler& m) : mng(&m) {}
|
||||||
|
|
||||||
|
global_function& add_function(
|
||||||
|
const std::string& n,
|
||||||
|
std::vector<std::string> ps,
|
||||||
|
ast_ptr b);
|
||||||
|
global_constructor& add_constructor(const std::string& n, int8_t t, size_t a);
|
||||||
|
|
||||||
|
void compile();
|
||||||
|
void generate_llvm(llvm_context& ctx);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -17,37 +17,38 @@ struct group {
|
|||||||
using group_ptr = std::unique_ptr<group>;
|
using group_ptr = std::unique_ptr<group>;
|
||||||
|
|
||||||
class function_graph {
|
class function_graph {
|
||||||
using group_id = size_t;
|
private:
|
||||||
|
using group_id = size_t;
|
||||||
|
|
||||||
struct group_data {
|
struct group_data {
|
||||||
std::set<function> functions;
|
std::set<function> functions;
|
||||||
std::set<group_id> adjacency_list;
|
std::set<group_id> adjacency_list;
|
||||||
size_t indegree;
|
size_t indegree;
|
||||||
|
|
||||||
group_data() : indegree(0) {}
|
group_data() : indegree(0) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
using data_ptr = std::shared_ptr<group_data>;
|
using data_ptr = std::shared_ptr<group_data>;
|
||||||
using edge = std::pair<function, function>;
|
using edge = std::pair<function, function>;
|
||||||
using group_edge = std::pair<group_id, group_id>;
|
using group_edge = std::pair<group_id, group_id>;
|
||||||
|
|
||||||
std::map<function, std::set<function>> adjacency_lists;
|
std::map<function, std::set<function>> adjacency_lists;
|
||||||
std::set<edge> edges;
|
std::set<edge> edges;
|
||||||
|
|
||||||
std::set<edge> compute_transitive_edges();
|
std::set<edge> compute_transitive_edges();
|
||||||
void create_groups(
|
void create_groups(
|
||||||
const std::set<edge>&,
|
const std::set<edge>&,
|
||||||
std::map<function, group_id>&,
|
std::map<function, group_id>&,
|
||||||
std::map<group_id, data_ptr>&);
|
std::map<group_id, data_ptr>&);
|
||||||
void create_edges(
|
void create_edges(
|
||||||
std::map<function, group_id>&,
|
std::map<function, group_id>&,
|
||||||
std::map<group_id, data_ptr>&);
|
std::map<group_id, data_ptr>&);
|
||||||
std::vector<group_ptr> generate_order(
|
std::vector<group_ptr> generate_order(
|
||||||
std::map<function, group_id>&,
|
std::map<function, group_id>&,
|
||||||
std::map<group_id, data_ptr>&);
|
std::map<group_id, data_ptr>&);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::set<function>& add_function(const function& f);
|
std::set<function>& add_function(const function& f);
|
||||||
void add_edge(const function& from, const function& to);
|
void add_edge(const function& from, const function& to);
|
||||||
std::vector<group_ptr> compute_order();
|
std::vector<group_ptr> compute_order();
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -24,9 +24,9 @@ void instruction_pushglobal::print(int indent, std::ostream& to) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
|
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
auto& global_f = ctx.custom_functions.at("f_" + name);
|
auto& global_f = ctx.get_custom_function(name);
|
||||||
auto arity = ctx.create_i32(global_f->arity);
|
auto arity = ctx.create_i32(global_f.arity);
|
||||||
ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
|
ctx.create_push(f, ctx.create_global(f, global_f.function, arity));
|
||||||
}
|
}
|
||||||
|
|
||||||
void instruction_push::print(int indent, std::ostream& to) const {
|
void instruction_push::print(int indent, std::ostream& to) const {
|
||||||
@@ -101,17 +101,17 @@ void instruction_jump::print(int indent, std::ostream& to) const {
|
|||||||
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
|
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
||||||
auto tag = ctx.unwrap_data_tag(top_node);
|
auto tag = ctx.unwrap_data_tag(top_node);
|
||||||
auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
|
auto safety_block = ctx.create_basic_block("safety", f);
|
||||||
auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());
|
auto switch_op = ctx.get_builder().CreateSwitch(tag, safety_block, tag_mappings.size());
|
||||||
std::vector<BasicBlock*> blocks;
|
std::vector<BasicBlock*> blocks;
|
||||||
|
|
||||||
for(auto& branch : branches) {
|
for(auto& branch : branches) {
|
||||||
auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
|
auto branch_block = ctx.create_basic_block("branch", f);
|
||||||
ctx.builder.SetInsertPoint(branch_block);
|
ctx.get_builder().SetInsertPoint(branch_block);
|
||||||
for(auto& instruction : branch) {
|
for(auto& instruction : branch) {
|
||||||
instruction->gen_llvm(ctx, f);
|
instruction->gen_llvm(ctx, f);
|
||||||
}
|
}
|
||||||
ctx.builder.CreateBr(safety_block);
|
ctx.get_builder().CreateBr(safety_block);
|
||||||
blocks.push_back(branch_block);
|
blocks.push_back(branch_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,46 +119,7 @@ void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
|
|||||||
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
|
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.builder.SetInsertPoint(safety_block);
|
ctx.get_builder().SetInsertPoint(safety_block);
|
||||||
}
|
|
||||||
|
|
||||||
void instruction_if::print(int indent, std::ostream& to) const {
|
|
||||||
print_indent(indent, to);
|
|
||||||
to << "If(" << std::endl;
|
|
||||||
for(auto& instruction : on_true) {
|
|
||||||
instruction->print(indent + 2, to);
|
|
||||||
}
|
|
||||||
to << std::endl;
|
|
||||||
for(auto& instruction : on_false) {
|
|
||||||
instruction->print(indent + 2, to);
|
|
||||||
}
|
|
||||||
print_indent(indent, to);
|
|
||||||
to << ")" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void instruction_if::gen_llvm(llvm_context& ctx, llvm::Function* f) const {
|
|
||||||
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
|
||||||
auto num = ctx.unwrap_num(top_node);
|
|
||||||
|
|
||||||
auto nonzero_block = BasicBlock::Create(ctx.ctx, "nonzero", f);
|
|
||||||
auto zero_block = BasicBlock::Create(ctx.ctx, "zero", f);
|
|
||||||
auto resume_block = BasicBlock::Create(ctx.ctx, "resume", f);
|
|
||||||
auto switch_op = ctx.builder.CreateSwitch(num, nonzero_block, 2);
|
|
||||||
|
|
||||||
switch_op->addCase(ctx.create_i32(0), zero_block);
|
|
||||||
ctx.builder.SetInsertPoint(nonzero_block);
|
|
||||||
for(auto& instruction : on_true) {
|
|
||||||
instruction->gen_llvm(ctx, f);
|
|
||||||
}
|
|
||||||
ctx.builder.CreateBr(resume_block);
|
|
||||||
|
|
||||||
ctx.builder.SetInsertPoint(zero_block);
|
|
||||||
for(auto& instruction : on_false) {
|
|
||||||
instruction->gen_llvm(ctx, f);
|
|
||||||
}
|
|
||||||
ctx.builder.CreateBr(resume_block);
|
|
||||||
|
|
||||||
ctx.builder.SetInsertPoint(resume_block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void instruction_slide::print(int indent, std::ostream& to) const {
|
void instruction_slide::print(int indent, std::ostream& to) const {
|
||||||
@@ -180,13 +141,10 @@ void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
|
|||||||
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
|
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||||
llvm::Value* result;
|
llvm::Value* result;
|
||||||
switch(op) {
|
switch(op) {
|
||||||
case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
|
case PLUS: result = ctx.get_builder().CreateAdd(left_int, right_int); break;
|
||||||
case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
|
case MINUS: result = ctx.get_builder().CreateSub(left_int, right_int); break;
|
||||||
case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
|
case TIMES: result = ctx.get_builder().CreateMul(left_int, right_int); break;
|
||||||
case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
|
case DIVIDE: result = ctx.get_builder().CreateSDiv(left_int, right_int); break;
|
||||||
case MODULO: result = ctx.builder.CreateSRem(left_int, right_int); break;
|
|
||||||
case EQUALS: result = ctx.builder.CreateICmpEQ(left_int, right_int); break;
|
|
||||||
case LESS_EQUALS: result = ctx.builder.CreateICmpSLE(left_int, right_int); break;
|
|
||||||
}
|
}
|
||||||
ctx.create_push(f, ctx.create_num(f, result));
|
ctx.create_push(f, ctx.create_num(f, result));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -101,19 +101,6 @@ struct instruction_jump : public instruction {
|
|||||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct instruction_if : public instruction {
|
|
||||||
std::vector<instruction_ptr> on_true;
|
|
||||||
std::vector<instruction_ptr> on_false;
|
|
||||||
|
|
||||||
instruction_if(
|
|
||||||
std::vector<instruction_ptr> t,
|
|
||||||
std::vector<instruction_ptr> f)
|
|
||||||
: on_true(std::move(t)), on_false(std::move(f)) {}
|
|
||||||
|
|
||||||
void print(int indent, std::ostream& to) const;
|
|
||||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct instruction_slide : public instruction {
|
struct instruction_slide : public instruction {
|
||||||
int offset;
|
int offset;
|
||||||
|
|
||||||
|
|||||||
@@ -163,6 +163,18 @@ void llvm_context::create_functions() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IRBuilder<>& llvm_context::get_builder() {
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
|
||||||
|
Module& llvm_context::get_module() {
|
||||||
|
return module;
|
||||||
|
}
|
||||||
|
|
||||||
|
BasicBlock* llvm_context::create_basic_block(const std::string& name, llvm::Function* f) {
|
||||||
|
return BasicBlock::Create(ctx, name, f);
|
||||||
|
}
|
||||||
|
|
||||||
ConstantInt* llvm_context::create_i8(int8_t i) {
|
ConstantInt* llvm_context::create_i8(int8_t i) {
|
||||||
return ConstantInt::get(ctx, APInt(8, i));
|
return ConstantInt::get(ctx, APInt(8, i));
|
||||||
}
|
}
|
||||||
@@ -225,7 +237,12 @@ Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Value* llvm_context::unwrap_num(Value* v) {
|
Value* llvm_context::unwrap_num(Value* v) {
|
||||||
return builder.CreatePtrToInt(v, IntegerType::getInt32Ty(ctx));
|
auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
|
||||||
|
auto cast = builder.CreatePointerCast(v, num_ptr_type);
|
||||||
|
auto offset_0 = create_i32(0);
|
||||||
|
auto offset_1 = create_i32(1);
|
||||||
|
auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||||
|
return builder.CreateLoad(int_ptr);
|
||||||
}
|
}
|
||||||
Value* llvm_context::create_num(Function* f, Value* v) {
|
Value* llvm_context::create_num(Function* f, Value* v) {
|
||||||
auto alloc_num_f = functions.at("alloc_num");
|
auto alloc_num_f = functions.at("alloc_num");
|
||||||
@@ -254,7 +271,7 @@ Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
|
|||||||
return create_track(f, alloc_app_call);
|
return create_track(f, alloc_app_call);
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
|
llvm::Function* llvm_context::create_custom_function(const std::string& name, int32_t arity) {
|
||||||
auto void_type = llvm::Type::getVoidTy(ctx);
|
auto void_type = llvm::Type::getVoidTy(ctx);
|
||||||
auto new_function = llvm::Function::Create(
|
auto new_function = llvm::Function::Create(
|
||||||
function_type,
|
function_type,
|
||||||
@@ -271,3 +288,7 @@ llvm::Function* llvm_context::create_custom_function(std::string name, int32_t a
|
|||||||
|
|
||||||
return new_function;
|
return new_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llvm_context::custom_function& llvm_context::get_custom_function(const std::string& name) {
|
||||||
|
return *custom_functions.at("f_" + name);
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,66 +7,75 @@
|
|||||||
#include <llvm/IR/Value.h>
|
#include <llvm/IR/Value.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
struct llvm_context {
|
class llvm_context {
|
||||||
struct custom_function {
|
public:
|
||||||
llvm::Function* function;
|
struct custom_function {
|
||||||
int32_t arity;
|
llvm::Function* function;
|
||||||
};
|
int32_t arity;
|
||||||
|
};
|
||||||
|
|
||||||
using custom_function_ptr = std::unique_ptr<custom_function>;
|
using custom_function_ptr = std::unique_ptr<custom_function>;
|
||||||
|
|
||||||
llvm::LLVMContext ctx;
|
private:
|
||||||
llvm::IRBuilder<> builder;
|
llvm::LLVMContext ctx;
|
||||||
llvm::Module module;
|
llvm::IRBuilder<> builder;
|
||||||
|
llvm::Module module;
|
||||||
|
|
||||||
std::map<std::string, custom_function_ptr> custom_functions;
|
std::map<std::string, custom_function_ptr> custom_functions;
|
||||||
std::map<std::string, llvm::Function*> functions;
|
std::map<std::string, llvm::Function*> functions;
|
||||||
std::map<std::string, llvm::StructType*> struct_types;
|
std::map<std::string, llvm::StructType*> struct_types;
|
||||||
|
|
||||||
llvm::StructType* stack_type;
|
llvm::StructType* stack_type;
|
||||||
llvm::StructType* gmachine_type;
|
llvm::StructType* gmachine_type;
|
||||||
llvm::PointerType* stack_ptr_type;
|
llvm::PointerType* stack_ptr_type;
|
||||||
llvm::PointerType* gmachine_ptr_type;
|
llvm::PointerType* gmachine_ptr_type;
|
||||||
llvm::PointerType* node_ptr_type;
|
llvm::PointerType* node_ptr_type;
|
||||||
llvm::IntegerType* tag_type;
|
llvm::IntegerType* tag_type;
|
||||||
llvm::FunctionType* function_type;
|
llvm::FunctionType* function_type;
|
||||||
|
|
||||||
llvm_context()
|
void create_types();
|
||||||
: builder(ctx), module("bloglang", ctx) {
|
void create_functions();
|
||||||
create_types();
|
|
||||||
create_functions();
|
|
||||||
}
|
|
||||||
|
|
||||||
void create_types();
|
public:
|
||||||
void create_functions();
|
llvm_context()
|
||||||
|
: builder(ctx), module("bloglang", ctx) {
|
||||||
|
create_types();
|
||||||
|
create_functions();
|
||||||
|
}
|
||||||
|
|
||||||
llvm::ConstantInt* create_i8(int8_t);
|
llvm::IRBuilder<>& get_builder();
|
||||||
llvm::ConstantInt* create_i32(int32_t);
|
llvm::Module& get_module();
|
||||||
llvm::ConstantInt* create_size(size_t);
|
|
||||||
|
|
||||||
llvm::Value* create_pop(llvm::Function*);
|
llvm::BasicBlock* create_basic_block(const std::string& name, llvm::Function* f);
|
||||||
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
|
||||||
void create_push(llvm::Function*, llvm::Value*);
|
|
||||||
void create_popn(llvm::Function*, llvm::Value*);
|
|
||||||
void create_update(llvm::Function*, llvm::Value*);
|
|
||||||
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
|
||||||
void create_split(llvm::Function*, llvm::Value*);
|
|
||||||
void create_slide(llvm::Function*, llvm::Value*);
|
|
||||||
void create_alloc(llvm::Function*, llvm::Value*);
|
|
||||||
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
|
||||||
|
|
||||||
void create_unwind(llvm::Function*);
|
llvm::ConstantInt* create_i8(int8_t);
|
||||||
|
llvm::ConstantInt* create_i32(int32_t);
|
||||||
|
llvm::ConstantInt* create_size(size_t);
|
||||||
|
|
||||||
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
llvm::Value* create_pop(llvm::Function*);
|
||||||
|
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
||||||
|
void create_push(llvm::Function*, llvm::Value*);
|
||||||
|
void create_popn(llvm::Function*, llvm::Value*);
|
||||||
|
void create_update(llvm::Function*, llvm::Value*);
|
||||||
|
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
void create_split(llvm::Function*, llvm::Value*);
|
||||||
|
void create_slide(llvm::Function*, llvm::Value*);
|
||||||
|
void create_alloc(llvm::Function*, llvm::Value*);
|
||||||
|
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
llvm::Value* unwrap_num(llvm::Value*);
|
void create_unwind(llvm::Function*);
|
||||||
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
|
||||||
|
|
||||||
llvm::Value* unwrap_data_tag(llvm::Value*);
|
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
||||||
|
|
||||||
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
llvm::Value* unwrap_num(llvm::Value*);
|
||||||
|
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||||
|
|
||||||
llvm::Function* create_custom_function(std::string name, int32_t arity);
|
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Function* create_custom_function(const std::string& name, int32_t arity);
|
||||||
|
custom_function& get_custom_function(const std::string& name);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,213 +1,27 @@
|
|||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "binop.hpp"
|
|
||||||
#include "definition.hpp"
|
|
||||||
#include "graph.hpp"
|
|
||||||
#include "instruction.hpp"
|
|
||||||
#include "llvm_context.hpp"
|
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
#include "compiler.hpp"
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
#include "type.hpp"
|
|
||||||
#include "parse_driver.hpp"
|
|
||||||
#include "type_env.hpp"
|
|
||||||
#include "llvm/IR/LegacyPassManager.h"
|
|
||||||
#include "llvm/IR/Verifier.h"
|
|
||||||
#include "llvm/Support/TargetSelect.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
#include "llvm/Support/FileSystem.h"
|
|
||||||
#include "llvm/Target/TargetOptions.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
|
||||||
|
|
||||||
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
||||||
std::cout << "An error occured: " << msg << std::endl;
|
std::cerr << "An error occured: " << msg << std::endl;
|
||||||
}
|
|
||||||
|
|
||||||
void prelude_types(definition_group& defs, type_env_ptr env) {
|
|
||||||
type_ptr int_type = type_ptr(new type_internal("Int"));
|
|
||||||
env->bind_type("Int", int_type);
|
|
||||||
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
|
||||||
|
|
||||||
type_ptr bool_type = type_ptr(new type_internal("Bool"));
|
|
||||||
env->bind_type("Bool", bool_type);
|
|
||||||
type_ptr bool_type_app = type_ptr(new type_app(bool_type));
|
|
||||||
|
|
||||||
type_ptr binop_type = type_ptr(new type_arr(
|
|
||||||
int_type_app,
|
|
||||||
type_ptr(new type_arr(int_type_app, int_type_app))));
|
|
||||||
type_ptr cmp_type = type_ptr(new type_arr(
|
|
||||||
int_type_app,
|
|
||||||
type_ptr(new type_arr(int_type_app, bool_type_app))));
|
|
||||||
|
|
||||||
constexpr binop number_ops[] = { PLUS, MINUS, TIMES, DIVIDE, MODULO };
|
|
||||||
constexpr binop cmp_ops[] = { EQUALS, LESS_EQUALS };
|
|
||||||
for(auto& op : number_ops) {
|
|
||||||
env->bind(op_name(op), binop_type, visibility::global);
|
|
||||||
env->set_mangled_name(op_name(op), op_action(op));
|
|
||||||
}
|
|
||||||
for(auto& op : cmp_ops) {
|
|
||||||
env->bind(op_name(op), cmp_type, visibility::global);
|
|
||||||
env->set_mangled_name(op_name(op), op_action(op));
|
|
||||||
}
|
|
||||||
|
|
||||||
env->bind("True", bool_type_app, visibility::global);
|
|
||||||
env->bind("False", bool_type_app, visibility::global);
|
|
||||||
}
|
|
||||||
|
|
||||||
void typecheck_program(
|
|
||||||
definition_group& defs,
|
|
||||||
type_mgr& mgr, type_env_ptr& env) {
|
|
||||||
prelude_types(defs, env);
|
|
||||||
|
|
||||||
std::set<std::string> free;
|
|
||||||
defs.find_free(free);
|
|
||||||
defs.typecheck(mgr, env);
|
|
||||||
|
|
||||||
#ifdef DEBUG_OUT
|
|
||||||
for(auto& pair : defs.env->names) {
|
|
||||||
std::cout << pair.first << ": ";
|
|
||||||
pair.second.type->print(mgr, std::cout);
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
global_scope translate_program(definition_group& group) {
|
|
||||||
global_scope scope;
|
|
||||||
for(auto& data : group.defs_data) {
|
|
||||||
data.second->into_globals(scope);
|
|
||||||
}
|
|
||||||
for(auto& defn : group.defs_defn) {
|
|
||||||
auto& function = defn.second->into_global(scope);
|
|
||||||
function.body->env->parent->set_mangled_name(defn.first, function.name);
|
|
||||||
}
|
|
||||||
return scope;
|
|
||||||
}
|
|
||||||
|
|
||||||
void output_llvm(llvm_context& ctx, const std::string& filename) {
|
|
||||||
std::string targetTriple = llvm::sys::getDefaultTargetTriple();
|
|
||||||
|
|
||||||
llvm::InitializeNativeTarget();
|
|
||||||
llvm::InitializeNativeTargetAsmParser();
|
|
||||||
llvm::InitializeNativeTargetAsmPrinter();
|
|
||||||
|
|
||||||
std::string error;
|
|
||||||
const llvm::Target* target =
|
|
||||||
llvm::TargetRegistry::lookupTarget(targetTriple, error);
|
|
||||||
if (!target) {
|
|
||||||
std::cerr << error << std::endl;
|
|
||||||
} else {
|
|
||||||
std::string cpu = "generic";
|
|
||||||
std::string features = "";
|
|
||||||
llvm::TargetOptions options;
|
|
||||||
std::unique_ptr<llvm::TargetMachine> targetMachine(
|
|
||||||
target->createTargetMachine(targetTriple, cpu, features,
|
|
||||||
options, llvm::Optional<llvm::Reloc::Model>()));
|
|
||||||
|
|
||||||
ctx.module.setDataLayout(targetMachine->createDataLayout());
|
|
||||||
ctx.module.setTargetTriple(targetTriple);
|
|
||||||
|
|
||||||
std::error_code ec;
|
|
||||||
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
|
|
||||||
if (ec) {
|
|
||||||
throw std::runtime_error("failed to open object file for writing");
|
|
||||||
} else {
|
|
||||||
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
|
||||||
llvm::legacy::PassManager pm;
|
|
||||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
|
||||||
throw std::runtime_error("failed to add passes to pass manager");
|
|
||||||
} else {
|
|
||||||
pm.run(ctx.module);
|
|
||||||
file.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
|
|
||||||
auto new_function = ctx.create_custom_function(op_action(op), 2);
|
|
||||||
std::vector<instruction_ptr> instructions;
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_update(2)));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_pop(2)));
|
|
||||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
|
||||||
for(auto& instruction : instructions) {
|
|
||||||
instruction->gen_llvm(ctx, new_function);
|
|
||||||
}
|
|
||||||
ctx.builder.CreateRetVoid();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gen_llvm_boolean_constructor(llvm_context& ctx, const std::string& s, bool b) {
|
|
||||||
auto new_function = ctx.create_custom_function(s, 0);
|
|
||||||
std::vector<instruction_ptr> instructions;
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_pushint(b)));
|
|
||||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
|
||||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
|
||||||
for(auto& instruction : instructions) {
|
|
||||||
instruction->gen_llvm(ctx, new_function);
|
|
||||||
}
|
|
||||||
ctx.builder.CreateRetVoid();
|
|
||||||
}
|
|
||||||
|
|
||||||
void gen_llvm(global_scope& scope) {
|
|
||||||
llvm_context ctx;
|
|
||||||
gen_llvm_internal_op(ctx, PLUS);
|
|
||||||
gen_llvm_internal_op(ctx, MINUS);
|
|
||||||
gen_llvm_internal_op(ctx, TIMES);
|
|
||||||
gen_llvm_internal_op(ctx, DIVIDE);
|
|
||||||
gen_llvm_internal_op(ctx, MODULO);
|
|
||||||
gen_llvm_internal_op(ctx, EQUALS);
|
|
||||||
gen_llvm_internal_op(ctx, LESS_EQUALS);
|
|
||||||
gen_llvm_boolean_constructor(ctx, "True", true);
|
|
||||||
gen_llvm_boolean_constructor(ctx, "False", false);
|
|
||||||
|
|
||||||
scope.generate_llvm(ctx);
|
|
||||||
|
|
||||||
#ifdef DEBUG_OUT
|
|
||||||
ctx.module.print(llvm::outs(), nullptr);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
output_llvm(ctx, "program.o");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
if(argc != 2) {
|
if(argc != 2) {
|
||||||
std::cerr << "please enter a file to compile." << std::endl;
|
std::cerr << "please enter a file to compile." << std::endl;
|
||||||
}
|
|
||||||
parse_driver driver(argv[1]);
|
|
||||||
if(!driver.run_parse()) {
|
|
||||||
std::cerr << "failed to open file " << argv[1] << std::endl;
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
compiler cmp(argv[1]);
|
||||||
type_mgr mgr;
|
|
||||||
type_env_ptr env(new type_env);
|
|
||||||
|
|
||||||
#ifdef DEBUG_OUT
|
|
||||||
for(auto& def_defn : driver.global_defs.defs_defn) {
|
|
||||||
std::cout << def_defn.second->name;
|
|
||||||
for(auto& param : def_defn.second->params) std::cout << " " << param;
|
|
||||||
std::cout << ":" << std::endl;
|
|
||||||
def_defn.second->body->print(1, std::cout);
|
|
||||||
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
typecheck_program(driver.global_defs, mgr, env);
|
cmp("program.o");
|
||||||
global_scope scope = translate_program(driver.global_defs);
|
|
||||||
scope.compile();
|
|
||||||
gen_llvm(scope);
|
|
||||||
} catch(unification_error& err) {
|
} catch(unification_error& err) {
|
||||||
err.pretty_print(std::cerr, driver, mgr);
|
err.pretty_print(std::cerr, cmp.get_file_manager(), cmp.get_type_manager());
|
||||||
} catch(type_error& err) {
|
} catch(type_error& err) {
|
||||||
err.pretty_print(std::cerr, driver);
|
err.pretty_print(std::cerr, cmp.get_file_manager());
|
||||||
} catch(std::runtime_error& err) {
|
} catch (compiler_error& err) {
|
||||||
std::cerr << err.what() << std::endl;
|
err.pretty_print(std::cerr, cmp.get_file_manager());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
17
code/compiler/13/mangler.cpp
Normal file
17
code/compiler/13/mangler.cpp
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#include "mangler.hpp"
|
||||||
|
|
||||||
|
std::string mangler::new_mangled_name(const std::string& n) {
|
||||||
|
auto occurence_it = occurence_count.find(n);
|
||||||
|
int occurence = 0;
|
||||||
|
if(occurence_it != occurence_count.end()) {
|
||||||
|
occurence = occurence_it->second + 1;
|
||||||
|
}
|
||||||
|
occurence_count[n] = occurence;
|
||||||
|
|
||||||
|
std::string final_name = n;
|
||||||
|
if (occurence != 0) {
|
||||||
|
final_name += "_";
|
||||||
|
final_name += std::to_string(occurence);
|
||||||
|
}
|
||||||
|
return final_name;
|
||||||
|
}
|
||||||
11
code/compiler/13/mangler.hpp
Normal file
11
code/compiler/13/mangler.hpp
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
class mangler {
|
||||||
|
private:
|
||||||
|
std::map<std::string, int> occurence_count;
|
||||||
|
|
||||||
|
public:
|
||||||
|
std::string new_mangled_name(const std::string& str);
|
||||||
|
};
|
||||||
72
code/compiler/13/parse_driver.cpp
Normal file
72
code/compiler/13/parse_driver.cpp
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
#include "parse_driver.hpp"
|
||||||
|
#include "scanner.hpp"
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
file_mgr::file_mgr() : file_offset(0) {
|
||||||
|
line_offsets.push_back(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void file_mgr::write(const char* buf, size_t len) {
|
||||||
|
string_stream.write(buf, len);
|
||||||
|
file_offset += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
void file_mgr::mark_line() {
|
||||||
|
line_offsets.push_back(file_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
void file_mgr::finalize() {
|
||||||
|
file_contents = string_stream.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t file_mgr::get_index(int line, int column) const {
|
||||||
|
assert(line > 0 && line <= line_offsets.size());
|
||||||
|
return line_offsets.at(line-1) + column - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t file_mgr::get_line_end(int line) const {
|
||||||
|
if(line == line_offsets.size()) return file_contents.size();
|
||||||
|
return get_index(line+1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void file_mgr::print_location(
|
||||||
|
std::ostream& stream,
|
||||||
|
const yy::location& loc,
|
||||||
|
bool highlight) const {
|
||||||
|
size_t print_start = get_index(loc.begin.line, 1);
|
||||||
|
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||||
|
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||||
|
size_t print_end = get_line_end(loc.end.line);
|
||||||
|
const char* content = file_contents.c_str();
|
||||||
|
stream.write(content + print_start, highlight_start - print_start);
|
||||||
|
if(highlight) stream << "\033[4;31m";
|
||||||
|
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||||
|
if(highlight) stream << "\033[0m";
|
||||||
|
stream.write(content + highlight_end, print_end - highlight_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool parse_driver::operator()() {
|
||||||
|
FILE* stream = fopen(file_name.c_str(), "r");
|
||||||
|
if(!stream) return false;
|
||||||
|
yyscan_t scanner;
|
||||||
|
yylex_init(&scanner);
|
||||||
|
yyset_in(stream, scanner);
|
||||||
|
yy::parser parser(scanner, *this);
|
||||||
|
parser();
|
||||||
|
yylex_destroy(scanner);
|
||||||
|
fclose(stream);
|
||||||
|
file_m->finalize();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
yy::location& parse_driver::get_current_location() {
|
||||||
|
return location;
|
||||||
|
}
|
||||||
|
|
||||||
|
file_mgr& parse_driver::get_file_manager() const {
|
||||||
|
return *file_m;
|
||||||
|
}
|
||||||
|
|
||||||
|
definition_group& parse_driver::get_global_defs() const {
|
||||||
|
return *global_defs;
|
||||||
|
}
|
||||||
@@ -11,68 +11,46 @@ struct parse_driver;
|
|||||||
void scanner_init(parse_driver* d, yyscan_t* scanner);
|
void scanner_init(parse_driver* d, yyscan_t* scanner);
|
||||||
void scanner_destroy(yyscan_t* scanner);
|
void scanner_destroy(yyscan_t* scanner);
|
||||||
|
|
||||||
struct parse_driver {
|
class file_mgr {
|
||||||
std::string file_name;
|
private:
|
||||||
std::ifstream file_stream;
|
std::ostringstream string_stream;
|
||||||
std::ostringstream string_stream;
|
std::string file_contents;
|
||||||
|
|
||||||
yy::location location;
|
size_t file_offset;
|
||||||
size_t file_offset;
|
std::vector<size_t> line_offsets;
|
||||||
|
public:
|
||||||
|
file_mgr();
|
||||||
|
|
||||||
std::vector<size_t> line_offsets;
|
void write(const char* buffer, size_t len);
|
||||||
definition_group global_defs;
|
void mark_line();
|
||||||
std::string read_file;
|
void finalize();
|
||||||
|
|
||||||
parse_driver(const std::string& file)
|
size_t get_index(int line, int column) const;
|
||||||
: file_name(file), file_offset(0) {}
|
size_t get_line_end(int line) const;
|
||||||
|
void print_location(
|
||||||
|
std::ostream& stream,
|
||||||
|
const yy::location& loc,
|
||||||
|
bool highlight = true) const;
|
||||||
|
};
|
||||||
|
|
||||||
bool run_parse() {
|
class parse_driver {
|
||||||
file_stream.open(file_name);
|
private:
|
||||||
if(!file_stream.good()) return false;
|
std::string file_name;
|
||||||
line_offsets.push_back(0);
|
yy::location location;
|
||||||
yyscan_t scanner;
|
definition_group* global_defs;
|
||||||
scanner_init(this, &scanner);
|
file_mgr* file_m;
|
||||||
yy::parser parser(scanner, *this);
|
|
||||||
parser();
|
|
||||||
scanner_destroy(&scanner);
|
|
||||||
read_file = string_stream.str();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get() {
|
public:
|
||||||
int new_char = file_stream.get();
|
parse_driver(
|
||||||
if(new_char == EOF) return EOF;
|
file_mgr& mgr,
|
||||||
file_offset++;
|
definition_group& defs,
|
||||||
if(new_char == '\n') line_offsets.push_back(file_offset);
|
const std::string& file)
|
||||||
string_stream.put(new_char);
|
: file_name(file), file_m(&mgr), global_defs(&defs) {}
|
||||||
return new_char;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t get_index(int line, int column) {
|
bool operator()();
|
||||||
assert(line > 0);
|
yy::location& get_current_location();
|
||||||
assert(line <= line_offsets.size());
|
file_mgr& get_file_manager() const;
|
||||||
size_t file_offset = line_offsets[line-1];
|
definition_group& get_global_defs() const;
|
||||||
file_offset += column - 1;
|
|
||||||
return file_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t get_line_end(int line) {
|
|
||||||
if(line > line_offsets.size()) return read_file.size();
|
|
||||||
return get_index(line+1, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_highlighted_location(std::ostream& stream, const yy::location& loc) {
|
|
||||||
size_t print_start = get_index(loc.begin.line, 1);
|
|
||||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
|
||||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
|
||||||
size_t print_end = get_line_end(loc.end.line);
|
|
||||||
const char* content = read_file.c_str();
|
|
||||||
stream.write(content + print_start, highlight_start - print_start);
|
|
||||||
stream << "\033[4;31m";
|
|
||||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
|
||||||
stream << "\033[0m";
|
|
||||||
stream.write(content + highlight_end, print_end - highlight_end);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
||||||
|
|||||||
@@ -9,10 +9,10 @@ type_ptr parsed_type_app::to_type(
|
|||||||
const type_env& e) const {
|
const type_env& e) const {
|
||||||
auto parent_type = e.lookup_type(name);
|
auto parent_type = e.lookup_type(name);
|
||||||
if(parent_type == nullptr)
|
if(parent_type == nullptr)
|
||||||
throw type_error(std::string("no such type or type constructor ") + name);
|
throw type_error("no such type or type constructor " + name);
|
||||||
type_base* base_type;
|
type_base* base_type;
|
||||||
if(!(base_type = dynamic_cast<type_base*>(parent_type.get())))
|
if(!(base_type = dynamic_cast<type_base*>(parent_type.get())))
|
||||||
throw type_error(std::string("invalid type ") + name);
|
throw type_error("invalid type " + name);
|
||||||
if(base_type->arity != arguments.size()) {
|
if(base_type->arity != arguments.size()) {
|
||||||
std::ostringstream error_stream;
|
std::ostringstream error_stream;
|
||||||
error_stream << "invalid application of type ";
|
error_stream << "invalid application of type ";
|
||||||
@@ -34,7 +34,7 @@ type_ptr parsed_type_var::to_type(
|
|||||||
const std::set<std::string>& vars,
|
const std::set<std::string>& vars,
|
||||||
const type_env& e) const {
|
const type_env& e) const {
|
||||||
if(vars.find(var) == vars.end())
|
if(vars.find(var) == vars.end())
|
||||||
throw type_error(std::string("the type variable ") + var + std::string(" was not explicitly declared."));
|
throw type_error("the type variable " + var + " was not explicitly declared.");
|
||||||
return type_ptr(new type_var(var));
|
return type_ptr(new type_var(var));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,14 +18,10 @@ using yyscan_t = void*;
|
|||||||
}
|
}
|
||||||
|
|
||||||
%token BACKSLASH
|
%token BACKSLASH
|
||||||
%token BACKTICK
|
|
||||||
%token PLUS
|
%token PLUS
|
||||||
%token TIMES
|
%token TIMES
|
||||||
%token MINUS
|
%token MINUS
|
||||||
%token DIVIDE
|
%token DIVIDE
|
||||||
%token MODULO
|
|
||||||
%token EQUALS
|
|
||||||
%token LESS_EQUALS
|
|
||||||
%token <int> INT
|
%token <int> INT
|
||||||
%token DEFN
|
%token DEFN
|
||||||
%token DATA
|
%token DATA
|
||||||
@@ -53,10 +49,9 @@ using yyscan_t = void*;
|
|||||||
%type <std::vector<branch_ptr>> branches
|
%type <std::vector<branch_ptr>> branches
|
||||||
%type <std::vector<constructor_ptr>> constructors
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
%type <std::vector<parsed_type_ptr>> typeList
|
%type <std::vector<parsed_type_ptr>> typeList
|
||||||
%type <binop> anyBinop
|
|
||||||
%type <definition_group> definitions
|
%type <definition_group> definitions
|
||||||
%type <parsed_type_ptr> type nonArrowType typeListElement
|
%type <parsed_type_ptr> type nonArrowType typeListElement
|
||||||
%type <ast_ptr> aInfix aEq aAdd aMul case let lambda app appBase
|
%type <ast_ptr> aAdd aMul case let lambda app appBase
|
||||||
%type <definition_data_ptr> data
|
%type <definition_data_ptr> data
|
||||||
%type <definition_defn_ptr> defn
|
%type <definition_defn_ptr> defn
|
||||||
%type <branch_ptr> branch
|
%type <branch_ptr> branch
|
||||||
@@ -68,7 +63,7 @@ using yyscan_t = void*;
|
|||||||
%%
|
%%
|
||||||
|
|
||||||
program
|
program
|
||||||
: definitions { $1.vis = visibility::global; std::swap(drv.global_defs, $1); }
|
: definitions { $1.vis = visibility::global; std::swap(drv.get_global_defs(), $1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
definitions
|
definitions
|
||||||
@@ -78,7 +73,7 @@ definitions
|
|||||||
;
|
;
|
||||||
|
|
||||||
defn
|
defn
|
||||||
: DEFN LID lowercaseParams EQUAL OCURLY aInfix CCURLY
|
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||||
{ $$ = definition_defn_ptr(
|
{ $$ = definition_defn_ptr(
|
||||||
new definition_defn(std::move($2), std::move($3), std::move($6), @$)); }
|
new definition_defn(std::move($2), std::move($3), std::move($6), @$)); }
|
||||||
;
|
;
|
||||||
@@ -88,22 +83,6 @@ lowercaseParams
|
|||||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
;
|
;
|
||||||
|
|
||||||
aInfix
|
|
||||||
: aInfix BACKTICK LID BACKTICK aEq
|
|
||||||
{ $$ = ast_ptr(new ast_app(
|
|
||||||
ast_ptr(new ast_app(ast_ptr(new ast_lid(std::move($3))), std::move($1))), std::move($5))); }
|
|
||||||
| aInfix BACKTICK UID BACKTICK aEq
|
|
||||||
{ $$ = ast_ptr(new ast_app(
|
|
||||||
ast_ptr(new ast_app(ast_ptr(new ast_uid(std::move($3))), std::move($1))), std::move($5))); }
|
|
||||||
| aEq { $$ = std::move($1); }
|
|
||||||
;
|
|
||||||
|
|
||||||
aEq
|
|
||||||
: aAdd EQUALS aAdd { $$ = ast_ptr(new ast_binop(EQUALS, std::move($1), std::move($3), @$)); }
|
|
||||||
| aAdd LESS_EQUALS aAdd { $$ = ast_ptr(new ast_binop(LESS_EQUALS, std::move($1), std::move($3), @$)); }
|
|
||||||
| aAdd { $$ = std::move($1); }
|
|
||||||
;
|
|
||||||
|
|
||||||
aAdd
|
aAdd
|
||||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3), @$)); }
|
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3), @$)); }
|
||||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3), @$)); }
|
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3), @$)); }
|
||||||
@@ -113,7 +92,6 @@ aAdd
|
|||||||
aMul
|
aMul
|
||||||
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3), @$)); }
|
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3), @$)); }
|
||||||
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3), @$)); }
|
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3), @$)); }
|
||||||
| aMul MODULO app { $$ = ast_ptr(new ast_binop(MODULO, std::move($1), std::move($3), @$)); }
|
|
||||||
| app { $$ = std::move($1); }
|
| app { $$ = std::move($1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -126,35 +104,24 @@ appBase
|
|||||||
: INT { $$ = ast_ptr(new ast_int($1, @$)); }
|
: INT { $$ = ast_ptr(new ast_int($1, @$)); }
|
||||||
| LID { $$ = ast_ptr(new ast_lid(std::move($1), @$)); }
|
| LID { $$ = ast_ptr(new ast_lid(std::move($1), @$)); }
|
||||||
| UID { $$ = ast_ptr(new ast_uid(std::move($1), @$)); }
|
| UID { $$ = ast_ptr(new ast_uid(std::move($1), @$)); }
|
||||||
| OPAREN aInfix CPAREN { $$ = std::move($2); }
|
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||||
| OPAREN anyBinop CPAREN { $$ = ast_ptr(new ast_lid(op_name($2))); }
|
|
||||||
| case { $$ = std::move($1); }
|
| case { $$ = std::move($1); }
|
||||||
| let { $$ = std::move($1); }
|
| let { $$ = std::move($1); }
|
||||||
| lambda { $$ = std::move($1); }
|
| lambda { $$ = std::move($1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
anyBinop
|
|
||||||
: PLUS { $$ = PLUS; }
|
|
||||||
| MINUS { $$ = MINUS; }
|
|
||||||
| TIMES { $$ = TIMES; }
|
|
||||||
| DIVIDE { $$ = DIVIDE; }
|
|
||||||
| MODULO { $$ = MODULO; }
|
|
||||||
| EQUALS { $$ = EQUALS; }
|
|
||||||
| LESS_EQUALS { $$ = LESS_EQUALS; }
|
|
||||||
;
|
|
||||||
|
|
||||||
let
|
let
|
||||||
: LET OCURLY definitions CCURLY IN OCURLY aInfix CCURLY
|
: LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
|
||||||
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7), @$)); }
|
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7), @$)); }
|
||||||
;
|
;
|
||||||
|
|
||||||
lambda
|
lambda
|
||||||
: BACKSLASH lowercaseParams ARROW OCURLY aInfix CCURLY
|
: BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
|
||||||
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5), @$)); }
|
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5), @$)); }
|
||||||
;
|
;
|
||||||
|
|
||||||
case
|
case
|
||||||
: CASE aInfix OF OCURLY branches CCURLY
|
: CASE aAdd OF OCURLY branches CCURLY
|
||||||
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5), @$)); }
|
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5), @$)); }
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -164,7 +131,7 @@ branches
|
|||||||
;
|
;
|
||||||
|
|
||||||
branch
|
branch
|
||||||
: pattern ARROW OCURLY aInfix CCURLY
|
: pattern ARROW OCURLY aAdd CCURLY
|
||||||
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,9 @@
|
|||||||
#include <bits/stdint-intn.h>
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "runtime.h"
|
#include "runtime.h"
|
||||||
|
|
||||||
#define INT_MARKER (1l << 63)
|
|
||||||
#define IS_INT(n) ((uint64_t) n & INT_MARKER)
|
|
||||||
|
|
||||||
struct node_base* alloc_node() {
|
struct node_base* alloc_node() {
|
||||||
struct node_base* new_node = malloc(sizeof(struct node_app));
|
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||||
new_node->gc_next = NULL;
|
new_node->gc_next = NULL;
|
||||||
@@ -25,7 +21,10 @@ struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct node_num* alloc_num(int32_t n) {
|
struct node_num* alloc_num(int32_t n) {
|
||||||
return (struct node_num*) (INT_MARKER | n);
|
struct node_num* node = (struct node_num*) alloc_node();
|
||||||
|
node->base.tag = NODE_NUM;
|
||||||
|
node->value = n;
|
||||||
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||||
@@ -50,7 +49,7 @@ void free_node_direct(struct node_base* n) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void gc_visit_node(struct node_base* n) {
|
void gc_visit_node(struct node_base* n) {
|
||||||
if(IS_INT(n) || n->gc_reachable) return;
|
if(n->gc_reachable) return;
|
||||||
n->gc_reachable = 1;
|
n->gc_reachable = 1;
|
||||||
|
|
||||||
if(n->tag == NODE_APP) {
|
if(n->tag == NODE_APP) {
|
||||||
@@ -170,7 +169,6 @@ void gmachine_split(struct gmachine* g, size_t n) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||||
if(IS_INT(b)) return b;
|
|
||||||
g->gc_node_count++;
|
g->gc_node_count++;
|
||||||
b->gc_next = g->gc_nodes;
|
b->gc_next = g->gc_nodes;
|
||||||
g->gc_nodes = b;
|
g->gc_nodes = b;
|
||||||
@@ -210,9 +208,7 @@ void unwind(struct gmachine* g) {
|
|||||||
|
|
||||||
while(1) {
|
while(1) {
|
||||||
struct node_base* peek = stack_peek(s, 0);
|
struct node_base* peek = stack_peek(s, 0);
|
||||||
if(IS_INT(peek)) {
|
if(peek->tag == NODE_APP) {
|
||||||
break;
|
|
||||||
} else if(peek->tag == NODE_APP) {
|
|
||||||
struct node_app* n = (struct node_app*) peek;
|
struct node_app* n = (struct node_app*) peek;
|
||||||
stack_push(s, n->left);
|
stack_push(s, n->left);
|
||||||
} else if(peek->tag == NODE_GLOBAL) {
|
} else if(peek->tag == NODE_GLOBAL) {
|
||||||
@@ -238,9 +234,7 @@ void unwind(struct gmachine* g) {
|
|||||||
extern void f_main(struct gmachine* s);
|
extern void f_main(struct gmachine* s);
|
||||||
|
|
||||||
void print_node(struct node_base* n) {
|
void print_node(struct node_base* n) {
|
||||||
if(IS_INT(n)) {
|
if(n->tag == NODE_APP) {
|
||||||
printf("%d", (int32_t) n);
|
|
||||||
} else if(n->tag == NODE_APP) {
|
|
||||||
struct node_app* app = (struct node_app*) n;
|
struct node_app* app = (struct node_app*) n;
|
||||||
print_node(app->left);
|
print_node(app->left);
|
||||||
putchar(' ');
|
putchar(' ');
|
||||||
@@ -252,6 +246,9 @@ void print_node(struct node_base* n) {
|
|||||||
printf("(Global: %p)", global->function);
|
printf("(Global: %p)", global->function);
|
||||||
} else if(n->tag == NODE_IND) {
|
} else if(n->tag == NODE_IND) {
|
||||||
print_node(((struct node_ind*) n)->next);
|
print_node(((struct node_ind*) n)->next);
|
||||||
|
} else if(n->tag == NODE_NUM) {
|
||||||
|
struct node_num* num = (struct node_num*) n;
|
||||||
|
printf("%d", num->value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
%option noyywrap
|
%option noyywrap
|
||||||
%option reentrant
|
%option reentrant
|
||||||
|
%option header-file="scanner.hpp"
|
||||||
|
|
||||||
%{
|
%{
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@@ -8,51 +9,37 @@
|
|||||||
#include "parse_driver.hpp"
|
#include "parse_driver.hpp"
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
|
||||||
#define YY_EXTRA_TYPE parse_driver*
|
#define YY_USER_ACTION \
|
||||||
#define YY_USER_ACTION drv.location.step(); drv.location.columns(yyleng);
|
drv.get_file_manager().write(yytext, yyleng); \
|
||||||
#define YY_INPUT(buf,result,max_size) \
|
LOC.step(); LOC.columns(yyleng);
|
||||||
{ \
|
#define LOC drv.get_current_location()
|
||||||
int c = yyextra->get(); \
|
|
||||||
result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \
|
|
||||||
}
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
\n { drv.location.lines(); }
|
\n { drv.get_current_location().lines(); drv.get_file_manager().mark_line(); }
|
||||||
[ ]+ {}
|
[ ]+ {}
|
||||||
\\ { return yy::parser::make_BACKSLASH(drv.location); }
|
\\ { return yy::parser::make_BACKSLASH(LOC); }
|
||||||
\+ { return yy::parser::make_PLUS(drv.location); }
|
\+ { return yy::parser::make_PLUS(LOC); }
|
||||||
\* { return yy::parser::make_TIMES(drv.location); }
|
\* { return yy::parser::make_TIMES(LOC); }
|
||||||
- { return yy::parser::make_MINUS(drv.location); }
|
- { return yy::parser::make_MINUS(LOC); }
|
||||||
\/ { return yy::parser::make_DIVIDE(drv.location); }
|
\/ { return yy::parser::make_DIVIDE(LOC); }
|
||||||
% { return yy::parser::make_MODULO(drv.location); }
|
[0-9]+ { return yy::parser::make_INT(atoi(yytext), LOC); }
|
||||||
== { return yy::parser::make_EQUALS(drv.location); }
|
defn { return yy::parser::make_DEFN(LOC); }
|
||||||
\<= { return yy::parser::make_LESS_EQUALS(drv.location); }
|
data { return yy::parser::make_DATA(LOC); }
|
||||||
` { return yy::parser::make_BACKTICK(drv.location); }
|
case { return yy::parser::make_CASE(LOC); }
|
||||||
[0-9]+ { return yy::parser::make_INT(atoi(yytext), drv.location); }
|
of { return yy::parser::make_OF(LOC); }
|
||||||
defn { return yy::parser::make_DEFN(drv.location); }
|
let { return yy::parser::make_LET(LOC); }
|
||||||
data { return yy::parser::make_DATA(drv.location); }
|
in { return yy::parser::make_IN(LOC); }
|
||||||
case { return yy::parser::make_CASE(drv.location); }
|
\{ { return yy::parser::make_OCURLY(LOC); }
|
||||||
of { return yy::parser::make_OF(drv.location); }
|
\} { return yy::parser::make_CCURLY(LOC); }
|
||||||
let { return yy::parser::make_LET(drv.location); }
|
\( { return yy::parser::make_OPAREN(LOC); }
|
||||||
in { return yy::parser::make_IN(drv.location); }
|
\) { return yy::parser::make_CPAREN(LOC); }
|
||||||
\{ { return yy::parser::make_OCURLY(drv.location); }
|
, { return yy::parser::make_COMMA(LOC); }
|
||||||
\} { return yy::parser::make_CCURLY(drv.location); }
|
-> { return yy::parser::make_ARROW(LOC); }
|
||||||
\( { return yy::parser::make_OPAREN(drv.location); }
|
= { return yy::parser::make_EQUAL(LOC); }
|
||||||
\) { return yy::parser::make_CPAREN(drv.location); }
|
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext), LOC); }
|
||||||
, { return yy::parser::make_COMMA(drv.location); }
|
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext), LOC); }
|
||||||
-> { return yy::parser::make_ARROW(drv.location); }
|
<<EOF>> { return yy::parser::make_YYEOF(LOC); }
|
||||||
= { return yy::parser::make_EQUAL(drv.location); }
|
|
||||||
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext), drv.location); }
|
|
||||||
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext), drv.location); }
|
|
||||||
<<EOF>> { return yy::parser::make_YYEOF(drv.location); }
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
void scanner_init(parse_driver* d, yyscan_t* scanner) {
|
|
||||||
yylex_init_extra(d, scanner);
|
|
||||||
}
|
|
||||||
void scanner_destroy(yyscan_t* scanner) {
|
|
||||||
yylex_destroy(*scanner);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -26,9 +26,9 @@ type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
auto it = mgr.types.find(name);
|
auto type = mgr.lookup(name);
|
||||||
if(it != mgr.types.end()) {
|
if(type) {
|
||||||
it->second->print(mgr, to);
|
type->print(mgr, to);
|
||||||
} else {
|
} else {
|
||||||
to << name;
|
to << name;
|
||||||
}
|
}
|
||||||
@@ -38,10 +38,6 @@ void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
|||||||
to << name;
|
to << name;
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_internal::print(const type_mgr& mgr, std::ostream& to) const {
|
|
||||||
to << "!" << name;
|
|
||||||
}
|
|
||||||
|
|
||||||
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
type_var* var;
|
type_var* var;
|
||||||
bool print_parenths = dynamic_cast<type_arr*>(mgr.resolve(left, var).get()) != nullptr;
|
bool print_parenths = dynamic_cast<type_arr*>(mgr.resolve(left, var).get()) != nullptr;
|
||||||
@@ -82,6 +78,12 @@ type_ptr type_mgr::new_arrow_type() {
|
|||||||
return type_ptr(new type_arr(new_type(), new_type()));
|
return type_ptr(new type_arr(new_type(), new_type()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type_ptr type_mgr::lookup(const std::string& var) const {
|
||||||
|
auto types_it = types.find(var);
|
||||||
|
if(types_it != types.end()) return types_it->second;
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||||
type_var* cast;
|
type_var* cast;
|
||||||
|
|
||||||
@@ -122,8 +124,7 @@ void type_mgr::unify(type_ptr l, type_ptr r, const std::optional<yy::location>&
|
|||||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||||
if(lid->name == rid->name &&
|
if(lid->name == rid->name &&
|
||||||
lid->arity == rid->arity &&
|
lid->arity == rid->arity)
|
||||||
lid->is_internal() == rid->is_internal())
|
|
||||||
return;
|
return;
|
||||||
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||||
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#include <optional>
|
#include <optional>
|
||||||
#include "location.hh"
|
#include "location.hh"
|
||||||
|
|
||||||
struct type_mgr;
|
class type_mgr;
|
||||||
|
|
||||||
struct type {
|
struct type {
|
||||||
virtual ~type() = default;
|
virtual ~type() = default;
|
||||||
@@ -46,17 +46,6 @@ struct type_base : public type {
|
|||||||
: name(std::move(n)), arity(a) {}
|
: name(std::move(n)), arity(a) {}
|
||||||
|
|
||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
|
||||||
virtual bool is_internal() const { return false; }
|
|
||||||
};
|
|
||||||
|
|
||||||
struct type_internal : public type_base {
|
|
||||||
type_internal(std::string n, int32_t a = 0)
|
|
||||||
: type_base(std::move(n), a) {}
|
|
||||||
|
|
||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
|
||||||
|
|
||||||
bool is_internal() const { return true; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_data : public type_base {
|
struct type_data : public type_base {
|
||||||
@@ -90,20 +79,23 @@ struct type_app : public type {
|
|||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_mgr {
|
class type_mgr {
|
||||||
int last_id = 0;
|
private:
|
||||||
std::map<std::string, type_ptr> types;
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
std::string new_type_name();
|
public:
|
||||||
type_ptr new_type();
|
std::string new_type_name();
|
||||||
type_ptr new_arrow_type();
|
type_ptr new_type();
|
||||||
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
void unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc = std::nullopt);
|
void unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc = std::nullopt);
|
||||||
type_ptr substitute(
|
type_ptr substitute(
|
||||||
const std::map<std::string, type_ptr>& subst,
|
const std::map<std::string, type_ptr>& subst,
|
||||||
const type_ptr& t) const;
|
const type_ptr& t) const;
|
||||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
type_ptr lookup(const std::string& var) const;
|
||||||
void bind(const std::string& s, type_ptr t);
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
void bind(const std::string& s, type_ptr t);
|
||||||
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||||
|
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
|
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
|
||||||
if(parent != nullptr) parent->find_free(mgr, into);
|
if(parent != nullptr) parent->find_free(mgr, into);
|
||||||
@@ -34,20 +35,23 @@ bool type_env::is_global(const std::string& name) const {
|
|||||||
|
|
||||||
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
|
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
|
||||||
auto it = names.find(name);
|
auto it = names.find(name);
|
||||||
if(it != names.end()) {
|
|
||||||
// Local names shouldn't need mangling.
|
|
||||||
assert(it->second.vis == visibility::global);
|
|
||||||
|
|
||||||
it->second.mangled_name = mangled;
|
// Can't set mangled name for non-existent variable.
|
||||||
}
|
assert(it != names.end());
|
||||||
|
// Local names shouldn't need mangling.
|
||||||
|
assert(it->second.vis == visibility::global);
|
||||||
|
|
||||||
|
it->second.mangled_name = mangled;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string& type_env::get_mangled_name(const std::string& name) const {
|
const std::string& type_env::get_mangled_name(const std::string& name) const {
|
||||||
auto it = names.find(name);
|
auto it = names.find(name);
|
||||||
if(it != names.end())
|
if(it != names.end()) {
|
||||||
return (it->second.mangled_name != "") ? it->second.mangled_name : name;
|
assert(it->second.mangled_name);
|
||||||
if(parent) return parent->get_mangled_name(name);
|
return *it->second.mangled_name;
|
||||||
return name;
|
}
|
||||||
|
assert(parent != nullptr);
|
||||||
|
return parent->get_mangled_name(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr type_env::lookup_type(const std::string& name) const {
|
type_ptr type_env::lookup_type(const std::string& name) const {
|
||||||
@@ -59,7 +63,7 @@ type_ptr type_env::lookup_type(const std::string& name) const {
|
|||||||
|
|
||||||
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
|
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
|
||||||
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
|
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
|
||||||
names[name] = variable_data(std::move(new_scheme), v, "");
|
names[name] = variable_data(std::move(new_scheme), v, std::nullopt);
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
|
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <optional>
|
||||||
#include "graph.hpp"
|
#include "graph.hpp"
|
||||||
#include "type.hpp"
|
#include "type.hpp"
|
||||||
|
|
||||||
@@ -10,39 +11,41 @@ using type_env_ptr = std::shared_ptr<type_env>;
|
|||||||
|
|
||||||
enum class visibility { global,local };
|
enum class visibility { global,local };
|
||||||
|
|
||||||
struct type_env {
|
class type_env {
|
||||||
struct variable_data {
|
private:
|
||||||
type_scheme_ptr type;
|
struct variable_data {
|
||||||
visibility vis;
|
type_scheme_ptr type;
|
||||||
std::string mangled_name;
|
visibility vis;
|
||||||
|
std::optional<std::string> mangled_name;
|
||||||
|
|
||||||
variable_data()
|
variable_data()
|
||||||
: variable_data(nullptr, visibility::local, "") {}
|
: variable_data(nullptr, visibility::local, std::nullopt) {}
|
||||||
variable_data(type_scheme_ptr t, visibility v, std::string n)
|
variable_data(type_scheme_ptr t, visibility v, std::optional<std::string> n)
|
||||||
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
type_env_ptr parent;
|
type_env_ptr parent;
|
||||||
std::map<std::string, variable_data> names;
|
std::map<std::string, variable_data> names;
|
||||||
std::map<std::string, type_ptr> type_names;
|
std::map<std::string, type_ptr> type_names;
|
||||||
|
|
||||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
public:
|
||||||
type_env() : type_env(nullptr) {}
|
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
||||||
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||||
void find_free_except(const type_mgr& mgr, const group& avoid,
|
void find_free_except(const type_mgr& mgr, const group& avoid,
|
||||||
std::set<std::string>& into) const;
|
std::set<std::string>& into) const;
|
||||||
type_scheme_ptr lookup(const std::string& name) const;
|
type_scheme_ptr lookup(const std::string& name) const;
|
||||||
bool is_global(const std::string& name) const;
|
bool is_global(const std::string& name) const;
|
||||||
void set_mangled_name(const std::string& name, const std::string& mangled);
|
void set_mangled_name(const std::string& name, const std::string& mangled);
|
||||||
const std::string& get_mangled_name(const std::string& name) const;
|
const std::string& get_mangled_name(const std::string& name) const;
|
||||||
type_ptr lookup_type(const std::string& name) const;
|
type_ptr lookup_type(const std::string& name) const;
|
||||||
void bind(const std::string& name, type_ptr t,
|
void bind(const std::string& name, type_ptr t,
|
||||||
visibility v = visibility::local);
|
visibility v = visibility::local);
|
||||||
void bind(const std::string& name, type_scheme_ptr t,
|
void bind(const std::string& name, type_scheme_ptr t,
|
||||||
visibility v = visibility::local);
|
visibility v = visibility::local);
|
||||||
void bind_type(const std::string& type_name, type_ptr t);
|
void bind_type(const std::string& type_name, type_ptr t);
|
||||||
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
|
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
102
code/typesafe-imperative/TypesafeImp.idr
Normal file
102
code/typesafe-imperative/TypesafeImp.idr
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
data Reg = A | B | R
|
||||||
|
|
||||||
|
data Ty = IntTy | BoolTy
|
||||||
|
|
||||||
|
TypeState : Type
|
||||||
|
TypeState = (Ty, Ty, Ty)
|
||||||
|
|
||||||
|
getRegTy : Reg -> TypeState -> Ty
|
||||||
|
getRegTy A (a, _, _) = a
|
||||||
|
getRegTy B (_, b, _) = b
|
||||||
|
getRegTy R (_, _, r) = r
|
||||||
|
|
||||||
|
setRegTy : Reg -> Ty -> TypeState -> TypeState
|
||||||
|
setRegTy A a (_, b, r) = (a, b, r)
|
||||||
|
setRegTy B b (a, _, r) = (a, b, r)
|
||||||
|
setRegTy R r (a, b, _) = (a, b, r)
|
||||||
|
|
||||||
|
data Expr : TypeState -> Ty -> Type where
|
||||||
|
Lit : Int -> Expr s IntTy
|
||||||
|
Load : (r : Reg) -> Expr s (getRegTy r s)
|
||||||
|
Add : Expr s IntTy -> Expr s IntTy -> Expr s IntTy
|
||||||
|
Leq : Expr s IntTy -> Expr s IntTy -> Expr s BoolTy
|
||||||
|
Not : Expr s BoolTy -> Expr s BoolTy
|
||||||
|
|
||||||
|
mutual
|
||||||
|
data Stmt : TypeState -> TypeState -> TypeState -> Type where
|
||||||
|
Store : (r : Reg) -> Expr s t -> Stmt l s (setRegTy r t s)
|
||||||
|
If : Expr s BoolTy -> Prog l s n -> Prog l s n -> Stmt l s n
|
||||||
|
Loop : Prog s s s -> Stmt l s s
|
||||||
|
Break : Stmt s s s
|
||||||
|
|
||||||
|
data Prog : TypeState -> TypeState -> TypeState -> Type where
|
||||||
|
Nil : Prog l s s
|
||||||
|
(::) : Stmt l s n -> Prog l n m -> Prog l s m
|
||||||
|
|
||||||
|
initialState : TypeState
|
||||||
|
initialState = (IntTy, IntTy, IntTy)
|
||||||
|
|
||||||
|
testProg : Prog Main.initialState Main.initialState Main.initialState
|
||||||
|
testProg =
|
||||||
|
[ Store A (Lit 1 `Leq` Lit 2)
|
||||||
|
, If (Load A)
|
||||||
|
[ Store A (Lit 1) ]
|
||||||
|
[ Store A (Lit 2) ]
|
||||||
|
, Store B (Lit 2)
|
||||||
|
, Store R (Add (Load A) (Load B))
|
||||||
|
]
|
||||||
|
|
||||||
|
prodProg : Prog Main.initialState Main.initialState Main.initialState
|
||||||
|
prodProg =
|
||||||
|
[ Store A (Lit 7)
|
||||||
|
, Store B (Lit 9)
|
||||||
|
, Store R (Lit 0)
|
||||||
|
, Loop
|
||||||
|
[ If (Load A `Leq` Lit 0)
|
||||||
|
[ Break ]
|
||||||
|
[ Store R (Load R `Add` Load B)
|
||||||
|
, Store A (Load A `Add` Lit (-1))
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
|
||||||
|
repr : Ty -> Type
|
||||||
|
repr IntTy = Int
|
||||||
|
repr BoolTy = Bool
|
||||||
|
|
||||||
|
data State : TypeState -> Type where
|
||||||
|
MkState : (repr a, repr b, repr c) -> State (a, b, c)
|
||||||
|
|
||||||
|
getReg : (r : Reg) -> State s -> repr (getRegTy r s)
|
||||||
|
getReg A (MkState (a, _, _)) = a
|
||||||
|
getReg B (MkState (_, b, _)) = b
|
||||||
|
getReg R (MkState (_, _, r)) = r
|
||||||
|
|
||||||
|
setReg : (r : Reg) -> repr t -> State s -> State (setRegTy r t s)
|
||||||
|
setReg A a (MkState (_, b, r)) = MkState (a, b, r)
|
||||||
|
setReg B b (MkState (a, _, r)) = MkState (a, b, r)
|
||||||
|
setReg R r (MkState (a, b, _)) = MkState (a, b, r)
|
||||||
|
|
||||||
|
expr : Expr s t -> State s -> repr t
|
||||||
|
expr (Lit i) _ = i
|
||||||
|
expr (Load r) s = getReg r s
|
||||||
|
expr (Add l r) s = expr l s + expr r s
|
||||||
|
expr (Leq l r) s = expr l s <= expr r s
|
||||||
|
expr (Not e) s = not $ expr e s
|
||||||
|
|
||||||
|
mutual
|
||||||
|
stmt : Stmt l s n -> State s -> Either (State l) (State n)
|
||||||
|
stmt (Store r e) s = Right $ setReg r (expr e s) s
|
||||||
|
stmt (If c t e) s = if expr c s then prog t s else prog e s
|
||||||
|
stmt (Loop p) s =
|
||||||
|
case prog p s >>= stmt (Loop p) of
|
||||||
|
Right s => Right s
|
||||||
|
Left s => Right s
|
||||||
|
stmt Break s = Left s
|
||||||
|
|
||||||
|
prog : Prog l s n -> State s -> Either (State l) (State n)
|
||||||
|
prog Nil s = Right s
|
||||||
|
prog (st::p) s = stmt st s >>= prog p
|
||||||
|
|
||||||
|
run : Prog l s l -> State s -> State l
|
||||||
|
run p s = either id id $ prog p s
|
||||||
5
config-gen.toml
Normal file
5
config-gen.toml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
[params]
|
||||||
|
[params.submoduleLinks]
|
||||||
|
[params.submoduleLinks.aoc2020]
|
||||||
|
url = "https://dev.danilafe.com/Advent-of-Code/AdventOfCode-2020/src/commit/7a8503c3fe1aa7e624e4d8672aa9b56d24b4ba82"
|
||||||
|
path = "aoc-2020"
|
||||||
@@ -6,6 +6,15 @@ pygmentsCodeFences = true
|
|||||||
pygmentsUseClasses = true
|
pygmentsUseClasses = true
|
||||||
summaryLength = 20
|
summaryLength = 20
|
||||||
|
|
||||||
|
[outputFormats]
|
||||||
|
[outputFormats.Toml]
|
||||||
|
name = "toml"
|
||||||
|
mediaType = "application/toml"
|
||||||
|
isHTML = false
|
||||||
|
|
||||||
|
[outputs]
|
||||||
|
home = ["html","rss","toml"]
|
||||||
|
|
||||||
[markup]
|
[markup]
|
||||||
[markup.tableOfContents]
|
[markup.tableOfContents]
|
||||||
endLevel = 4
|
endLevel = 4
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
---
|
---
|
||||||
title: About
|
title: About
|
||||||
---
|
---
|
||||||
|
{{< donate_css >}}
|
||||||
|
|
||||||
I'm Daniel, a Computer Science student currently working towards my Master's Degree at Oregon State University.
|
I'm Daniel, a Computer Science student currently working towards my Master's Degree at Oregon State University.
|
||||||
Due to my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
|
Due to my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
|
||||||
group, gaining same experience in formal verification, domain specific language, and explainable computing.
|
group, gaining same experience in formal verification, domain specific language, and explainable computing.
|
||||||
@@ -8,3 +10,34 @@ group, gaining same experience in formal verification, domain specific language,
|
|||||||
For work, school, and hobby projects, I use a variety of programming languages, most commonly C/C++,
|
For work, school, and hobby projects, I use a variety of programming languages, most commonly C/C++,
|
||||||
Haskell, [Crystal](https://crystal-lang.org/), and [Elm](https://elm-lang.org/). I also have experience
|
Haskell, [Crystal](https://crystal-lang.org/), and [Elm](https://elm-lang.org/). I also have experience
|
||||||
with Java, Python, Haxe, and JavaScript.
|
with Java, Python, Haxe, and JavaScript.
|
||||||
|
|
||||||
|
A few notes about me or this site:
|
||||||
|
* __Correctness__: I mostly write technical content. Even though I proofread my articles, there's
|
||||||
|
always a fairly good chance that I'm wrong. You should always use your best judgement when reading
|
||||||
|
anything on this site -- if something seems wrong, it may very well be. I'm far from an expert.
|
||||||
|
* __Schedule__: I do not have a set post schedule. There are many reasons for this:
|
||||||
|
schoolwork, personal life, lack of inspiration. It also takes a _very_ long time for
|
||||||
|
me to write a single article. My article on [polymorphic type checking]({{< relref "/blog/10_compiler_polymorphism.md" >}})
|
||||||
|
is around 8,000 words long; besides writing it, I have to edit it, link up all the code
|
||||||
|
references, and proofread the final result. And of course, I need to write the code and
|
||||||
|
occasionally do some research.
|
||||||
|
* __Design__: I am doing my best to keep this website accessible and easy on the eyes.
|
||||||
|
I'm also doing my best to avoid any and all uses of JavaScript. I used to use a lot of
|
||||||
|
uMatrix, and most of the websites I browsed during this time were broken. Similarly,
|
||||||
|
a lot of websites were unusable on my weaker machines. So, I'm doing my part and
|
||||||
|
making this site usable without any JavaScript, and, as it seems to me, even
|
||||||
|
without any CSS.
|
||||||
|
* __Source code__: This blog is open source, but not on GitHub. Instead,
|
||||||
|
you can find the code on my [Gitea instance](https://dev.danilafe.com/Web-Projects/blog-static).
|
||||||
|
If you use this code for your own site, I would prefer that you don't copy the theme.
|
||||||
|
|
||||||
|
### Donate
|
||||||
|
I don't run ads, nor do I profit from writing anything on here. I have no trouble paying for hosting,
|
||||||
|
and I write my articles voluntarily, for my own enjoyment. However, if you found something particularly
|
||||||
|
helpful on here, and would like to buy me a cup of coffee or help host the site, you can donate using
|
||||||
|
the method(s) below.
|
||||||
|
|
||||||
|
{{< donation_methods >}}
|
||||||
|
{{< donation_method "Bitcoin" "1BbXPZhdzv4xHq5LYhme3xBiUsHw5fmafd" >}}
|
||||||
|
{{< donation_method "Ethereum" "0xd111E49344bEC80570e68EE0A00b87B1EFcb5D56" >}}
|
||||||
|
{{< /donation_methods >}}
|
||||||
|
|||||||
351
content/blog/00_aoc_coq.md
Normal file
351
content/blog/00_aoc_coq.md
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
---
|
||||||
|
title: "Advent of Code in Coq - Day 1"
|
||||||
|
date: 2020-12-02T18:44:56-08:00
|
||||||
|
tags: ["Advent of Code", "Coq"]
|
||||||
|
favorite: true
|
||||||
|
---
|
||||||
|
|
||||||
|
The first puzzle of this year's [Advent of Code](https://adventofcode.com) was quite
|
||||||
|
simple, which gave me a thought: "Hey, this feels within reach for me to formally verify!"
|
||||||
|
At first, I wanted to formalize and prove the correctness of the [two-pointer solution](https://www.geeksforgeeks.org/two-pointers-technique/).
|
||||||
|
However, I didn't have the time to mess around with the various properties of sorted
|
||||||
|
lists and their traversals. So, I settled for the brute force solution. Despite
|
||||||
|
the simplicity of its implementation, there is plenty to talk about when proving
|
||||||
|
its correctness using Coq. Let's get right into it!
|
||||||
|
|
||||||
|
Before we start, in the interest of keeping the post self-contained, here's the (paraphrased)
|
||||||
|
problem statement:
|
||||||
|
|
||||||
|
> Given an unsorted list of numbers, find two distinct numbers that add up to 2020.
|
||||||
|
|
||||||
|
With this in mind, we can move on to writing some Coq!
|
||||||
|
|
||||||
|
### Defining the Functions
|
||||||
|
The first step to proving our code correct is to actually write the code! To start with,
|
||||||
|
let's write a helper function that, given a number `x`, tries to find another number
|
||||||
|
`y` such that `x + y = 2020`. In fact, rather than hardcoding the desired
|
||||||
|
sum to `2020`, let's just use another argument called `total`. The code is quite simple:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 11 18 >}}
|
||||||
|
|
||||||
|
Here, `is` is the list of numbers that we want to search.
|
||||||
|
We proceed by case analysis: if the list is empty, we can't
|
||||||
|
find a match, so we return `None` (the Coq equivalent of Haskell's `Nothing`).
|
||||||
|
On the other hand, if the list has at least one element `y`, we see if it adds
|
||||||
|
up to `total`, and return `Some y` (equivalent to `Just y` in Haskell) if it does.
|
||||||
|
If it doesn't, we continue our search into the rest of the list.
|
||||||
|
|
||||||
|
It's somewhat unusual, in my experience, to put the list argument first when writing
|
||||||
|
functions in a language with [currying](https://wiki.haskell.org/Currying). However,
|
||||||
|
it seems as though Coq's `simpl` tactic, which we will use later, works better
|
||||||
|
for our purposes when the argument being case analyzed is given first.
|
||||||
|
|
||||||
|
We can now use `find_matching` to define our `find_sum` function, which solves part 1.
|
||||||
|
Here's the code:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 20 28 >}}
|
||||||
|
|
||||||
|
For every `x` that we encounter in our input list `is`, we want to check if there's
|
||||||
|
a matching number in the rest of the list. We only search the remainder of the list
|
||||||
|
because we can't use `x` twice: the `x` and `y` we return that add up to `total`
|
||||||
|
must be different elements. We use `find_matching` to try find a complementary number
|
||||||
|
for `x`. If we don't find it, this `x` isn't it, so we recursively move on to `xs`.
|
||||||
|
On the other hand, if we _do_ find a matching `y`, we're done! We return `(x,y)`,
|
||||||
|
wrapped in `Some` to indicate that we found something useful.
|
||||||
|
|
||||||
|
What about that `(* Was buggy! *)` line? Well, it so happens that my initial
|
||||||
|
implementation had a bug on this line, one that came up as I was proving
|
||||||
|
the correctness of my function. When I wasn't able to prove a particular
|
||||||
|
behavior in one of the cases, I realized something was wrong. In short,
|
||||||
|
my proof actually helped me find and fix a bug!
|
||||||
|
|
||||||
|
This is all the code we'll need to get our solution. Next, let's talk about some
|
||||||
|
properties of our two functions.
|
||||||
|
|
||||||
|
### Our First Lemma
|
||||||
|
When we call `find_matching`, we want to be sure that if we get a number,
|
||||||
|
it does indeed add up to our expected total. We can state it a little bit more
|
||||||
|
formally as follows:
|
||||||
|
|
||||||
|
> For any numbers `k` and `x`, and for any list of number `is`,
|
||||||
|
> if `find_matching is k x` returns a number `y`, then `x + y = k`.
|
||||||
|
|
||||||
|
And this is how we write it in Coq:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 30 31 >}}
|
||||||
|
|
||||||
|
The arrow, `->`, reads "implies". Other than that, I think this
|
||||||
|
property reads pretty well. The proof, unfortunately, is a little bit more involved.
|
||||||
|
Here are the first few lines:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 32 35 >}}
|
||||||
|
|
||||||
|
We start with the `intros is` tactic, which is akin to saying
|
||||||
|
"consider a particular list of integers `is`". We do this without losing
|
||||||
|
generality: by simply examining a concrete list, we've said nothing about
|
||||||
|
what that list is like. We then proceed by induction on `is`.
|
||||||
|
|
||||||
|
To prove something by induction for a list, we need to prove two things:
|
||||||
|
|
||||||
|
* The __base case__. Whatever property we want to hold, it must
|
||||||
|
hold for the empty list, which is the simplest possible list.
|
||||||
|
In our case, this means `find_matching` searching an empty list.
|
||||||
|
* The __inductive case__. Assuming that a property holds for any list
|
||||||
|
`[b, c, ...]`, we want to show that the property also holds for
|
||||||
|
the list `[a, b, c, ...]`. That is, the property must remain true if we
|
||||||
|
prepend an element to a list for which this property holds.
|
||||||
|
|
||||||
|
These two things combined give us a proof for _all_ lists, which is exactly
|
||||||
|
what we want! If you don't belive me, here's how it works. Suppose you want
|
||||||
|
to prove that some property `P` holds for `[1,2,3,4]`. Given the base
|
||||||
|
case, we know that `P []` holds. Next, by the inductive case, since
|
||||||
|
`P []` holds, we can prepend `4` to the list, and the property will
|
||||||
|
still hold. Thus, `P [4]`. Now that `P [4]` holds, we can again prepend
|
||||||
|
an element to the list, this time a `3`, and conclude that `P [3,4]`.
|
||||||
|
Repeating this twice more, we arrive at our desired fact: `P [1,2,3,4]`.
|
||||||
|
|
||||||
|
When we write `induction is`, Coq will generate two proof goals for us,
|
||||||
|
one for the base case, and one for the inductive case. We will have to prove
|
||||||
|
each of them separately. Since we have
|
||||||
|
not yet introduced the variables `k`, `x`, and `y`, they remain
|
||||||
|
inside a `forall` quantifier at that time. To be able to refer
|
||||||
|
to them, we want to use `intros`. We want to do this in both the
|
||||||
|
base and the inductive case. To quickly do this, we use Coq's `;`
|
||||||
|
operator. When we write `a; b`, Coq runs the tactic `a`, and then
|
||||||
|
runs the tactic `b` in every proof goal generated by `a`. This is
|
||||||
|
exactly what we want.
|
||||||
|
|
||||||
|
There's one more variable inside our second `intros`: `Hev`.
|
||||||
|
This variable refers to the hypothesis of our statement:
|
||||||
|
that is, the part on the left of the `->`. To prove that `A`
|
||||||
|
implies `B`, we assume that `A` holds, and try to argue `B` from there.
|
||||||
|
Here is no different: when we use `intros Hev`, we say, "suppose that you have
|
||||||
|
a proof that `find_matching` evaluates to `Some y`, called `Hev`". The thing
|
||||||
|
on the right of `->` becomes our proof goal.
|
||||||
|
|
||||||
|
Now, it's time to look at the cases. To focus on one case at a time,
|
||||||
|
we use `-`. The first case is our base case. Here's what Coq prints
|
||||||
|
out at this time:
|
||||||
|
|
||||||
|
```
|
||||||
|
k, x, y : nat
|
||||||
|
Hev : find_matching nil k x = Some y
|
||||||
|
|
||||||
|
========================= (1 / 1)
|
||||||
|
|
||||||
|
x + y = k
|
||||||
|
```
|
||||||
|
|
||||||
|
All the stuff above the `===` line are our hypotheses. We know
|
||||||
|
that we have some `k`, `x`, and `y`, all of which are numbers.
|
||||||
|
We also have the assumption that `find_matching` returns `Some y`.
|
||||||
|
In the base case, `is` is just `[]`, and this is reflected in the
|
||||||
|
type for `Hev`. To make this more clear, we'll simplify the call to `find_matching`
|
||||||
|
in `Hev`, using `simpl in Hev`. Now, here's what Coq has to say about `Hev`:
|
||||||
|
|
||||||
|
```
|
||||||
|
Hev : None = Some y
|
||||||
|
```
|
||||||
|
|
||||||
|
Well, this doesn't make any sense. How can something be equal to nothing?
|
||||||
|
We ask Coq this question using `inversion Hev`. Effectively, the question
|
||||||
|
that `inversion` asks is: what are the possible ways we could have acquired `Hev`?
|
||||||
|
Coq generates a proof goal for each of these possible ways. Alas, there are
|
||||||
|
no ways to arrive at this contradictory assumption: the number of proof sub-goals
|
||||||
|
is zero. This means we're done with the base case!
|
||||||
|
|
||||||
|
The inductive case is the meat of this proof. Here's the corresponding part
|
||||||
|
of the Coq source file:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 36 40 >}}
|
||||||
|
|
||||||
|
This time, the proof state is more complicated:
|
||||||
|
|
||||||
|
```
|
||||||
|
a : nat
|
||||||
|
is : list nat
|
||||||
|
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||||
|
k, x, y : nat
|
||||||
|
Hev : find_matching (a :: is) k x = Some y
|
||||||
|
|
||||||
|
========================= (1 / 1)
|
||||||
|
|
||||||
|
x + y = k
|
||||||
|
```
|
||||||
|
|
||||||
|
Following the footsteps of our informal description of the inductive case,
|
||||||
|
Coq has us prove our property for `(a :: is)`, or the list `is` to which
|
||||||
|
`a` is being prepended. Like before, we assume that our property holds for `is`.
|
||||||
|
This is represented in the __induction hypothesis__ `IHis`. It states that if
|
||||||
|
`find_matching` finds a `y` in `is`, it must add up to `k`. However, `IHis`
|
||||||
|
doesn't tell us anything about `a :: is`: that's our job. We also still have
|
||||||
|
`Hev`, which is our assumption that `find_matching` finds a `y` in `(a :: is)`.
|
||||||
|
Running `simpl in Hev` gives us the following:
|
||||||
|
|
||||||
|
```
|
||||||
|
Hev : (if x + a =? k then Some a else find_matching is k x) = Some y
|
||||||
|
```
|
||||||
|
|
||||||
|
The result of `find_matching` now depends on whether or not the new element `a`
|
||||||
|
adds up to `k`. If it does, then `find_matching` will return `a`, which means
|
||||||
|
that `y` is the same as `a`. If not, it must be that `find_matching` finds
|
||||||
|
the `y` in the rest of the list, `is`. We're not sure which of the possibilities
|
||||||
|
is the case. Fortunately, we don't need to be!
|
||||||
|
If we can prove that the `y` that `find_matching` finds is correct regardless
|
||||||
|
of whether `a` adds up to `k` or not, we're good to go! To do this,
|
||||||
|
we perform case analysis using `destruct`.
|
||||||
|
|
||||||
|
Our particular use of `destruct` says: check any possible value for `x + a ?= k`,
|
||||||
|
and create an equation `Heq` that tells us what that value is. `?=` returns a boolean
|
||||||
|
value, and so `destruct` generates two new goals: one where the function returns `true`,
|
||||||
|
and one where it returns `false`. We start with the former. Here's the proof state:
|
||||||
|
|
||||||
|
```
|
||||||
|
a : nat
|
||||||
|
is : list nat
|
||||||
|
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||||
|
k, x, y : nat
|
||||||
|
Heq : (x + a =? k) = true
|
||||||
|
Hev : Some a = Some y
|
||||||
|
|
||||||
|
========================= (1 / 1)
|
||||||
|
|
||||||
|
x + y = k
|
||||||
|
```
|
||||||
|
|
||||||
|
There is a new hypothesis: `Heq`. It tells us that we're currently
|
||||||
|
considering the case where `?=` evaluates to `true`. Also,
|
||||||
|
`Hev` has been considerably simplified: now that we know the condition
|
||||||
|
of the `if` expression, we can just replace it with the `then` branch.
|
||||||
|
|
||||||
|
Looking at `Hev`, we can see that our prediction was right: `a` is equal to `y`. After all,
|
||||||
|
if they weren't, `Some a` wouldn't equal to `Some y`. To make Coq
|
||||||
|
take this information into account, we use `injection`. This will create
|
||||||
|
a new hypothesis, `a = y`. But if one is equal to the other, why don't we
|
||||||
|
just use only one of these variables everywhere? We do exactly that by using
|
||||||
|
`subst`, which replaces `a` with `y` everywhere in our proof.
|
||||||
|
|
||||||
|
The proof state is now:
|
||||||
|
|
||||||
|
```
|
||||||
|
is : list nat
|
||||||
|
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||||
|
k, x, y : nat
|
||||||
|
Heq : (x + y =? k) = true
|
||||||
|
|
||||||
|
========================= (1 / 1)
|
||||||
|
|
||||||
|
x + y = k
|
||||||
|
```
|
||||||
|
|
||||||
|
We're close, but there's one more detail to keep in mind. Our goal, `x + y = k`,
|
||||||
|
is the __proposition__ that `x + y` is equal to `k`. However, `Heq` tells us
|
||||||
|
that the __function__ `?=` evaluates to `true`. These are fundamentally different.
|
||||||
|
One talks about mathematical equality, while the other about some function `?=`
|
||||||
|
defined somewhere in Coq's standard library. Who knows - maybe there's a bug in
|
||||||
|
Coq's implementation! Fortunately, Coq comes with a proof that if two numbers
|
||||||
|
are equal according to `?=`, they are mathematically equal. This proof is
|
||||||
|
called `eqb_nat_eq`. We tell Coq to use this with `apply`. Our proof goal changes to:
|
||||||
|
|
||||||
|
```
|
||||||
|
true = (x + y =? k)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is _almost_ like `Heq`, but flipped. Instead of manually flipping it and using `apply`
|
||||||
|
with `Heq`, I let Coq do the rest of the work using `auto`.
|
||||||
|
|
||||||
|
Phew! All this for the `true` case of `=?`. Next, what happens if `x + a` does not equal `k`?
|
||||||
|
Here's the proof state at this time:
|
||||||
|
|
||||||
|
```
|
||||||
|
a : nat
|
||||||
|
is : list nat
|
||||||
|
IHis : forall k x y : nat, find_matching is k x = Some y -> x + y = k
|
||||||
|
k, x, y : nat
|
||||||
|
Heq : (x + a =? k) = false
|
||||||
|
Hev : find_matching is k x = Some y
|
||||||
|
|
||||||
|
========================= (1 / 1)
|
||||||
|
|
||||||
|
x + y = k
|
||||||
|
```
|
||||||
|
|
||||||
|
Since `a` was not what it was looking for, `find_matching` moved on to `is`. But hey,
|
||||||
|
we're in the inductive case! We are assuming that `find_matching` will work properly
|
||||||
|
with the list `is`. Since `find_matching` found its `y` in `is`, this should be all we need!
|
||||||
|
We use our induction hypothesis `IHis` with `apply`. `IHis` itself does not know that
|
||||||
|
`find_matching` moved on to `is`, so it asks us to prove it. Fortunately, `Hev` tells us
|
||||||
|
exactly that, so we use `assumption`, and the proof is complete! Quod erat demonstrandum, QED!
|
||||||
|
|
||||||
|
### The Rest of the Owl
|
||||||
|
Here are a couple of other properties of `find_matching`. For brevity's sake, I will
|
||||||
|
not go through their proofs step-by-step. I find that the best way to understand
|
||||||
|
Coq proofs is to actually step through them in the IDE!
|
||||||
|
|
||||||
|
First on the list is `find_matching_skip`. Here's the type:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 42 43 >}}
|
||||||
|
|
||||||
|
It reads: if we correctly find a number in a small list `is`, we can find that same number
|
||||||
|
even if another number is prepended to `is`. That makes sense: _adding_ a number to
|
||||||
|
a list doesn't remove whatever we found in it! I used this lemma to prove another,
|
||||||
|
`find_matching_works`:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 53 54 >}}
|
||||||
|
|
||||||
|
This reads, if there _is_ an element `y` in `is` that adds up to `k` with `x`, then
|
||||||
|
`find_matching` will find it. This is an important property. After all, if it didn't
|
||||||
|
hold, it would mean that `find_matching` would occasionally fail to find a matching
|
||||||
|
number, even though it's there! We can't have that.
|
||||||
|
|
||||||
|
Finally, we want to specify what it means for `find_sum`, our solution function, to actually
|
||||||
|
work. The naive definition would be:
|
||||||
|
|
||||||
|
> Given a list of integers, `find_sum` always finds a pair of numbers that add up to `k`.
|
||||||
|
|
||||||
|
Unfortunately, this is not true. What if, for instance, we give `find_sum` an empty list?
|
||||||
|
There are no numbers from that list to find and add together. Even a non-empty list
|
||||||
|
may not include such a pair! We need a way to characterize valid input lists. I claim
|
||||||
|
that all lists from this Advent of Code puzzle are guaranteed to have two numbers that
|
||||||
|
add up to our goal, and that these numbers are not equal to each other. In Coq,
|
||||||
|
we state this as follows:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 8 9 >}}
|
||||||
|
|
||||||
|
This defines a new property, `has_pair t is` (read "`is` has a pair of numbers that add to `t`"),
|
||||||
|
which means:
|
||||||
|
|
||||||
|
> There are two numbers `n1` and `n2` such that, they are not equal to each other (`n1<>n2`) __and__
|
||||||
|
> the number `n1` is an element of `is` (`In n1 is`) __and__
|
||||||
|
> the number `n2` is an element of `is` (`In n2 is`) __and__
|
||||||
|
> the two numbers add up to `t` (`n1 + n2 = t`).
|
||||||
|
|
||||||
|
When making claims about the correctness of our algorithm, we will assume that this
|
||||||
|
property holds. Finally, here's the theorem we want to prove:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 68 70 >}}
|
||||||
|
|
||||||
|
It reads, "for any total `k` and list `is`, if `is` has a pair of numbers that add to `k`,
|
||||||
|
then `find_sum` will return a pair of numbers `x` and `y` that add to `k`".
|
||||||
|
There's some nuance here. We hardly reference the `has_pair` property in this definition,
|
||||||
|
and for good reason. Our `has_pair` hypothesis only says that there is _at least one_
|
||||||
|
pair of numbers in `is` that meets our criteria. However, this pair need not be the only
|
||||||
|
one, nor does it need to be the one returned by `find_sum`! However, if we have many pairs,
|
||||||
|
we want to confirm that `find_sum` will find one of them. Finally, here is the proof.
|
||||||
|
I will not be able to go through it in detail in this post, but I did comment it to
|
||||||
|
make it easier to read:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day1.v" 71 106 >}}
|
||||||
|
|
||||||
|
Coq seems happy with it, and so am I! The bug I mentioned earlier popped up on line 96.
|
||||||
|
I had accidentally made `find_sum` return `None` if it couldn't find a complement
|
||||||
|
for the `x` it encountered. This meant that it never recursed into the remaining
|
||||||
|
list `xs`, and thus, the pair was never found at all! It thus became impossible
|
||||||
|
to prove that `find_sum` will return `Some y`, and I had to double back
|
||||||
|
and check my definitions.
|
||||||
|
|
||||||
|
I hope you enjoyed this post! If you're interested to learn more about Coq, I strongly recommend
|
||||||
|
checking out [Software Foundations](https://softwarefoundations.cis.upenn.edu/), a series
|
||||||
|
of books on Coq written as comments in a Coq source file! In particular, check out
|
||||||
|
[Logical Foundations](https://softwarefoundations.cis.upenn.edu/lf-current/index.html)
|
||||||
|
for an introduction to using Coq. Thanks for reading!
|
||||||
@@ -145,4 +145,5 @@ Here are the posts that I've written so far for this series:
|
|||||||
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
|
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
|
||||||
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
|
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
|
||||||
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})
|
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})
|
||||||
|
* [Cleanup]({{< relref "13_compiler_cleanup/index.md" >}})
|
||||||
|
|
||||||
|
|||||||
940
content/blog/01_aoc_coq.md
Normal file
940
content/blog/01_aoc_coq.md
Normal file
@@ -0,0 +1,940 @@
|
|||||||
|
---
|
||||||
|
title: "Advent of Code in Coq - Day 8"
|
||||||
|
date: 2021-01-10T22:48:39-08:00
|
||||||
|
tags: ["Advent of Code", "Coq"]
|
||||||
|
---
|
||||||
|
|
||||||
|
Huh? We're on day 8? What happened to days 2 through 7?
|
||||||
|
|
||||||
|
Well, for the most part, I didn't think they were that interesting from the Coq point of view.
|
||||||
|
Day 7 got close, but not close enough to inspire me to create a formalization. Day 8, on the other
|
||||||
|
hand, is
|
||||||
|
{{< sidenote "right" "pl-note" "quite interesting," >}}
|
||||||
|
Especially to someone like me who's interested in programming languages!
|
||||||
|
{{< /sidenote >}} and took quite some time to formalize.
|
||||||
|
|
||||||
|
As before, here's an (abridged) description of the problem:
|
||||||
|
|
||||||
|
> Given a tiny assembly-like language, determine the state of its accumulator
|
||||||
|
> when the same instruction is executed twice.
|
||||||
|
|
||||||
|
Before we start on the Coq formalization, let's talk about an idea from
|
||||||
|
Programming Language Theory (PLT), _big step operational semantics_.
|
||||||
|
|
||||||
|
### Big Step Operational Semantics
|
||||||
|
What we have in Advent of Code's Day 8 is, undeniably, a small programming language.
|
||||||
|
We are tasked with executing this language, or, in PLT lingo, defining its _semantics_.
|
||||||
|
There are many ways of doing this - at university, I've been taught of [denotational](https://en.wikipedia.org/wiki/Denotational_semantics), [axiomatic](https://en.wikipedia.org/wiki/Axiomatic_semantics),
|
||||||
|
and [operational](https://en.wikipedia.org/wiki/Operational_semantics) semantics.
|
||||||
|
I believe that Coq's mechanism of inductive definitions lends itself very well
|
||||||
|
to operational semantics, so we'll take that route. But even "operational semantics"
|
||||||
|
doesn't refer to a concrete technique - we have a choice between small-step (structural) and
|
||||||
|
big-step (natural) operational semantics. The former describe the minimal "steps" a program
|
||||||
|
takes as it's being evaluated, while the latter define the final results of evaluating a program.
|
||||||
|
I decided to go with big-step operational semantics, since they're more intuitive (natural!).
|
||||||
|
|
||||||
|
So, how does one go about "[defining] the final results of evaluating a program?" Most commonly,
|
||||||
|
we go about using _inference rules_. Let's talk about those next.
|
||||||
|
|
||||||
|
#### Inference Rules
|
||||||
|
Inference rules are a very general notion. They describe how we can determine (infer) a conclusion
|
||||||
|
from a set of assumptions. It helps to look at an example. Here's a silly little inference rule:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{\text{I'm allergic to cats} \quad \text{My friend has a cat}}
|
||||||
|
{\text{I will not visit my friend very much}}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
It reads, "if I'm allergic to cats, and if my friend has a cat, then I will not visit my friend very much".
|
||||||
|
Here, "I'm allergic to cats" and "my friend has a cat" are _premises_, and "I will not visit my friend very much" is
|
||||||
|
a _conclusion_. An inference rule states that if all its premises are true, then its conclusion must be true.
|
||||||
|
Here's another inference rule, this time with some mathematical notation instead of words:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{n < m}
|
||||||
|
{n + 1 < m + 1}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This one reads, "if \\(n\\) is less than \\(m\\), then \\(n+1\\) is less than \\(m+1\\)". We can use inference
|
||||||
|
rules to define various constructs. As an example, let's define what it means for a natural number to be even.
|
||||||
|
It takes two rules:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{}
|
||||||
|
{0 \; \text{is even}}
|
||||||
|
\quad
|
||||||
|
\frac
|
||||||
|
{n \; \text{is even}}
|
||||||
|
{n+2 \; \text{is even}}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
First of all, zero is even. We take this as fact - there are no premises for the first rule, so they
|
||||||
|
are all trivially true. Next, if a number is even, then adding 2 to that number results in another
|
||||||
|
even number. Using the two of these rules together, we can correctly determine whether any number
|
||||||
|
is or isn't even. We start knowing that 0 is even. Adding 2 we learn that 2 is even, and adding 2
|
||||||
|
again we see that 4 is even, as well. We can continue this to determine that 6, 8, 10, and so on
|
||||||
|
are even too. Never in this process will we visit the numbers 1 or 3 or 5, and that's good - they're not even!
|
||||||
|
|
||||||
|
Let's now extend this notion to programming languages, starting with a simple arithmetic language.
|
||||||
|
This language is made up of natural numbers and the \\(\square\\) operation, which represents the addition
|
||||||
|
of two numbers. Again, we need two rules:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{n \in \mathbb{N}}
|
||||||
|
{n \; \text{evaluates to} \; n}
|
||||||
|
\quad
|
||||||
|
\frac
|
||||||
|
{e_1 \; \text{evaluates to} \; n_1 \quad e_2 \; \text{evaluates to} \; n_2}
|
||||||
|
{e_1 \square e_2 \; \text{evaluates to} \; n_1 + n_2}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
First, let me explain myself. I used \\(\square\\) to demonstrate two important points. First, languages can be made of
|
||||||
|
any kind of characters we want; it's the rules that we define that give these languages meaning.
|
||||||
|
Second, while \\(\square\\) is the addition operation _in our language_, \\(+\\) is the _mathematical addition operator_.
|
||||||
|
They are not the same - we use the latter to define how the former works.
|
||||||
|
|
||||||
|
Finally, writing "evaluates to" gets quite tedious, especially for complex languages. Instead,
|
||||||
|
PLT people use notation to make their semantics more concise. The symbol \\(\Downarrow\\) is commonly
|
||||||
|
used to mean "evaluates to"; thus, \\(e \Downarrow v\\) reads "the expression \\(e\\) evaluates to the value \\(v\\)".
|
||||||
|
Using this notation, our rules start to look like the following:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{n \in \mathbb{N}}
|
||||||
|
{n \Downarrow n}
|
||||||
|
\quad
|
||||||
|
\frac
|
||||||
|
{e_1 \Downarrow n_1 \quad e_2 \Downarrow n_2}
|
||||||
|
{e_1 \square e_2 \Downarrow n_1 + n_2}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
If nothing else, these are way more compact! Though these may look intimidating at first, it helps to
|
||||||
|
simply read each symbol as its English meaning.
|
||||||
|
|
||||||
|
#### Encoding Inference Rules in Coq
|
||||||
|
Now that we've seen what inference rules are, we can take a look at how they can be represented in Coq.
|
||||||
|
We can use Coq's `Inductive` mechanism to define the rules. Let's start with our "is even" property.
|
||||||
|
|
||||||
|
```Coq
|
||||||
|
Inductive is_even : nat -> Prop :=
|
||||||
|
| zero_even : is_even 0
|
||||||
|
| plustwo_even : is_even n -> is_even (n+2).
|
||||||
|
```
|
||||||
|
|
||||||
|
The first line declares the property `is_even`, which, given a natural number, returns a proposition.
|
||||||
|
This means that `is_even` is not a proposition itself, but `is_even 0`, `is_even 1`, and `is_even 2`
|
||||||
|
are all propositions.
|
||||||
|
|
||||||
|
The following two lines each encode one of our aforementioned inference rules. The first rule, `zero_even`,
|
||||||
|
is of type `is_even 0`. The `zero_even` rule doesn't require any arguments, and we can use it to create
|
||||||
|
a proof that 0 is even. On the other hand, the `plustwo_even` rule _does_ require an argument, `is_even n`.
|
||||||
|
To construct a proof that a number `n+2` is even using `plustwo_even`, we need to provide a proof
|
||||||
|
that `n` itself is even. From this definition we can see a general principle: we encode each inference
|
||||||
|
rule as a constructor of an inductive Coq type. Each rule encoded in this manner takes as arguments
|
||||||
|
the proofs of its premises, and returns a proof of its conclusion.
|
||||||
|
|
||||||
|
For another example, let's encode our simple addition language. First, we have to define the language
|
||||||
|
itself:
|
||||||
|
|
||||||
|
```Coq
|
||||||
|
Inductive tinylang : Type :=
|
||||||
|
| number (n : nat) : tinylang
|
||||||
|
| box (e1 e2 : tinylang) : tinylang.
|
||||||
|
```
|
||||||
|
|
||||||
|
This defines the two elements of our example language: `number n` corresponds to \\(n\\), and `box e1 e2` corresponds
|
||||||
|
to \\(e_1 \square e_2\\). Finally, we define the inference rules:
|
||||||
|
|
||||||
|
```Coq {linenos=true}
|
||||||
|
Inductive tinylang_sem : tinylang -> nat -> Prop :=
|
||||||
|
| number_sem : forall (n : nat), tinylang_sem (number n) n
|
||||||
|
| box_sem : forall (e1 e2 : tinylang) (n1 n2 : nat),
|
||||||
|
tinylang_sem e1 n1 -> tinylang_sem e2 n2 ->
|
||||||
|
tinylang_sem (box e1 e2) (n1 + n2).
|
||||||
|
```
|
||||||
|
|
||||||
|
When we wrote our rules earlier, by using arbitrary variables like \\(e_1\\) and \\(n_1\\), we implicitly meant
|
||||||
|
that our rules work for _any_ number or expression. When writing Coq we have to make this assumption explicit
|
||||||
|
by using `forall`. For instance, the rule on line 2 reads, "for any number `n`, the expression `n` evaluates to `n`".
|
||||||
|
|
||||||
|
#### Semantics of Our Language
|
||||||
|
|
||||||
|
We've now written some example big-step operational semantics, both "on paper" and in Coq. Now, it's time to take a look at
|
||||||
|
the specific semantics of the language from Day 8! Our language consists of a few parts.
|
||||||
|
|
||||||
|
First, there are three opcodes: \\(\texttt{jmp}\\), \\(\\texttt{nop}\\), and \\(\\texttt{add}\\). Opcodes, combined
|
||||||
|
with an integer, make up an instruction. For example, the instruction \\(\\texttt{add} \\; 3\\) will increase the
|
||||||
|
content of the accumulator by three. Finally, a program consists of a sequence of instructions; they're separated
|
||||||
|
by newlines in the puzzle input, but we'll instead separate them by semicolons. For example, here's a complete program.
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\texttt{add} \; 0; \; \texttt{nop} \; 2; \; \texttt{jmp} \; -2
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Now, let's try evaluating this program. Starting at the beginning and with 0 in the accumulator,
|
||||||
|
it will add 0 to the accumulator (keeping it the same),
|
||||||
|
do nothing, and finally jump back to the beginning. At this point, it will try to run the addition instruction again,
|
||||||
|
which is not allowed; thus, the program will terminate.
|
||||||
|
|
||||||
|
Did you catch that? The semantics of this language will require more information than just our program itself (which we'll denote by \\(p\\)).
|
||||||
|
* First, to evaluate the program we will need a program counter, \\(\\textit{c}\\). This program counter
|
||||||
|
will tell us the position of the instruction to be executed next. It can also point past the last instruction,
|
||||||
|
which means our program terminated successfully.
|
||||||
|
* Next, we'll need the accumulator \\(a\\). Addition instructions can change the accumulator, and we will be interested
|
||||||
|
in the number that ends up in the accumulator when our program finishes executing.
|
||||||
|
* Finally, and more subtly, we'll need to keep track of the states we visited. For instance,
|
||||||
|
in the course of evaluating our program above, we encounter the \\((c, a)\\) pair of \\((0, 0)\\) twice: once
|
||||||
|
at the beginning, and once at the end. However, whereas at the beginning we have not yet encountered the addition
|
||||||
|
instruction, at the end we have, so the evaluation behaves differently. To make the proofs work better in Coq,
|
||||||
|
we'll use a set \\(v\\) of
|
||||||
|
{{< sidenote "right" "allowed-note" "allowed (valid) program counters (as opposed to visited program counters)." >}}
|
||||||
|
Whereas the set of "visited" program counters keeps growing as our evaluation continues,
|
||||||
|
the set of "allowed" program counters keeps shrinking. Because the "allowed" set never stops shrinking,
|
||||||
|
assuming we're starting with a finite set, our execution will eventually terminate.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
Now we have all the elements of our evaluation. Let's define some notation. A program starts at some state,
|
||||||
|
and terminates in another, possibly different state. In the course of a regular evaluation, the program
|
||||||
|
never changes; only the state does. So I propose this (rather unorthodox) notation:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
(c, a, v) \Rightarrow_p (c', a', v')
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This reads, "after starting at program counter \\(c\\), accumulator \\(a\\), and set of valid addresses \\(v\\),
|
||||||
|
the program \\(p\\) terminates with program counter \\(c'\\), accumulator \\(a'\\), and set of valid addresses \\(v'\\)".
|
||||||
|
Before creating the inference rules for this evaluation relation, let's define the effect of evaluating a single
|
||||||
|
instruction, using notation \\((c, a) \rightarrow_i (c', a')\\). An addition instruction changes the accumulator,
|
||||||
|
and increases the program counter by 1.
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{}
|
||||||
|
{(c, a) \rightarrow_{\texttt{add} \; n} (c+1, a+n)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
A no-op instruction does even less. All it does is increment the program counter.
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{}
|
||||||
|
{(c, a) \rightarrow_{\texttt{nop} \; n} (c+1, a)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Finally, a jump instruction leaves the accumulator intact, but adds a number to the program counter itself!
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{}
|
||||||
|
{(c, a) \rightarrow_{\texttt{jmp} \; n} (c+n, a)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
None of these rules have any premises, and they really are quite simple. Now, let's define the rules
|
||||||
|
for evaluating a program. First of all, a program starting in a state that is not considered "valid"
|
||||||
|
is done evaluating, and is in a "failed" state.
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{c \not \in v \quad c \not= \text{length}(p)}
|
||||||
|
{(c, a, v) \Rightarrow_{p} (c, a, v)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
We use \\(\\text{length}(p)\\) to represent the number of instructions in \\(p\\). Note the second premise:
|
||||||
|
even if our program counter \\(c\\) is not included in the valid set, if it's "past the end of the program",
|
||||||
|
the program terminates in an "ok" state.
|
||||||
|
{{< sidenote "left" "avoid-c-note" "Here's a rule for terminating in the \"ok\" state:" >}}
|
||||||
|
In the presented rule, we don't use the variable <code>c</code> all that much, and we know its concrete
|
||||||
|
value (from the equality premise). We could thus avoid introducing the name \(c\) by
|
||||||
|
replacing it with said known value:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{}
|
||||||
|
{(\text{length}(p), a, v) \Rightarrow_{p} (\text{length}(p), a, v)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This introduces some duplication, but that is really because all "base case" evaluation rules
|
||||||
|
start and stop in the same state. To work around this, we could define a separate proposition
|
||||||
|
to mean "program \(p\) is done in state \(s\)", then \(s\) will really only need to occur once,
|
||||||
|
and so will \(\text{length}(p)\). This is, in fact, what we will do later on,
|
||||||
|
since being able to talk about "programs being done" will help us with
|
||||||
|
components of our proof.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{c = \text{length}(p)}
|
||||||
|
{(c, a, v) \Rightarrow_{p} (c, a, v)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
When our program counter reaches the end of the program, we are also done evaluating it. Even though
|
||||||
|
both rules {{< sidenote "right" "redundant-note" "lead to the same conclusion," >}}
|
||||||
|
In fact, if the end of the program is never included in the valid set, the second rule is completely redundant.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
it helps to distinguish the two possible outcomes. Finally, if neither of the termination conditions are met,
|
||||||
|
our program can take a step, and continue evaluating from there.
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac{c \in v \quad p[c] = i \quad (c, a) \rightarrow_i (c', a') \quad (c', a', v - \{c\}) \Rightarrow_p (c'', a'', v'')}
|
||||||
|
{(c, a, v) \Rightarrow_{p} (c'', a'', v'')}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This is quite a rule. A lot of things need to work out for a program to evaluate from a state that isn't
|
||||||
|
currently the final state:
|
||||||
|
|
||||||
|
* The current program counter \\(c\\) must be valid. That is, it must be an element of \\(v\\).
|
||||||
|
* This program counter must correspond to an instruction \\(i\\) in \\(p\\), which we write as \\(p[c] = i\\).
|
||||||
|
* This instruction must be executed, changing our program counter from \\(c\\) to \\(c'\\) and our
|
||||||
|
accumulator from \\(a\\) to \\(a'\\). The set of valid instructions will no longer include \\(c\\),
|
||||||
|
and will become \\(v - \\{c\\}\\).
|
||||||
|
* Our program must then finish executing, starting at state
|
||||||
|
\\((c', a', v - \\{c\\})\\), and ending in some (unknown) state \\((c'', a'', v'')\\).
|
||||||
|
|
||||||
|
If all of these conditions are met, our program, starting at \\((c, a, v)\\), will terminate in the state \\((c'', a'', v'')\\). This third rule completes our semantics; a program being executed will keep running instructions using the third rule, until it finally
|
||||||
|
hits an invalid program counter (terminating with the first rule) or gets to the end of the program (terminating with the second rule).
|
||||||
|
|
||||||
|
#### Aside: Vectors and Finite \\(\mathbb{N}\\)
|
||||||
|
We'll be getting to the Coq implementation of our semantics soon, but before we do:
|
||||||
|
what type should \\(c\\) be? It's entirely possible for an instruction like \\(\\texttt{jmp} \\; -10000\\)
|
||||||
|
to throw our program counter way before the first instruction of our program, so at first, it seems
|
||||||
|
as though we should use an integer. But the prompt doesn't even specify what should happen in this
|
||||||
|
case - it only says an instruction shouldn't be run twice. The "valid set", although it may help resolve
|
||||||
|
this debate, is our invention, and isn't part of the original specification.
|
||||||
|
|
||||||
|
There is, however, something we can infer from this problem. Since the problem of jumping "too far behind" or
|
||||||
|
"too far ahead" is never mentioned, we can assume that _all jumps will lead either to an instruction,
|
||||||
|
or right to the end of a program_. This means that \\(c\\) is a natural number, with
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
0 \leq c \leq \text{length}(p)
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
In a language like Coq, it's possible to represent such a number. Since we've gotten familiar with
|
||||||
|
inference rules, let's present two rules that define such a number:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{n \in \mathbb{N}^+}
|
||||||
|
{Z : \text{Fin} \; n}
|
||||||
|
\quad
|
||||||
|
\frac
|
||||||
|
{f : \text{Fin} \; n}
|
||||||
|
{S f : \text{Fin} \; (n+1)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This is a variation of the [Peano encoding](https://wiki.haskell.org/Peano_numbers) of natural numbers.
|
||||||
|
It reads as follows: zero (\\(Z\\)) is a finite natural number less than any positive natural number \\(n\\). Then, if a finite natural number
|
||||||
|
\\(f\\) is less than \\(n\\), then adding one to that number (using the successor function \\(S\\))
|
||||||
|
will create a natural number less than \\(n+1\\). We encode this in Coq as follows
|
||||||
|
([originally from here](https://coq.inria.fr/library/Coq.Vectors.Fin.html#t)):
|
||||||
|
|
||||||
|
```Coq
|
||||||
|
Inductive t : nat -> Set :=
|
||||||
|
| F1 : forall {n}, t (S n)
|
||||||
|
| FS : forall {n}, t n -> t (S n).
|
||||||
|
```
|
||||||
|
|
||||||
|
The `F1` constructor here is equivalent to our \\(Z\\), and `FS` is equivalent to our \\(S\\).
|
||||||
|
To represent positive natural numbers \\(\\mathbb{N}^+\\), we simply take a regular natural
|
||||||
|
number from \\(\mathbb{N}\\) and find its successor using `S` (simply adding 1). Again, we have
|
||||||
|
to explicitly use `forall` in our type signatures.
|
||||||
|
|
||||||
|
We can use a similar technique to represent a list with a known number of elements, known
|
||||||
|
in the Idris and Coq world as a vector. Again, we only need two inference rules to define such
|
||||||
|
a vector:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{t : \text{Type}}
|
||||||
|
{[] : \text{Vec} \; t \; 0}
|
||||||
|
\quad
|
||||||
|
\frac
|
||||||
|
{x : \text{t} \quad \textit{xs} : \text{Vec} \; t \; n}
|
||||||
|
{(x::\textit{xs}) : \text{Vec} \; t \; (n+1)}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
These rules read: the empty list \\([]\\) is zero-length vector of any type \\(t\\). Then,
|
||||||
|
if we take an element \\(x\\) of type \\(t\\), and an \\(n\\)-long vector \\(\textit{xs}\\) of \\(t\\),
|
||||||
|
then we can prepend \\(x\\) to \\(\textit{xs}\\) and get an \\((n+1)\\)-long vector of \\(t\\).
|
||||||
|
In Coq, we write this as follows ([originally from here](https://coq.inria.fr/library/Coq.Vectors.VectorDef.html#t)):
|
||||||
|
|
||||||
|
```Coq
|
||||||
|
Inductive t A : nat -> Type :=
|
||||||
|
| nil : t A 0
|
||||||
|
| cons : forall (h:A) (n:nat), t A n -> t A (S n).
|
||||||
|
```
|
||||||
|
|
||||||
|
The `nil` constructor represents the empty list \\([]\\), and `cons` represents
|
||||||
|
the operation of prepending an element (called `h` in the code and \\(x\\) in our inference rules)
|
||||||
|
to another vector of length \\(n\\), which remains unnamed in the code but is called \\(\\textit{xs}\\) in our rules.
|
||||||
|
|
||||||
|
These two definitions work together quite well. For instance, suppose we have a vector of length \\(n\\).
|
||||||
|
If we were to access its elements by indices starting at 0, we'd be allowed to access indices 0 through \\(n-1\\).
|
||||||
|
These are precisely the values of the finite natural numbers less than \\(n\\), \\(\\text{Fin} \\; n \\).
|
||||||
|
Thus, given such an index \\(\\text{Fin} \\; n\\) and a vector \\(\\text{Vec} \\; t \\; n\\), we are guaranteed
|
||||||
|
to be able to retrieve the element at the given index! In our code, we will not have to worry about bounds checking.
|
||||||
|
|
||||||
|
Of course, if our program has \\(n\\) elements, our program counter will be a finite number less than \\(n+1\\),
|
||||||
|
since there's always the possibility of it pointing past the instructions, indicating that we've finished
|
||||||
|
running the program. This leads to some minor complications: we can't safely access the program instruction
|
||||||
|
at index \\(\\text{Fin} \\; (n+1)\\). We can solve this problem by considering two cases:
|
||||||
|
either our index points one past the end of the program (in which case its value is exactly the finite
|
||||||
|
representation of \\(n\\)), or it's less than \\(n\\), in which case we can "tighten" the upper bound,
|
||||||
|
and convert that index into a \\(\\text{Fin} \\; n\\). We formalize it in a lemma:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 80 82 >}}
|
||||||
|
|
||||||
|
There's a little bit of a gotcha here. Instead of translating our above statement literally,
|
||||||
|
and returning a value that's the result of "tightening" our input `f`, we return a value
|
||||||
|
`f'` that can be "weakened" to `f`. This is because "tightening" is not a total function -
|
||||||
|
it's not always possible to convert a \\(\\text{Fin} \\; (n+1)\\) into a \\(\\text{Fin} \\; n\\).
|
||||||
|
However, "weakening" \\(\\text{Fin} \\; n\\) _is_ a total function, since a number less than \\(n\\)
|
||||||
|
is, by the transitive property of a total order, also less than \\(n+1\\).
|
||||||
|
|
||||||
|
The Coq proof for this claim is as follows:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 88 97 >}}
|
||||||
|
|
||||||
|
The `Fin.rectS` function is a convenient way to perform inductive proofs over
|
||||||
|
our finite natural numbers. Informally, our proof proceeds as follows:
|
||||||
|
|
||||||
|
* If the current finite natural number is zero, take a look at the "bound" (which
|
||||||
|
we assume is nonzero, since there isn't a natural number less than zero).
|
||||||
|
* If this "bounding number" is one, our `f` can't be tightened any further,
|
||||||
|
since doing so would create a number less than zero. Fortunately, in this case,
|
||||||
|
`n` must be `0`, so `f` is the finite representation of `n`.
|
||||||
|
* Otherwise, `f` is most definitely a weakened version of another `f'`,
|
||||||
|
since the tightest possible type for zero has a "bounding number" of one, and
|
||||||
|
our "bounding number" is greater than that. We return a tighter version of our finite zero.
|
||||||
|
* If our number is a successor of another finite number, we check if that other number
|
||||||
|
can itself be tightened.
|
||||||
|
* If it can't be tightened, then our smaller number is a finite representation of
|
||||||
|
`n-1`. This, in turn, means that adding one to it will be the finite representation
|
||||||
|
of `n` (if \\(x\\) is equal to \\(n-1\\), then \\(x+1\\) is equal to \\(n\\)).
|
||||||
|
* If it _can_ be tightened, then so can the successor (if \\(x\\) is less
|
||||||
|
than \\(n-1\\), then \\(x+1\\) is less than \\(n\\)).
|
||||||
|
|
||||||
|
Next, let's talk about addition, specifically the kind of addition done by the \\(\\texttt{jmp}\\) instruction.
|
||||||
|
We can always add an integer to a natural number, but we can at best guarantee that the result
|
||||||
|
will be an integer. For instance, we can add `-1000` to `1`, and get `-999`, which is _not_ a natural
|
||||||
|
number. We implement this kind of addition in a function called `jump_t`:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 56 56 >}}
|
||||||
|
|
||||||
|
At the moment, its definition is not particularly important. What is important, though,
|
||||||
|
is that it takes a bounded natural number `pc` (our program counter), an integer `off`
|
||||||
|
(the offset provided by the jump instruction) and returns another integer representing
|
||||||
|
the final offset. Why are integers of type `t`? Well, it so happens
|
||||||
|
that Coq provides facilities for working with arbitrary implementations of integers,
|
||||||
|
without relying on how they are implemented under the hood. This can be seen in its
|
||||||
|
[`Coq.ZArith.Int`](https://coq.inria.fr/library/Coq.ZArith.Int.html) module,
|
||||||
|
which describes what functions and types an implementation of integers should provide.
|
||||||
|
Among those is `t`, the type of an integer in such an arbitrary implementation. We too
|
||||||
|
will not make an assumption about how the integers are implemented, and simply
|
||||||
|
use this generic `t` from now on.
|
||||||
|
|
||||||
|
Now, suppose we wanted to write a function that _does_ return a valid program
|
||||||
|
counter after adding the offset to it. Since it's possible for this function to fail
|
||||||
|
(for instance, if the offset is very negative), it has to return `option (fin (S n))`.
|
||||||
|
That is, this function may either fail (returning `None`) or succeed, returning
|
||||||
|
`Some f`, where `f` is of type `fin (S n)`, aka \\(\\text{Fin} \\; (n + 1)\\). Here's
|
||||||
|
the function in Coq (again, don't worry too much about the definition):
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 61 61 >}}
|
||||||
|
|
||||||
|
We will make use of this function when we define and verify our semantics.
|
||||||
|
Let's take a look at that next.
|
||||||
|
|
||||||
|
#### Semantics in Coq
|
||||||
|
|
||||||
|
Now that we've seen finite sets and vectors, it's time to use them to
|
||||||
|
encode our semantics in Coq. Before we do anything else, we need
|
||||||
|
to provide Coq definitions for the various components of our
|
||||||
|
language, much like what we did with `tinylang`. We can start with opcodes:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 20 23 >}}
|
||||||
|
|
||||||
|
Now we can define a few other parts of our language and semantics, namely
|
||||||
|
states, instructions and programs (which I called "inputs" since, well, they're
|
||||||
|
our puzzle input). A state is simply the 3-tuple of the program counter, the set
|
||||||
|
of valid program counters, and the accumulator. We write it as follows:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 33 33 >}}
|
||||||
|
|
||||||
|
The star `*` is used here to represent a [product type](https://en.wikipedia.org/wiki/Product_type)
|
||||||
|
rather than arithmetic multiplication. Our state type accepts an argument,
|
||||||
|
`n`, much like a finite natural number or a vector. In fact, this `n` is passed on
|
||||||
|
to the state's program counter and set types. Rightly, a state for a program
|
||||||
|
of length \\(n\\) will not be of the same type as a state for a program of length \\(n+1\\).
|
||||||
|
|
||||||
|
An instruction is also a tuple, but this time containing only two elements: the opcode and
|
||||||
|
the number. We write this as follows:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 36 36 >}}
|
||||||
|
|
||||||
|
Finally, we have to define the type of a program. This type will also be
|
||||||
|
indexed by `n`, the program's length. A program of length `n` is simply a
|
||||||
|
vector of instructions `inst` of length `n`. This leads to the following
|
||||||
|
definition:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 38 38 >}}
|
||||||
|
|
||||||
|
So far, so good! Finally, it's time to get started on the semantics themselves.
|
||||||
|
We begin with the inductive definition of \\((\\rightarrow_i)\\).
|
||||||
|
I think this is fairly straightforward. However, we do use
|
||||||
|
`t` instead of \\(n\\) from the rules, and we use `FS`
|
||||||
|
instead of \\(+1\\). Also, we make the formerly implicit
|
||||||
|
assumption that \\(c+n\\) is valid explicit, by
|
||||||
|
providing a proof that `valid_jump_t pc t = Some pc'`.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 103 110 >}}
|
||||||
|
|
||||||
|
Next, it will help us to combine the premises for
|
||||||
|
"failed" and "ok" terminations into Coq data types.
|
||||||
|
This will make it easier for us to formulate a lemma later on.
|
||||||
|
Here are the definitions:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 112 117 >}}
|
||||||
|
|
||||||
|
Since all of our "termination" rules start and
|
||||||
|
end in the same state, there's no reason to
|
||||||
|
write that state twice. Thus, both `done`
|
||||||
|
and `stuck` only take the input `inp`,
|
||||||
|
and the state, which includes the accumulator
|
||||||
|
`acc`, the set of allowed program counters `v`, and
|
||||||
|
the program counter at which the program came to an end.
|
||||||
|
When the program terminates successfully, this program
|
||||||
|
counter will be equal to the length of the program `n`,
|
||||||
|
so we use `nat_to_fin n`. On the other hand, if the program
|
||||||
|
terminates in a stuck state, it must be that it terminated
|
||||||
|
at a program counter that points to an instruction. Thus, this
|
||||||
|
program counter is actually a \\(\\text{Fin} \\; n\\), and not
|
||||||
|
a \\(\\text{Fin} \\; (n+1)\\), and is not in the set of allowed program counters.
|
||||||
|
We use the same "weakening" trick we saw earlier to represent
|
||||||
|
this.
|
||||||
|
|
||||||
|
Finally, we encode the three inference rules we came up with:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 119 126 >}}
|
||||||
|
|
||||||
|
Notice that we fused two of the premises in the last rule.
|
||||||
|
Instead of naming the instruction at the current program
|
||||||
|
counter (by writing \\(p[c] = i\\)) and using it in another premise, we simply use
|
||||||
|
`nth inp pc`, which corresponds to \\(p[c]\\) in our
|
||||||
|
"paper" semantics.
|
||||||
|
|
||||||
|
Before we go on writing some actual proofs, we have
|
||||||
|
one more thing we have to address. Earlier, we said:
|
||||||
|
|
||||||
|
> All jumps will lead either to an instruction, or right to the end of a program.
|
||||||
|
|
||||||
|
To make Coq aware of this constraint, we'll have to formalize it. To
|
||||||
|
start off, we'll define the notion of a "valid instruction", which is guaranteed
|
||||||
|
to keep the program counter in the correct range.
|
||||||
|
There are a couple of ways to do this, but we'll use yet another definition based
|
||||||
|
on inference rules. First, though, observe that the same instruction may be valid
|
||||||
|
for one program, and invalid for another. For instance, \\(\\texttt{jmp} \\; 100\\)
|
||||||
|
is perfectly valid for a program with thousands of instructions, but if it occurs
|
||||||
|
in a program with only 3 instructions, it will certainly lead to disaster. Specifically,
|
||||||
|
the validity of an instruction depends on the length of the program in which it resides,
|
||||||
|
and the program counter at which it's encountered.
|
||||||
|
Thus, we refine our idea of validity to "being valid for a program of length \\(n\\) at program counter \\(f\\)".
|
||||||
|
For this, we can use the following two inference rules:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\frac
|
||||||
|
{c : \text{Fin} \; n}
|
||||||
|
{\texttt{add} \; t \; \text{valid for} \; n, c }
|
||||||
|
\quad
|
||||||
|
\frac
|
||||||
|
{c : \text{Fin} \; n \quad o \in \{\texttt{nop}, \texttt{jmp}\} \quad J_v(c, t) = \text{Some} \; c' }
|
||||||
|
{o \; t \; \text{valid for} \; n, c }
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
The first rule states that if a program has length \\(n\\), then \\(\\texttt{add}\\) is valid
|
||||||
|
at any program counter whose value is less than \\(n\\). This is because running
|
||||||
|
\\(\\texttt{add}\\) will increment the program counter \\(c\\) by 1,
|
||||||
|
and thus, create a new program counter that's less than \\(n+1\\),
|
||||||
|
which, as we discussed above, is perfectly valid.
|
||||||
|
|
||||||
|
The second rule works for the other two instructions. It has an extra premise:
|
||||||
|
the result of `jump_valid_t` (written as \\(J_v\\)) has to be \\(\\text{Some} \\; c'\\),
|
||||||
|
that is, `jump_valid_t` must succeed. Note that we require this even for no-ops,
|
||||||
|
since it later turns out that one of them may be a jump after all.
|
||||||
|
|
||||||
|
We now have our validity rules. If an instruction satisfies them for a given program
|
||||||
|
and at a given program counter, evaluating it will always result in a program counter that has a proper value.
|
||||||
|
We encode the rules in Coq as follows:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 152 157 >}}
|
||||||
|
|
||||||
|
Note that we have three rules instead of two. This is because we "unfolded"
|
||||||
|
\\(o\\) from our second rule: rather than using set notation (or "or"), we
|
||||||
|
just generated two rules that vary in nothing but the operation involved.
|
||||||
|
|
||||||
|
Of course, we must have that every instruction in a program is valid.
|
||||||
|
We don't really need inference rules for this, as much as a "forall" quantifier.
|
||||||
|
A program \\(p\\) of length \\(n\\) is valid if the following holds:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\forall (c : \text{Fin} \; n). p[c] \; \text{valid for} \; n, c
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
That is, for every possible in-bounds program counter \\(c\\),
|
||||||
|
the instruction at the program counter is valid. We can now
|
||||||
|
encode this in Coq, too:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 160 161 >}}
|
||||||
|
|
||||||
|
In the above, `n` is made implicit where possible.
|
||||||
|
Since \\(c\\) (called `pc` in the code) is of type \\(\\text{Fin} \\; n\\), there's no
|
||||||
|
need to write \\(n\\) _again_. The curly braces tell Coq to infer that
|
||||||
|
argument where possible.
|
||||||
|
|
||||||
|
### Proving Termination
|
||||||
|
Here we go! It's finally time to make some claims about our
|
||||||
|
definitions. Who knows - maybe we wrote down total garbage!
|
||||||
|
We will be creating several related lemmas and theorems.
|
||||||
|
All of them share two common assumptions:
|
||||||
|
|
||||||
|
* We have some valid program `inp` of length `n`.
|
||||||
|
* This program is a valid input, that is, `valid_input` holds for it.
|
||||||
|
There's no sense in arguing based on an invalid input program.
|
||||||
|
|
||||||
|
We represent these grouped assumptions by opening a Coq
|
||||||
|
`Section`, which we call `ValidInput`, and listing our assumptions:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 163 166 >}}
|
||||||
|
|
||||||
|
We had to also explicitly mention the length `n` of our program.
|
||||||
|
From now on, the variables `n`, `inp`, and `Hv` will be
|
||||||
|
available to all of the proofs we write in this section.
|
||||||
|
The first proof is rather simple. The claim is:
|
||||||
|
|
||||||
|
> For our valid program, at any program counter `pc`
|
||||||
|
and accumulator `acc`, there must exist another program
|
||||||
|
counter `pc'` and accumulator `acc'` such that the
|
||||||
|
instruction evaluation relation \\((\\rightarrow_i)\\)
|
||||||
|
connects the two. That is, valid addresses aside,
|
||||||
|
we can always make a step.
|
||||||
|
|
||||||
|
Here is this claim encoded in Coq:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 168 169 >}}
|
||||||
|
|
||||||
|
We start our proof by introducing all the relevant variables into
|
||||||
|
the global context. I've mentioned this when I wrote about
|
||||||
|
day 1, but here's the gist: the `intros` keyword takes
|
||||||
|
variables from a `forall`, and makes them concrete.
|
||||||
|
In short, `intros x` is very much like saying "suppose
|
||||||
|
we have an `x`", and going on with the proof.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 170 171 >}}
|
||||||
|
|
||||||
|
Here, we said "take any program counter `pc` and any
|
||||||
|
accumulator `acc`". Now what? Well, first of all,
|
||||||
|
we want to take a look at the instruction at the current
|
||||||
|
`pc`. We know that this instruction is a combination
|
||||||
|
of an opcode and a number, so we use `destruct` to get
|
||||||
|
access to both of these parts:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 172 172 >}}
|
||||||
|
|
||||||
|
Now, Coq reports the following proof state:
|
||||||
|
|
||||||
|
```
|
||||||
|
1 subgoal
|
||||||
|
|
||||||
|
n : nat
|
||||||
|
inp : input n
|
||||||
|
Hv : valid_input inp
|
||||||
|
pc : Fin.t n
|
||||||
|
acc : t
|
||||||
|
o : opcode
|
||||||
|
t0 : t
|
||||||
|
Hop : nth inp pc = (o, t0)
|
||||||
|
|
||||||
|
========================= (1 / 1)
|
||||||
|
|
||||||
|
exists (pc' : fin (S n)) (acc' : t),
|
||||||
|
step_noswap (o, t0) (pc, acc) (pc', acc')
|
||||||
|
```
|
||||||
|
|
||||||
|
We have some opcode `o`, and some associated number
|
||||||
|
`t0`, and we must show that there exist a `pc'`
|
||||||
|
and `acc'` to which we can move on. To prove
|
||||||
|
that something exists in Coq, we must provide
|
||||||
|
an instance of that "something". If we claim
|
||||||
|
that there exists a dog that's not a good boy,
|
||||||
|
we better have this elusive creature in hand.
|
||||||
|
In other words, proofs in Coq are [constructive](https://en.wikipedia.org/wiki/Constructive_proof).
|
||||||
|
Without knowing the kind of operation we're dealing with, we can't
|
||||||
|
say for sure how the step will proceed. Thus, we proceed by
|
||||||
|
case analysis on `o`.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 173 173 >}}
|
||||||
|
|
||||||
|
There are three possible cases we have to consider,
|
||||||
|
one for each type of instruction.
|
||||||
|
|
||||||
|
* If the instruction is \\(\\texttt{add}\\), we know
|
||||||
|
that `pc' = pc + 1` and `acc' = acc + t0`. That is,
|
||||||
|
the program counter is simply incremented, and the accumulator
|
||||||
|
is modified with the number part of the instruction.
|
||||||
|
* If the instruction is \\(\\texttt{nop}\\), the program
|
||||||
|
counter will again be incremented (`pc' = pc + 1`),
|
||||||
|
but the accumulator will stay the same, so `acc' = acc`.
|
||||||
|
* If the instruction is \\(\\texttt{jmp}\\), things are
|
||||||
|
more complicated. We must rely on the assumption
|
||||||
|
that our input is valid, which tells us that adding
|
||||||
|
`t0` to our `pc` will result in `Some f`, and not `None`.
|
||||||
|
Given this, we have `pc' = f`, and `acc' = acc`.
|
||||||
|
|
||||||
|
This is how these three cases are translated to Coq:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 174 177 >}}
|
||||||
|
|
||||||
|
For the first two cases, we simply provide the
|
||||||
|
values we expect for `pc'` and `acc'`, and
|
||||||
|
apply the corresponding inference rule that
|
||||||
|
is satisfied by these values. For the third case, we have
|
||||||
|
to invoke `Hv`, the hypothesis that our input is valid.
|
||||||
|
In particular, we care about the instruction at `pc`,
|
||||||
|
so we use `specialize` to plug `pc` into the more general
|
||||||
|
hypothesis. We then replace `nth inp pc` with its known
|
||||||
|
value, `(jmp, t0)`. This tells us the following, in Coq's words:
|
||||||
|
|
||||||
|
```
|
||||||
|
Hv : valid_inst (jmp, t0) pc
|
||||||
|
```
|
||||||
|
|
||||||
|
That is, `(jmp, t0)` is a valid instruction at `pc`. Then, using
|
||||||
|
Coq's `inversion` tactic, we ask: how is this possible? There is
|
||||||
|
only one inference rule that gives us such a conclusion, and it is named `valid_inst_jmp`
|
||||||
|
in our Coq code. Since we have a proof that our `jmp` is valid,
|
||||||
|
it must mean that this rule was used. Furthermore, since this
|
||||||
|
rule requires that `valid_jump_t` evaluates to `Some f'`, we know
|
||||||
|
that this must be the case here! Coq now adds the following
|
||||||
|
two lines to our proof state:
|
||||||
|
|
||||||
|
```
|
||||||
|
f' : fin (S n)
|
||||||
|
H0 : valid_jump_t pc t0 = Some f'
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, we specify, as mentioned earlier, that `pc' = f'` and `acc' = acc`.
|
||||||
|
As before, we apply the corresponding step rule for `jmp`. When it asks
|
||||||
|
for a proof that `valid_jump_t` produces a valid program counter,
|
||||||
|
we hand it `H0` using `apply H0`. And with that, Coq is happy!
|
||||||
|
|
||||||
|
Next, we prove a claim that a valid program can always do _something_,
|
||||||
|
and that something is one of three things:
|
||||||
|
|
||||||
|
* It can terminate in the "ok" state if the program counter
|
||||||
|
reaches the program's end.
|
||||||
|
* It can terminate with an error if it's currently at a program
|
||||||
|
counter that is not included in the valid set.
|
||||||
|
* Otherwise, it can run the current instruction and advance
|
||||||
|
to a "next" state.
|
||||||
|
|
||||||
|
Alternatively, we could say that one of the inference rules
|
||||||
|
for \\((\\Rightarrow_p)\\) must apply. This is not the case if the input
|
||||||
|
is not valid, since, as I said
|
||||||
|
before, an arbitrary input program can lead us to jump
|
||||||
|
to a negative address (or to an address _way_ past the end of the program).
|
||||||
|
Here's the claim, translated to Coq:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 181 186 >}}
|
||||||
|
|
||||||
|
Informally, we can prove this as follows:
|
||||||
|
|
||||||
|
* If the current program counter is equal to the length
|
||||||
|
of the program, we've reached the end. Thus, the program
|
||||||
|
can terminate in the "ok" state.
|
||||||
|
* Otherwise, the current program counter must be
|
||||||
|
less than the length of the program.
|
||||||
|
* If we've already encountered this program counter (that is,
|
||||||
|
if it's gone from the set of valid program counters),
|
||||||
|
then the program will terminate in the "error" state.
|
||||||
|
* Otherwise, the program counter is in the set of
|
||||||
|
valid instructions. By our earlier theorem, in a valid
|
||||||
|
program, the instruction at any program counter can be correctly
|
||||||
|
executed, taking us to the next state. Now too
|
||||||
|
our program can move to this next state.
|
||||||
|
|
||||||
|
Below is the Coq translation of the above.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 187 203 >}}
|
||||||
|
|
||||||
|
It doesn't seem like we're that far from being done now.
|
||||||
|
A program can always take a step, and each time it does,
|
||||||
|
the set of valid program counters decreases in size. Eventually,
|
||||||
|
this set will become empty, so if nothing else, our program will
|
||||||
|
eventually terminate in an "error" state. Thus, it will stop
|
||||||
|
running no matter what.
|
||||||
|
|
||||||
|
This seems like a task for induction, in this case on the size
|
||||||
|
of the valid set. In particular, strong mathematical induction
|
||||||
|
{{< sidenote "right" "strong-induction-note" "seems to work best." >}}
|
||||||
|
Why strong induction? If we remove a single element from a set,
|
||||||
|
its size should decrease strictly by 1. Thus, why would we need
|
||||||
|
to care about sets of <em>all</em> sizes less than the current
|
||||||
|
set's size?<br>
|
||||||
|
<br>
|
||||||
|
Unfortunately, we're not working with purely mathematical sets.
|
||||||
|
Coq's default facility for sets is simply a layer on top
|
||||||
|
of good old lists, and makes no effort to be "correct by construction".
|
||||||
|
It is thus perfectly possible to have a "set" which includes an element
|
||||||
|
twice. Depending on the implementation of <code>set_remove</code>,
|
||||||
|
we may end up removing the repeated element multiple times, thereby
|
||||||
|
shrinking the length of our list by more than 1. I'd rather
|
||||||
|
not worry about implementation details like that.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
Someone on StackOverflow [implemented this](https://stackoverflow.com/questions/45872719/how-to-do-induction-on-the-length-of-a-list-in-coq),
|
||||||
|
so I'll just use it. The Coq theorem corresponding to strong induction
|
||||||
|
on the length of a list is as follows:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 205 207 >}}
|
||||||
|
|
||||||
|
It reads,
|
||||||
|
|
||||||
|
> If for some list `l`, the property `P` holding for all lists
|
||||||
|
shorter than `l` means that it also holds for `l` itself, then
|
||||||
|
`P` holds for all lists.
|
||||||
|
|
||||||
|
This is perhaps not particularly elucidating. We can alternatively
|
||||||
|
think of this as trying to prove some property for all lists `l`.
|
||||||
|
We start with all empty lists. Here, we have nothing else to rely
|
||||||
|
on; there are no lists shorter than the empty list, and our property
|
||||||
|
must hold for all empty lists. Then, we move on to proving
|
||||||
|
the property for all lists of length 1, already knowing that it holds
|
||||||
|
for all empty lists. Once we're done there, we move on to proving
|
||||||
|
that `P` holds for all lists of length 2, now knowing that it holds
|
||||||
|
for all empty lists _and_ all lists of length 1. We continue
|
||||||
|
doing this, eventually covering lists of any length.
|
||||||
|
|
||||||
|
Before proving termination, there's one last thing we have to
|
||||||
|
take care of. Coq's standard library does not come with
|
||||||
|
a proof that removing an element from a set makes it smaller;
|
||||||
|
we have to provide it ourselves. Here's the claim encoded
|
||||||
|
in Coq:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 217 219 >}}
|
||||||
|
|
||||||
|
This reads, "if a set `s` contains a finite natural
|
||||||
|
number `f`, removing `f` from `s` reduces the set's size".
|
||||||
|
The details of the proof are not particularly interesting,
|
||||||
|
and I hope that you understand intuitively why this is true.
|
||||||
|
Finally, we make our termination claim.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 230 231 >}}
|
||||||
|
|
||||||
|
It's quite a strong claim - given _any_ program counter,
|
||||||
|
set of valid addresses, and accumulator, a valid input program
|
||||||
|
will terminate. Let's take a look at the proof.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 232 234 >}}
|
||||||
|
|
||||||
|
We use `intros` again. However, it brings in variables
|
||||||
|
in order, and we really only care about the _second_ variable.
|
||||||
|
We thus `intros` the first two, and then "put back" the first
|
||||||
|
one using `generalize dependent`. Then, we proceed by
|
||||||
|
induction on length, as seen above.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 235 236>}}
|
||||||
|
|
||||||
|
Now we're in the "inductive step". Our inductive hypothesis
|
||||||
|
is that any set of valid addresses smaller than the current one will
|
||||||
|
guarantee that the program will terminate. We must show
|
||||||
|
that using our set, too, will guarantee termination. We already
|
||||||
|
know that a valid input, given a state, can have one of three
|
||||||
|
possible outcomes: "ok" termination, "failed" termination,
|
||||||
|
or a "step". We use `destruct` to take a look at each of these
|
||||||
|
in turn. The first two cases ("ok" termination and "failed" termination)
|
||||||
|
are fairly trivial:
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 237 240 >}}
|
||||||
|
|
||||||
|
We basically connect the dots between the premises (in a form like `done`)
|
||||||
|
and the corresponding inference rule (`run_noswap_ok`). The more
|
||||||
|
interesting case is when we can take a step.
|
||||||
|
|
||||||
|
{{< codelines "Coq" "aoc-2020/day8.v" 241 253 >}}
|
||||||
|
|
||||||
|
Since we know we can take a step, we know that we'll be removing
|
||||||
|
the current program counter from the set of valid addresses. This
|
||||||
|
set must currently contain the present program counter (since otherwise
|
||||||
|
we'd have "failed"), and thus will shrink when we remove it. This,
|
||||||
|
in turn, lets us use the inductive hypothesis: it tells us that no matter the
|
||||||
|
program counter or accumulator, if we start with this new "shrunk"
|
||||||
|
set, we will terminate in some state. Coq's constructive
|
||||||
|
nature helps us here: it doesn't just tell us that there is some state
|
||||||
|
in which we terminate - it gives us that state! We use `edestruct` to get
|
||||||
|
a handle on this final state, which Coq automatically names `x`. At this
|
||||||
|
time Coq still isn't convinced that our new set is smaller, so we invoke
|
||||||
|
our earlier `set_remove_length` theorem to placate it.
|
||||||
|
|
||||||
|
We now have all the pieces: we know that we can take a step, removing
|
||||||
|
the current program counter from our current set. We also know that
|
||||||
|
with that newly shrunken set, we'll terminate in some final state `x`.
|
||||||
|
Thus, all that's left to say is to apply our "step" rule. It asks
|
||||||
|
us for three things:
|
||||||
|
|
||||||
|
1. That the current program counter is in the set. We've long since
|
||||||
|
established this, and `auto` takes care of that.
|
||||||
|
2. That a step is possible. We've already established this, too,
|
||||||
|
since we're in the "can take a step" case. We apply `Hst`,
|
||||||
|
the hypothesis that confirms that we can, indeed, step.
|
||||||
|
3. That we terminate after this. The `x` we got
|
||||||
|
from our induction hypothesis came with a proof that
|
||||||
|
running with the "next" program counter and accumulator
|
||||||
|
will result in termination. We apply this proof, automatically
|
||||||
|
named `H0` by Coq.
|
||||||
|
|
||||||
|
And that's it! We've proved that a program terminates no matter what.
|
||||||
|
This has also (almost!) given us a solution to part 1. Consider the case
|
||||||
|
in which we start with program counter 0, accumulator 0, and the "full"
|
||||||
|
set of allowed program counters. Since our proof works for _all_ configurations,
|
||||||
|
it will also work for this one. Furthermore, since Coq proofs are constructive,
|
||||||
|
this proof will __return to us the final program counter and accumulator!__
|
||||||
|
This is precisely what we'd need to solve part 1.
|
||||||
|
|
||||||
|
But wait, almost? What's missing? We're missing a few implementation details:
|
||||||
|
* We've not provided a concrete implementation of integers. The simplest
|
||||||
|
thing to do here would be to use [`Coq.ZArith.BinInt`](https://coq.inria.fr/library/Coq.ZArith.BinInt.html),
|
||||||
|
for which there is a module [`Z_as_Int`](https://coq.inria.fr/library/Coq.ZArith.Int.html#Z_as_Int)
|
||||||
|
that provides `t` and friends.
|
||||||
|
* We assumed (reasonably, I would say) that it's possible to convert a natural
|
||||||
|
number to an integer. If we're using the aforementioned `BinInt` module,
|
||||||
|
we can use [`Z.of_nat`](https://coq.inria.fr/library/Coq.ZArith.BinIntDef.html#Z.of_nat).
|
||||||
|
* We also assumed (still reasonably) that we can try to convert an integer
|
||||||
|
back to a finite natural number, failing if it's too small or too large.
|
||||||
|
There's no built-in function for this, but `Z`, for one, distinguishes
|
||||||
|
between the "positive", "zero", and "negative" cases, and we have
|
||||||
|
`Pos.to_nat` for the positive case.
|
||||||
|
|
||||||
|
Well, I seem to have covered all the implementation details. Why not just
|
||||||
|
go ahead and solve the problem? I tried, and ran into two issues:
|
||||||
|
|
||||||
|
* Although this is "given", we assumed that our input program will be
|
||||||
|
valid. For us to use the result of our Coq proof, we need to provide it
|
||||||
|
a constructive proof that our program is valid. Creating this proof is tedious
|
||||||
|
in theory, and quite difficult in practice: I've run into a
|
||||||
|
strange issue trying to pattern match on finite naturals.
|
||||||
|
* Even supposing we _do_ have a proof of validity, I'm not certain
|
||||||
|
if it's possible to actually extract an answer from it. It seems
|
||||||
|
that Coq distinguishes between proofs (things of type `Prop`) and
|
||||||
|
values (things of type `Set`). Things of type `Prop` are supposed
|
||||||
|
to be _erased_. This means that when you convert Coq code,
|
||||||
|
to, say, Haskell, you will see no trace of any `Prop`s in that generated
|
||||||
|
code. Unfortunately, this also means we
|
||||||
|
[can't use our proofs to construct values](https://stackoverflow.com/questions/27322979/why-coq-doesnt-allow-inversion-destruct-etc-when-the-goal-is-a-type),
|
||||||
|
even though our proof objects do indeed contain them.
|
||||||
|
|
||||||
|
So, we "theoretically" have a solution to part 1, down to the algorithm
|
||||||
|
used to compute it and a proof that our algorithm works. In "reality", though, we
|
||||||
|
can't actually use this solution to procure an answer. Like we did with day 1, we'll have
|
||||||
|
to settle for only a proof.
|
||||||
|
|
||||||
|
Let's wrap up for this post. It would be more interesting to devise and
|
||||||
|
formally verify an algorithm for part 2, but this post has already gotten
|
||||||
|
quite long and contains a lot of information. Perhaps I will revisit this
|
||||||
|
at a later time. Thanks for reading!
|
||||||
@@ -3,6 +3,7 @@ title: Compiling a Functional Language Using C++, Part 10 - Polymorphism
|
|||||||
date: 2020-03-25T17:14:20-07:00
|
date: 2020-03-25T17:14:20-07:00
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
description: "In this post, we extend our compiler's typechecking algorithm to implement the Hindley-Milner type system, allowing for polymorphic functions."
|
description: "In this post, we extend our compiler's typechecking algorithm to implement the Hindley-Milner type system, allowing for polymorphic functions."
|
||||||
|
favorite: true
|
||||||
---
|
---
|
||||||
|
|
||||||
[In part 8]({{< relref "08_compiler_llvm.md" >}}), we wrote some pretty interesting programs in our little language.
|
[In part 8]({{< relref "08_compiler_llvm.md" >}}), we wrote some pretty interesting programs in our little language.
|
||||||
|
|||||||
@@ -984,5 +984,6 @@ Before either of those things, though, I think that I want to go through
|
|||||||
the compiler and perform another round of improvements, similarly to
|
the compiler and perform another round of improvements, similarly to
|
||||||
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
|
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
|
||||||
of refactoring while covering new content, since major changes need to
|
of refactoring while covering new content, since major changes need to
|
||||||
be explained and presented for the post to make sense. I hope to see
|
be explained and presented for the post to make sense.
|
||||||
you in these future posts!
|
I do this in [part 13]({{< relref "13_compiler_cleanup/index.md" >}}) - cleanup.
|
||||||
|
I hope to see you there!
|
||||||
|
|||||||
964
content/blog/13_compiler_cleanup/index.md
Normal file
964
content/blog/13_compiler_cleanup/index.md
Normal file
@@ -0,0 +1,964 @@
|
|||||||
|
---
|
||||||
|
title: Compiling a Functional Language Using C++, Part 13 - Cleanup
|
||||||
|
date: 2020-09-19T16:14:13-07:00
|
||||||
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
|
description: "In this post, we clean up our compiler."
|
||||||
|
---
|
||||||
|
|
||||||
|
In [part 12]({{< relref "12_compiler_let_in_lambda" >}}), we added `let/in`
|
||||||
|
and lambda expressions to our compiler. At the end of that post, I mentioned
|
||||||
|
that before we move on to bigger and better things, I wanted to take a
|
||||||
|
step back and clean up the compiler. Now is the time to do that.
|
||||||
|
|
||||||
|
In particular, I identified four things that could be improved
|
||||||
|
or cleaned up:
|
||||||
|
|
||||||
|
* __Error handling__. We need to stop using `throw 0` and start
|
||||||
|
using `assert`. We can also make our errors much more descriptive
|
||||||
|
by including source locations in the output.
|
||||||
|
* __Name mangling__. I don't think I got it quite right last
|
||||||
|
time. Now is the time to clean it up.
|
||||||
|
* __Code organization__. I think we can benefit from a top-level
|
||||||
|
class, and a more clear "dependency order" between the various
|
||||||
|
classes and structures we've defined.
|
||||||
|
* __Code style__. In particular, I've been lazily using `struct`
|
||||||
|
in a lot of places. That's not a good idea; it's better
|
||||||
|
to use `class`, and only expose _some_ fields and methods
|
||||||
|
to the rest of the code.
|
||||||
|
|
||||||
|
### Error Reporting and Handling
|
||||||
|
The previous post was rather long, which led me to omit
|
||||||
|
a rather important aspect of the compiler: proper error reporting.
|
||||||
|
Once again our compiler has instances of `throw 0`, which is a cheap way
|
||||||
|
of avoiding properly handling a runtime error. Before we move on,
|
||||||
|
it's best to get rid of such blatantly lazy code.
|
||||||
|
|
||||||
|
Our existing exceptions (mostly type errors) can use some work, too.
|
||||||
|
Even the most descriptive issues our compiler reports -- unification errors --
|
||||||
|
don't include the crucial information of _where_ the error is. For large
|
||||||
|
programs, this means having to painstakingly read through the entire file
|
||||||
|
to try to figure out which subexpression could possibly have an incorrect type.
|
||||||
|
This is far from the ideal debugging experience.
|
||||||
|
|
||||||
|
Addressing all this is a multi-step change in itself. We want to:
|
||||||
|
|
||||||
|
* Replace all `throw 0` code with actual exceptions.
|
||||||
|
* Replace some exceptions that shouldn't be possible for a user to trigger
|
||||||
|
with assertions.
|
||||||
|
* Keep track of source locations of each subexpression, so that we may
|
||||||
|
be able to print it if it causes an error.
|
||||||
|
* Be able to print out said source locations at will. This isn't
|
||||||
|
a _necessity_, but virtually all "big" compilers do this. Instead
|
||||||
|
of reporting that an error occurs on a particular line, we will
|
||||||
|
actually print the line.
|
||||||
|
|
||||||
|
Let's start with gathering the actual location data.
|
||||||
|
|
||||||
|
#### Bison's Locations
|
||||||
|
Bison actually has some rather nice support for location tracking. It can
|
||||||
|
automatically assemble the "from" and "to" locations of a nonterminal
|
||||||
|
from the locations of children, which would be very tedious to write
|
||||||
|
by hand. We enable this feature using the following option:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parser.y" 46 46 >}}
|
||||||
|
|
||||||
|
There's just one hitch, though. Sure, Bison can compute bigger
|
||||||
|
locations from smaller ones, but it must get the smaller ones
|
||||||
|
from somewhere. Since Bison operates on _tokens_, rather
|
||||||
|
than _characters_, it effectively doesn't interact with the source
|
||||||
|
text at all, and can't determine from which line or column a token
|
||||||
|
originated. The task of determining the locations of input tokens
|
||||||
|
is delegated to the tokenizer -- Flex, in our case. Flex, on the
|
||||||
|
other hand, doesn't have a built-in mechanism for tracking
|
||||||
|
locations. Fortunately, Bison provides a `yy::location` class that
|
||||||
|
includes most of the needed functionality.
|
||||||
|
|
||||||
|
A `yy::location` consists of two source positions, `begin` and `end`,
|
||||||
|
which themselves are represented using lines and columns. It
|
||||||
|
also has the following methods:
|
||||||
|
|
||||||
|
* `yy::location::columns(int)` advances the `end` position by
|
||||||
|
the given number of columns, while `begin` stays the same.
|
||||||
|
If `begin` and `end` both point to the beginning of a token,
|
||||||
|
then `columns(token_length)` will move `end` to the token's end,
|
||||||
|
and thus make the whole `location` contain the token.
|
||||||
|
* `yy::location::lines(int)` behaves similarly to `columns`,
|
||||||
|
except that it advances `end` by the given number of lines,
|
||||||
|
rather than columns. It also resets the columns counter to `1`.
|
||||||
|
* `yy::location::step()` moves `begin` to where `end` is. This
|
||||||
|
is useful for when we've finished processing a token, and want
|
||||||
|
to move on to the next one.
|
||||||
|
|
||||||
|
For Flex specifically, `yyleng` has the length of the token
|
||||||
|
currently being processed. Rather than adding the calls
|
||||||
|
to `columns` and `step` to every rule, we can define the
|
||||||
|
`YY_USER_ACTION` macro, which is run before each token
|
||||||
|
is processed.
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/scanner.l" 12 14 >}}
|
||||||
|
|
||||||
|
We'll see why we are using `LOC` instead of something like `location` soon;
|
||||||
|
for now, you can treat `LOC` as if it were a global variable declared
|
||||||
|
in the tokenizer. Before processing each token, we ensure that
|
||||||
|
the `yy::location` has its `begin` and `end` at the same position,
|
||||||
|
and then advance `end` by `yyleng` columns. This is
|
||||||
|
{{< sidenote "right" "sufficient-note" "sufficient" >}}
|
||||||
|
This doesn't hold for all languages. It may be possible for a language
|
||||||
|
to have tokens that contain <code>\n</code>, in which case,
|
||||||
|
rather than just using <code>yyleng</code>, we'd need to
|
||||||
|
add special logic to iterate over the token and detect the line
|
||||||
|
breaks.<br>
|
||||||
|
<br>
|
||||||
|
Also, this requires that the <code>end</code> of the previous token was
|
||||||
|
correctly computed.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
to make `LOC` represent our token's source position. For
|
||||||
|
the moment, don't worry too much about `drv`; this is the
|
||||||
|
parsing driver, and we will talk about it shortly.
|
||||||
|
|
||||||
|
So now we have a "global" variable `LOC` that gives
|
||||||
|
us the source position of the current token. To get it
|
||||||
|
to Bison, we have to pass it as an argument to each
|
||||||
|
of the `make_TOKEN` calls. Here are a few sample lines
|
||||||
|
that should give you the general idea:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/scanner.l" 40 43 >}}
|
||||||
|
|
||||||
|
That last line is actually new. Previously, we somehow
|
||||||
|
got away without explicitly sending the end-of-file token to Bison.
|
||||||
|
I suspect that this was due to some kind of implicit conversion
|
||||||
|
of the Flex macro `YY_NULL` into a token; now that we have
|
||||||
|
to pass a position to every token constructor, such an implicit
|
||||||
|
conversion is probably impossible.
|
||||||
|
|
||||||
|
Now we have Bison computing source locations for each nonterminal.
|
||||||
|
However, at the moment, we still aren't using them. To change that,
|
||||||
|
we need to add a `yy::location` argument to each of our `ast` nodes,
|
||||||
|
as well as to the `pattern` subclasses, `definition_defn` and
|
||||||
|
`definition_data`. To avoid breaking all the code that creates
|
||||||
|
AST nodes and definitions outside of the parser, we'll make this
|
||||||
|
argument optional. Inside of `ast.hpp`, we define a new field as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/ast.hpp" 16 16 >}}
|
||||||
|
|
||||||
|
Then, we add a constructor to `ast` as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/ast.hpp" 18 18 >}}
|
||||||
|
|
||||||
|
Note that it's not optional here, since `ast` itself is an
|
||||||
|
abstract class, and thus will never be constructed directly.
|
||||||
|
It is in the subclasses of `ast` that we provide a default
|
||||||
|
value. The change is rather mechanical, but here's an example
|
||||||
|
from `ast_binop`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/ast.hpp" 98 99 >}}
|
||||||
|
|
||||||
|
Finally, we tell Bison to pass the computed location
|
||||||
|
data as an argument when constructing our data structures.
|
||||||
|
This too is a mechanical change, and I think the following
|
||||||
|
few lines demonstrate the general idea in sufficient
|
||||||
|
detail:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parser.y" 92 96 >}}
|
||||||
|
|
||||||
|
Here, the `@$` character is used to reference the current
|
||||||
|
nonterminal's location data.
|
||||||
|
|
||||||
|
#### Line Offsets, File Input, and the Parsing Driver
|
||||||
|
There are three more challenges with printing out the line
|
||||||
|
of code where an error occurred. First of all, to
|
||||||
|
print out a line of code, we need to have that line of code
|
||||||
|
available to us. We do not currently meet this requirement:
|
||||||
|
our compiler reads code from `stdin` (as is default for Flex),
|
||||||
|
and `stdin` doesn't always support rewinding. This, in turn,
|
||||||
|
means that once Flex has read a character from the input,
|
||||||
|
it may not be possible to go back and retrieve that character
|
||||||
|
again.
|
||||||
|
|
||||||
|
Second, even if we do have the entire stream or buffer
|
||||||
|
available to us, to retrieve an offset and length within
|
||||||
|
that buffer from just a line and column number would be a lot
|
||||||
|
of work. A naive approach would be to iterate through
|
||||||
|
the input again, once more keeping track of lines and columns,
|
||||||
|
and print the desired line once we reach it. However, this
|
||||||
|
would lead us to redo a lot of work that our tokenizer
|
||||||
|
is already doing.
|
||||||
|
|
||||||
|
Third, Flex's input mechanism, even if it's configured
|
||||||
|
not to read from `stdin`, uses a global file descriptor called
|
||||||
|
`yyin`. However, we're better off minimizing global state (especially
|
||||||
|
if we want to read, parse, and compile multiple files in
|
||||||
|
the future). While we're configuring Flex's input mechanism,
|
||||||
|
we may as well fix this, too.
|
||||||
|
|
||||||
|
There are several approaches to fixing the first issue. One possible
|
||||||
|
way is to store the content of `stdin` into a temporary file. Then,
|
||||||
|
it's possible to read from the file multiple times by using
|
||||||
|
the C functions `fseek` and `rewind`. However, since we're
|
||||||
|
working with files, why not just work directly with the files
|
||||||
|
created by the user? Instead of reading from `stdin`, we may
|
||||||
|
as well take in a path to a file via `argv`, and read from there.
|
||||||
|
Also, instead of `fseek` and `rewind`, we can just read the file
|
||||||
|
into memory, and access it like a normal character buffer. This
|
||||||
|
does mean that we can stick with `stdin`, but it's more conventional
|
||||||
|
to read source code from files, anyway.
|
||||||
|
|
||||||
|
To address the second issue, we can keep a mapping of line numbers
|
||||||
|
to their locations in the source buffer. This is rather easy to
|
||||||
|
maintain using an array: the first element of the array is 0,
|
||||||
|
which is the beginning of the first line in any source file. From there,
|
||||||
|
every time we encounter the character `\n`, we can push
|
||||||
|
the current source location to the top, marking it as
|
||||||
|
the beginning of another line. Where exactly we store this
|
||||||
|
array is as yet unclear, since we're trying to avoid global variables.
|
||||||
|
|
||||||
|
Finally, to begin addressing the third issue, we can use Flex's `reentrant`
|
||||||
|
option, which makes it so that all of the tokenizer's state is stored in an
|
||||||
|
opaque `yyscan_t` structure, rather than in global variables. This way,
|
||||||
|
we can configure `yyin` without setting a global variable, which is a step
|
||||||
|
in the right direction. We'll work on this momentarily.
|
||||||
|
|
||||||
|
Our tokenizing and parsing stack has more global variables
|
||||||
|
than just those specific to Flex. Among these variables is `global_defs`,
|
||||||
|
which receives all the top-level function and data type definitions. We
|
||||||
|
will also need some way of accessing the `yy::location` instance, and
|
||||||
|
a way of storing our file input in memory. Fortunately, we're not
|
||||||
|
the only ones to have ever come across the issue of creating non-global
|
||||||
|
state: the Bison documentation has a
|
||||||
|
[section in its C++ guide](https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parsing-Driver.html)
|
||||||
|
that describes a technique for manipulating
|
||||||
|
state -- "parsing context", in their words. This technique involves the
|
||||||
|
creation of a _parsing driver_.
|
||||||
|
|
||||||
|
The parsing driver is a class (or struct) that holds all the parse-related
|
||||||
|
state. We can arrange for this class to be available to our tokenizing
|
||||||
|
and parsing functions, which will allow us to use it pretty much like we'd
|
||||||
|
use a global variable. This is the `drv` that we saw in `YY_USER_ACTION`.
|
||||||
|
We can define it as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.hpp" 36 54 >}}
|
||||||
|
|
||||||
|
There aren't many fields here. The `file_name` string represents
|
||||||
|
the file that we'll be reading code from. The `location` field
|
||||||
|
will be accessed by Flex via `get_current_location`. Bison will
|
||||||
|
store the function and data type definitions it reads into `global_defs`
|
||||||
|
via `get_global_defs`. Finally, `file_m` will be used to keep track
|
||||||
|
of the content of the file we're reading, as well as the line offsets
|
||||||
|
within that file. Notice that a couple of these fields are pointers
|
||||||
|
that we take by reference in the constructor. The `parse_driver` doesn't
|
||||||
|
_own_ the global definitions, nor the file manager. They exist outside
|
||||||
|
of it, and will continue to be used in other ways the `parse_driver`
|
||||||
|
does not need to know about. Also, the `LOC` variable in Flex is
|
||||||
|
actually a call to `get_current_location`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/scanner.l" 15 15 >}}
|
||||||
|
|
||||||
|
The methods of `parse_driver` are rather simple. The majority of
|
||||||
|
them deals with giving access to the parser's members: the `yy::location`,
|
||||||
|
the `definition_group`, and the `file_mgr`. The only exception
|
||||||
|
to this is `operator()`, which we use to actually trigger the parsing process.
|
||||||
|
We'll make this method return `true` if parsing succeeded, and `false`
|
||||||
|
otherwise (if, say, the file we tried to read doesn't exist).
|
||||||
|
Here's its implementation:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 48 60 >}}
|
||||||
|
|
||||||
|
We try to open the user-specified file, and return `false` if we can't.
|
||||||
|
After this, we start doing the setup specific to a reentrant
|
||||||
|
Flex scanner. We declare a `yyscan_t` variable, which
|
||||||
|
will contain all of Flex's state. Then, we initialize
|
||||||
|
it using `yylex_init`. Finally, since we can no longer
|
||||||
|
touch the `yyin` global variable (it doesn't exist),
|
||||||
|
we have to resort to using a setter function provided by Flex
|
||||||
|
to configure the tokenizer's input stream.
|
||||||
|
|
||||||
|
Next, we construct our Bison-generated parser. Note that
|
||||||
|
unlike before, we have to pass in two arguments:
|
||||||
|
`scanner` and `*this`, the latter being of type `parse_driver&`.
|
||||||
|
We'll come back to how this works in a moment. With
|
||||||
|
the scanner and parser initialized, we invoke `parser::operator()`,
|
||||||
|
which actually runs the Flex- and Bison-generated code.
|
||||||
|
To clean up, we run `yylex_destroy` and `fclose`. Finally,
|
||||||
|
we call `file_mgr::finalize`, and return. But what
|
||||||
|
_is_ `file_mgr`?
|
||||||
|
|
||||||
|
The `file_mgr` class does two things: it stores the part of the file
|
||||||
|
that has already been read by Flex in memory, and it keeps track of
|
||||||
|
where each line in our source file begins within the text. Here is its
|
||||||
|
definition:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.hpp" 14 34 >}}
|
||||||
|
|
||||||
|
In this class, the `string_stream` member is used to construct
|
||||||
|
an `std::string` from the bits of text that Flex reads,
|
||||||
|
processes, and feeds to the `file_mgr` using the `write` method.
|
||||||
|
It's more efficient to use a string stream than to concatenate
|
||||||
|
strings repeatedly. Once Flex is finished processing the file,
|
||||||
|
the final contents of the `string_stream` are transferred into
|
||||||
|
the `file_contents` string using the `finalize` method. The `offset`
|
||||||
|
and `line_offsets` fields will be used as we described earlier: each time Flex
|
||||||
|
encounters the `\n` character, the `offset` variable will be pushed
|
||||||
|
on top of the `line_offsets` vector, marking the beginning of
|
||||||
|
the corresponding line. The methods of the class are as follows:
|
||||||
|
|
||||||
|
* `write` will be called from Flex, and will allow us to
|
||||||
|
record the content of the file we're processing to the `string_stream`.
|
||||||
|
We've already seen it used in the `YY_USER_ACTION` macro.
|
||||||
|
* `mark_line` will also be called from Flex, and will mark the current
|
||||||
|
`file_offset` as the beginning of a line by pushing it into `line_offsets`.
|
||||||
|
* `finalize` will be called by the `parse_driver` when the parsing
|
||||||
|
finishes. At this time, the `string_stream` should contain all of
|
||||||
|
the input file, and this data is transferred to `file_contents`, as
|
||||||
|
we mentioned above.
|
||||||
|
* `get_index` and `get_line_end` will be used for converting
|
||||||
|
`yy::location` instances to offsets within the source code buffer.
|
||||||
|
* `print_location` will be used for printing errors.
|
||||||
|
It will print the lines spanned by the given location, with the
|
||||||
|
location itself colored and underlined if the last argument is `true`.
|
||||||
|
This will make our errors easier on the eyes.
|
||||||
|
|
||||||
|
Let's take a look at their implementations. First, `write`.
|
||||||
|
For the most part, this method is a proxy for the `write`
|
||||||
|
method of our `string_stream`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 9 12 >}}
|
||||||
|
|
||||||
|
We do, however, also keep track of the `file_offset` variable
|
||||||
|
here, which ensures we have up-to-date information
|
||||||
|
regarding our position in the source file. The implementation
|
||||||
|
of `mark_line` uses this information:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 14 16 >}}
|
||||||
|
|
||||||
|
The `finalize` method is trivial, and requires little additional
|
||||||
|
discussion:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 18 20 >}}
|
||||||
|
|
||||||
|
Once we have the line offsets, `get_index` becomes very simple:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 22 25 >}}
|
||||||
|
|
||||||
|
Here, we use an assertion for the first time. Calling
|
||||||
|
`get_index` with a negative or zero line doesn't make
|
||||||
|
any sense, since Bison starts tracking line numbers
|
||||||
|
at 1. Similarly, asking for a line for which we don't
|
||||||
|
have a recorded offset is invalid. Both
|
||||||
|
of these nonsensical calls to `get_index` cannot
|
||||||
|
be caused by the user under normal circumstances,
|
||||||
|
and indicate the method's misuse by the author of
|
||||||
|
the compiler (us!). Thus, we terminate the program.
|
||||||
|
|
||||||
|
Finally, the implementation of `line_end` just finds the
|
||||||
|
beginning of the next line. We stick to the C convention
|
||||||
|
of marking 'end' indices exclusive (pointing just past
|
||||||
|
the end of the array):
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 27 30 >}}
|
||||||
|
|
||||||
|
Since `line_offsets` has as many elements as there are lines,
|
||||||
|
the last line number would be equal to the vector's size.
|
||||||
|
When looking up the end of the last line, we can't look for
|
||||||
|
the beginning of the next line, so instead we return the end of the file.
|
||||||
|
|
||||||
|
Next, the `print_location` method prints three sections
|
||||||
|
of the source file. These are the text "before" the error,
|
||||||
|
the error itself, and, finally, the text "after" the error.
|
||||||
|
For example, if an error began on the fifth column of the third
|
||||||
|
line, and ended on the eighth column of the fourth line, the
|
||||||
|
"before" section would include the first four columns of the third
|
||||||
|
line, and the "after" section would be the ninth column onward
|
||||||
|
on the fourth line. Before and after the error itself,
|
||||||
|
if the `highlight` argument is true,
|
||||||
|
we sprinkle the ANSI escape codes to enable and disable
|
||||||
|
special formatting, respectively. For now, the special
|
||||||
|
formatting involves underlining the text and making it red.
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 32 46 >}}
|
||||||
|
|
||||||
|
Finally, to get the forward declarations for the `yy*` functions
|
||||||
|
and types, we set the `header-file` option in Flex:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/scanner.l" 3 3 >}}
|
||||||
|
|
||||||
|
We also include this `scanner.hpp` file in our `parse_driver.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.cpp" 2 2 >}}
|
||||||
|
|
||||||
|
#### Adding the Driver to Flex and Bison
|
||||||
|
Bison's C++ language template generates a class called
|
||||||
|
`yy::parser`. We don't really want to modify this class
|
||||||
|
in any way: not only is it generated code, but it's
|
||||||
|
also rather complex. Instead, Bison provides us
|
||||||
|
with a mechanism to pass more data in to the parser.
|
||||||
|
This data is made available to all the actions
|
||||||
|
that the parser runs. Better yet, Bison also attempts
|
||||||
|
to pass this data on to the tokenizer, which in our
|
||||||
|
case would mean that whatever data we provide Bison
|
||||||
|
will also be available to Flex. This is how we'll
|
||||||
|
allow the two components to access our new `parse_driver`
|
||||||
|
class. This is also how we'll pass in the `yyscan_t`
|
||||||
|
that Flex now needs to run its tokenizing code. To
|
||||||
|
do all this, we use Bison's `%param` option. I'm
|
||||||
|
going to include a few more lines from `parser.y`,
|
||||||
|
since they contain the necessary `#include` directives
|
||||||
|
and a required type definition:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parser.y" 1 18 >}}
|
||||||
|
|
||||||
|
The `%param` option effectively adds the parameter listed
|
||||||
|
between the curly braces to the constructor of the generated
|
||||||
|
`yy::parser`. We've already seen this in the implementation
|
||||||
|
of our driver, where we passed `scanner` and `*this` as
|
||||||
|
arguments when creating the parser. The parameters we declare are also passed to the
|
||||||
|
`yylex` function, which is expected to accept them in the same order.
|
||||||
|
|
||||||
|
Since we're adding `parse_driver` as an argument we have to
|
||||||
|
declare it. However, we can't include the `parse_driver` header
|
||||||
|
right away because `parse_driver` itself includes the `parser` header:
|
||||||
|
we'd end up with a circular dependency. Instead, we resort to
|
||||||
|
forward-declaring the driver class, as well as the `yyscan_t`
|
||||||
|
structure containing Flex's state.
|
||||||
|
|
||||||
|
Adding a parameter to Bison doesn't automatically affect
|
||||||
|
Flex. To let Flex know that its `yylex` function must now accept
|
||||||
|
the state and the parsing driver, we have to define the
|
||||||
|
`YY_DECL` macro. We do this in `parse_driver.hpp`, since
|
||||||
|
this forward declaration will be used by both Flex
|
||||||
|
and Bison:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parse_driver.hpp" 56 58 >}}
|
||||||
|
|
||||||
|
#### Improving Exceptions
|
||||||
|
Now, it's time to add location data (and a little bit more) to our
|
||||||
|
exceptions. We want to make it possible for exceptions to include
|
||||||
|
data about where the error occurred, and to print this data to the user.
|
||||||
|
However, it's also possible for us to have exceptions that simply
|
||||||
|
do not have that location data. Furthermore, we want to know
|
||||||
|
whether or not an exception has an associated location; we'd
|
||||||
|
rather not print an invalid or "default" location when an error
|
||||||
|
occurs.
|
||||||
|
|
||||||
|
In the old days of programming, we could represent the absence
|
||||||
|
of location data with a `nullptr`, or `NULL`. But not only
|
||||||
|
does this approach expose us to all kinds of `NULL`-safety
|
||||||
|
bugs, but it also requires heap allocation! This doesn't
|
||||||
|
make it sound all that appealing; instead, I think we should
|
||||||
|
opt for using `std::optional`.
|
||||||
|
|
||||||
|
Though `std::optional` is standard (as may be obvious from its
|
||||||
|
namespace), it's a rather recent addition to the C++ STL.
|
||||||
|
In order to gain access to it, we need to ensure that our
|
||||||
|
project is compiled using C++17. To this end, we add
|
||||||
|
the following two lines to our CMakeLists.txt:
|
||||||
|
|
||||||
|
{{< codelines "CMake" "compiler/13/CMakeLists.txt" 5 6 >}}
|
||||||
|
|
||||||
|
Now, let's add a new base class for all of our compiler errors,
|
||||||
|
unsurprisingly called `compiler_error`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.hpp" 10 26 >}}
|
||||||
|
|
||||||
|
We'll put some 'common' exception functionality
|
||||||
|
into the `print_location` and `print_about` methods. If the error
|
||||||
|
has an associated location, the former method will print that
|
||||||
|
location to the screen. We don't always want to highlight
|
||||||
|
the part of the code that caused the error: for instance,
|
||||||
|
an invalid data type definition may span several lines,
|
||||||
|
and coloring that whole section of text red would be
|
||||||
|
too much. To address this, we add the `highlight`
|
||||||
|
boolean argument, which can be used to switch the
|
||||||
|
colors on and off. The `print_about` method
|
||||||
|
will simply print the `what()` message of the exception,
|
||||||
|
in addition to the "specific" error that occurred (stored
|
||||||
|
in `description`). Here are the implementations of the
|
||||||
|
functions:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 3 16 >}}
|
||||||
|
|
||||||
|
We will also add a `pretty_print` method to all of
|
||||||
|
our exceptions. This method will handle
|
||||||
|
all the exception-specific printing logic.
|
||||||
|
For the generic compiler error, this means
|
||||||
|
simply printing out the error text and the location:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 18 21 >}}
|
||||||
|
|
||||||
|
For `type_error`, this logic slightly changes,
|
||||||
|
enabling colors when printing the location:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 27 30 >}}
|
||||||
|
|
||||||
|
Finally, for `unification_error`, we also include
|
||||||
|
the code to print out the two types that our
|
||||||
|
compiler could not unify:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/error.cpp" 32 41 >}}
|
||||||
|
|
||||||
|
There's a subtle change here. Compared to the previous
|
||||||
|
type-printing code (which we had in `main`), what
|
||||||
|
we wrote here deals with "expected" and "actual" types.
|
||||||
|
The `left` type passed to the exception is printed
|
||||||
|
first, and is treated like the "correct" type. The
|
||||||
|
`right` type, on the other hand, is treated
|
||||||
|
like the "wrong" type that should have been
|
||||||
|
unifiable with `left`. This will affect the
|
||||||
|
calling conventions of our unification code.
|
||||||
|
|
||||||
|
Now, we can go through and find all the places where
|
||||||
|
we `throw 0`. One such place was in the data type
|
||||||
|
definition code, where declaring the same type parameter
|
||||||
|
twice is invalid. We replace the `0` with a
|
||||||
|
`compiler_error`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/definition.cpp" 66 69 >}}
|
||||||
|
|
||||||
|
Not all `throw 0` statements should become exceptions.
|
||||||
|
For example, here's code from the previous version of
|
||||||
|
the compiler:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 123 127 >}}
|
||||||
|
|
||||||
|
If a definition `def_defn` has a dependency on a "nearby" (declared
|
||||||
|
in the same group) definition called `dependency`, and if
|
||||||
|
`dependency` does not exist within the same definition group,
|
||||||
|
we throw an exception. But this error is impossible
|
||||||
|
for a user to trigger: the only reason for a variable to appear
|
||||||
|
in the `nearby_variables` vector is that it was previously
|
||||||
|
found in the definition group. Here's the code that proves this
|
||||||
|
(from the current version of the compiler):
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/definition.cpp" 102 106 >}}
|
||||||
|
|
||||||
|
Not being able to find the variable in the definition group
|
||||||
|
is a compiler bug, and should never occur. So, instead
|
||||||
|
of throwing an exception, we'll use an assertion:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/definition.cpp" 128 128 >}}
|
||||||
|
|
||||||
|
For more complicated error messages, we can use a `stringstream`.
|
||||||
|
Here's an example from `parsed_type`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/parsed_type.cpp" 16 23 >}}
|
||||||
|
|
||||||
|
In general, this change is also rather mechanical. Before we
|
||||||
|
move on, to maintain a balance between exceptions and assertions, here
|
||||||
|
are a couple more assertions from `type_env`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type_env.cpp" 81 82 >}}
|
||||||
|
|
||||||
|
Once again, it should not be possible for the compiler
|
||||||
|
to try to generalize the type of a variable that doesn't
|
||||||
|
exist, and nor should generalization occur twice.
|
||||||
|
|
||||||
|
While we're on the topic of types, let's talk about
|
||||||
|
`type_mgr::unify`. In practice, I suspect that a lot of
|
||||||
|
errors in our compiler will originate from this method.
|
||||||
|
However, at present, this method does not in any way
|
||||||
|
track the locations of where a unification error occurred.
|
||||||
|
To fix this, we add a new `loc` parameter to `unify`,
|
||||||
|
which we make optional to allow for unification without
|
||||||
|
a known location. Here's the declaration:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type.hpp" 92 92 >}}
|
||||||
|
|
||||||
|
The change to the implementation is mechanical and repetitive,
|
||||||
|
so instead of showing you the whole method, I'll settle for
|
||||||
|
a couple of lines:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type.cpp" 121 122 >}}
|
||||||
|
|
||||||
|
We want to make sure that a location provided to the
|
||||||
|
top-level call to `unify` is also forwarded to the
|
||||||
|
recursive calls, so we have to explicitly add it
|
||||||
|
to the call.
|
||||||
|
|
||||||
|
We'll also have to update the 'main' code to call the
|
||||||
|
`pretty_print` methods, but there's another big change
|
||||||
|
that we're going to make before then. However, once that
|
||||||
|
change is made, our errors will look a lot better.
|
||||||
|
Here is what's printed out to the user when a type error
|
||||||
|
occurs:
|
||||||
|
|
||||||
|
```
|
||||||
|
an error occured while checking the types of the program: failed to unify types
|
||||||
|
occuring on line 2:
|
||||||
|
3 + False
|
||||||
|
the expected type was:
|
||||||
|
Int
|
||||||
|
while the actual type was:
|
||||||
|
Bool
|
||||||
|
```
|
||||||
|
|
||||||
|
Here's an error that was previously a `throw 0` statement in our code:
|
||||||
|
|
||||||
|
```
|
||||||
|
an error occured while compiling the program: type variable a used twice in data type definition.
|
||||||
|
occuring on line 1:
|
||||||
|
data Pair a a = { MkPair a a }
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, not only have we eliminated the lazy uses of `throw 0` in our
|
||||||
|
code, but we've also improved the presentation of the errors
|
||||||
|
to the user!
|
||||||
|
|
||||||
|
### Rethinking Name Mangling
|
||||||
|
In the previous post, I said the following:
|
||||||
|
|
||||||
|
> One more thing. Let’s adopt the convention of storing mangled names into the compilation environment. This way, rather than looking up mangled names only for global functions, which would be a ‘gotcha’ for anyone working on the compiler, we will always use the mangled names during compilation.
|
||||||
|
|
||||||
|
Now that I've had some more time to think about it
|
||||||
|
(and now that I've returned to the compiler after
|
||||||
|
a brief hiatus), I think that this was not the right call.
|
||||||
|
Mangled names make sense when translating to LLVM; we certainly
|
||||||
|
don't want to declare two LLVM functions
|
||||||
|
{{< sidenote "right" "mangling-note" "with the same name." >}}
|
||||||
|
By the way, LLVM has its own name mangling functionality. If you
|
||||||
|
declare two functions with the same name, they'll appear as
|
||||||
|
<code>function</code> and <code>function.0</code>. Since LLVM
|
||||||
|
uses the <code>Function*</code> C++ values to refer to functions,
|
||||||
|
as long as we keep them separate on <em>our</em> end, things will
|
||||||
|
work.<br>
|
||||||
|
<br>
|
||||||
|
However, in our compiler, name mangling occurs before LLVM is
|
||||||
|
introduced, at translation time. We could create LLVM functions
|
||||||
|
at that time, too, and associate them with variables. But then,
|
||||||
|
our G-machine instructions will be coupled to LLVM, which
|
||||||
|
would not be as clean.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
But things are different for local variables. Our local variables
|
||||||
|
are graphs on a stack, and are not actually compiled to LLVM
|
||||||
|
definitions. It doesn't make sense to mangle their names, since
|
||||||
|
their names aren't present anywhere in the final executable.
|
||||||
|
It's not even "consistent" to mangle them, since global definitions
|
||||||
|
are compiled directly to __PushGlobal__ instructions, while local
|
||||||
|
variables are only referenced through the current `env`.
|
||||||
|
So, I opted to reverse my decision. We will go back to
|
||||||
|
placing variable names directly into `env_var`. Here's
|
||||||
|
an example of this from `global_scope.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/global_scope.cpp" 6 8 >}}
|
||||||
|
|
||||||
|
Now that we've started using assertions, I also think it's worthwhile
|
||||||
|
to put our new invariant -- "only global definitions have mangled
|
||||||
|
names" -- into code:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type_env.cpp" 36 45 >}}
|
||||||
|
|
||||||
|
Furthermore, we'll _require_ that a global definition
|
||||||
|
has a mangled name. This way, we can be more confident
|
||||||
|
that a variable from a __PushGlobal__ instruction
|
||||||
|
is referencing the right function. To achieve
|
||||||
|
this, we change `get_mangled_name` to stop
|
||||||
|
returning the input string if a mangled name was not
|
||||||
|
found; doing so makes it impossible to check if a mangled
|
||||||
|
name was explicitly defined. Instead,
|
||||||
|
we add two assertions. First, if an environment scope doesn't
|
||||||
|
contain a variable, then it _must_ have a parent.
|
||||||
|
If it does contain a variable, that variable _must_ have
|
||||||
|
a mangled name. We end up with the following:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type_env.cpp" 47 55 >}}
|
||||||
|
|
||||||
|
For this to work, we make one more change. Now that we've
|
||||||
|
enabled C++17, we have access to `std::optional`. We
|
||||||
|
can thus represent the presence or absence of mangled
|
||||||
|
names using an optional field, rather than with the empty string `""`.
|
||||||
|
I hear that C++ compilers have pretty good
|
||||||
|
[empty string optimizations](https://www.youtube.com/watch?v=kPR8h4-qZdk),
|
||||||
|
but nonetheless, I think it makes more sense semantically
|
||||||
|
to use "absent" (`nullopt`) instead of "empty" (`""`).
|
||||||
|
Here's the definition of `type_env::variable_data` now:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type_env.hpp" 16 25 >}}
|
||||||
|
|
||||||
|
Since looking up a mangled name for non-global variable
|
||||||
|
{{< sidenote "right" "unrepresentable-note" "will now result in an assertion failure," >}}
|
||||||
|
A very wise human at the very dawn of our species once said,
|
||||||
|
"make illegal states unrepresentable". Their friends and family were a little
|
||||||
|
busy making a fire, and didn't really understand what the heck they meant. Now,
|
||||||
|
we kind of do.<br>
|
||||||
|
<br>
|
||||||
|
It's <em>possible</em> for our <code>type_env</code> to include a
|
||||||
|
<code>variable_data</code> entry that is both global and has no mangled
|
||||||
|
name. But it doesn't have to be this way. We could define two subclasses
|
||||||
|
of <code>variable_data</code>, one global and one local,
|
||||||
|
where only the global one has a <code>mangled_name</code>
|
||||||
|
field. It would be impossible to reach this assertion failure then.
|
||||||
|
{{< /sidenote >}} we have to change
|
||||||
|
`ast_lid::compile` to only call `get_mangled_name` once
|
||||||
|
it ensures that the variable being compiled is, in fact,
|
||||||
|
global:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/ast.cpp" 58 63 >}}
|
||||||
|
|
||||||
|
Since all global functions now need to have mangled
|
||||||
|
names, we run into a bit of a problem. What are
|
||||||
|
the mangled names of `(+)`, `(-)`, and so on? We could
|
||||||
|
continue to hardcode them as `plus`, `minus`, etc., but this can
|
||||||
|
(and currently does!) lead to errors. Consider the following
|
||||||
|
piece of code:
|
||||||
|
|
||||||
|
```
|
||||||
|
defn plus x y = { x + y }
|
||||||
|
defn main = { plus 320 6 }
|
||||||
|
```
|
||||||
|
|
||||||
|
We've hardcoded the mangled name of `(+)` to be `plus`. However,
|
||||||
|
`global_scope` doesn't know about this, so when the actual
|
||||||
|
`plus` function gets translated, it also gets assigned the
|
||||||
|
mangled name `plus`. The name is also overwritten in the
|
||||||
|
`llvm_context`, which effectively means that `(+)` is
|
||||||
|
now compiled to a call of the user-defined `plus` function.
|
||||||
|
If we didn't overwrite the name, we would've run into an assertion
|
||||||
|
failure in this scenario anyway. In short, this example illustrates
|
||||||
|
an important point: mangling information needs to be available
|
||||||
|
outside of a `global_scope`. We don't want to do this by having
|
||||||
|
every function take in a `global_scope` to access the mangling
|
||||||
|
information; instead, we'll store the mangling information in
|
||||||
|
a new `mangler` class, which `global_scope` will take as an argument.
|
||||||
|
The new class is very simple:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/mangler.hpp" 5 11 >}}
|
||||||
|
|
||||||
|
As with `parse_driver`, `global_scope` takes `mangler` by reference
|
||||||
|
and stores a pointer:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/global_scope.hpp" 50 50 >}}
|
||||||
|
|
||||||
|
The implementation of `new_mangled_name` doesn't change, so I'm
|
||||||
|
not going to show it here. With this new mangling information
|
||||||
|
in hand, we can now correctly set the mangled names of binary
|
||||||
|
operators:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/compiler.cpp" 22 27 >}}
|
||||||
|
|
||||||
|
Wait a moment, what's a `compiler`? Let's talk about that next.
|
||||||
|
|
||||||
|
### A Top-Level Class
|
||||||
|
Now that we've moved name mangling out of `global_scope`, we have
|
||||||
|
to put it somewhere. The same goes for the global definition group
|
||||||
|
and the file manager that are given to `parse_driver`. The two
|
||||||
|
classes _make use_ of the other data, but they don't _own it_.
|
||||||
|
That's why they take it by reference, and store it as a pointer.
|
||||||
|
They're just temporarily allowed access.
|
||||||
|
|
||||||
|
So, what should be the owner of all of these disparate components?
|
||||||
|
Thus far, that has been the `main` function, or the utility
|
||||||
|
functions that it calls out to. However, this is sloppy:
|
||||||
|
we have related data and operations on it, but we don't group
|
||||||
|
them into an object. We can group all of the components of our
|
||||||
|
compiler into a `compiler` object, and leave `main.cpp` with
|
||||||
|
exception printing code.
|
||||||
|
|
||||||
|
The definition of the `compiler` class begins with all of the data
|
||||||
|
structures that we use in the process of compilation:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/compiler.hpp" 12 20 >}}
|
||||||
|
|
||||||
|
There's a loose ordering to these fields. In C++, class members are
|
||||||
|
initialized in the order they are declared; we therefore want to make
|
||||||
|
sure that fields that are depended on by other fields are initialized first.
|
||||||
|
Otherwise, I tried to keep the order consistent with the conceptual path
|
||||||
|
of the code through the compiler.
|
||||||
|
* Parsing happens first, so we begin with `parse_driver`, which needs a
|
||||||
|
`file_manager` (to populate with line information) and a `definition_group`
|
||||||
|
(to receive the global definitions from the parser).
|
||||||
|
* We then proceed to typechecking, for which we use a global `type_env_ptr`
|
||||||
|
(to define the built-in functions and constructors) and a `type_mgr` (to
|
||||||
|
manage the assignments of type variables).
|
||||||
|
* Once a program is typechecked, we transform it, eliminating local
|
||||||
|
function definitions and lambda functions. This is done by storing
|
||||||
|
newly-emitted global functions into the `global_scope`, which requires a
|
||||||
|
`mangler` to generate new names for the target functions.
|
||||||
|
* Finally, to generate LLVM IR, we need our `llvm_context` class.
|
||||||
|
|
||||||
|
The methods of the compiler are arranged similarly:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/compiler.hpp" 22 31 >}}
|
||||||
|
|
||||||
|
The methods go as follows:
|
||||||
|
|
||||||
|
* `add_default_types` adds the built-in types to the `global_env`.
|
||||||
|
At this point, these types only include `Int`.
|
||||||
|
* `add_binop_type` adds a single binary operator to the global
|
||||||
|
type environment. We saw its implementation earlier: it deals
|
||||||
|
with both binding a type, and setting a mangled name.
|
||||||
|
* `add_default_types` adds the types for each binary operator.
|
||||||
|
* `parse`, `typecheck`, `translate` and `compile` all do exactly
|
||||||
|
what they say. In this case, compilation refers to creating G-machine
|
||||||
|
instructions.
|
||||||
|
* `create_llvm_binop` creates an internal function that forces the
|
||||||
|
evaluation of its two arguments, and actually applies the given binary
|
||||||
|
operator. Recall that the `(+)` in user code constructs a call to this
|
||||||
|
function, but leaves it unevaluated until it's needed.
|
||||||
|
* `generate_llvm` converts all the definitions in `global_scope`, which
|
||||||
|
are at this point compiled into G-machine `instruction`s, into LLVM IR.
|
||||||
|
* `output_llvm` contains all the code to actually generate an object
|
||||||
|
file from the LLVM IR.
|
||||||
|
|
||||||
|
These functions are mostly taken from part 12's `main.cpp`, and adjusted
|
||||||
|
to use the `compiler`'s members rather than local definitions or arguments.
|
||||||
|
You should compare part 12's
|
||||||
|
[`main.cpp`](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/12/main.cpp)
|
||||||
|
file with the
|
||||||
|
[`compiler.cpp`](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/13/compiler.cpp)
|
||||||
|
file that we end up with at the end of this post.
|
||||||
|
|
||||||
|
Next, we have the compiler's constructor, and its `operator()`. The
|
||||||
|
latter, analogously to our parsing driver, will trigger the compilation
|
||||||
|
process. Their implementations are straightforward:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/compiler.cpp" 131 145 >}}
|
||||||
|
|
||||||
|
We also add a couple of methods to give external code access to
|
||||||
|
some of the compiler's data structures. I omit their (trivial)
|
||||||
|
implementations, but they have the following signatures:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/compiler.hpp" 35 36 >}}
|
||||||
|
|
||||||
|
With all the compilation code tucked into our new `compiler` class,
|
||||||
|
`main` becomes very simple. We also finally get to use our exception
|
||||||
|
pretty printing code:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/main.cpp" 11 27 >}}
|
||||||
|
|
||||||
|
With this, we complete our transition to a compiler object.
|
||||||
|
All that's left is to clean up the code style.
|
||||||
|
|
||||||
|
### Keeping Things Private
|
||||||
|
Hand-writing or generating hundreds of trivial getters and setters
|
||||||
|
for the fields of a data class (which is standard in the world of Java) seems
|
||||||
|
absurd to me. So, for most of this project, I stuck with
|
||||||
|
`struct`s, rather than classes. But this is not a good policy
|
||||||
|
to apply _everywhere_. I still think it makes sense to make
|
||||||
|
data structures like `ast` and `type` public-by-default;
|
||||||
|
however, I _don't_ think that way about classes like `type_mgr`,
|
||||||
|
`llvm_context`, `type_env`, and `env`. All of these have information
|
||||||
|
that we should never be accessing directly. Some guard this
|
||||||
|
information with assertions. In short, it should be protected.
|
||||||
|
|
||||||
|
For most classes, the changes are mechanical. For instance, we
|
||||||
|
can make `type_env` a class simply by changing its declaration,
|
||||||
|
and marking all of its functions public. This requires a slight
|
||||||
|
refactoring of a line that used its `parent` field. Here's
|
||||||
|
what it used to be (in context):
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/main.cpp" 57 60 >}}
|
||||||
|
|
||||||
|
And here's what it is now:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/compiler.cpp" 55 58 >}}
|
||||||
|
|
||||||
|
Rather than traversing the chain of environments from
|
||||||
|
the body of the definition, we just use the definition's
|
||||||
|
own `env_ptr`. This is cleaner and more explicit, and
|
||||||
|
it helps us not use the private members of `type_env`!
|
||||||
|
|
||||||
|
The deal with `env` is about as simple. We just make
|
||||||
|
it and its two descendants classes, and mark their
|
||||||
|
methods and constructors public. The same
|
||||||
|
goes for `global_scope`. To make `type_mgr`
|
||||||
|
a class, we have to add a new method: `lookup`.
|
||||||
|
Here's its implementation:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type.cpp" 81 85 >}}
|
||||||
|
|
||||||
|
It's used in `type_var::print` as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/type.cpp" 28 35 >}}
|
||||||
|
|
||||||
|
We can't use `resolve` here because it takes (and returns)
|
||||||
|
a `type_ptr`. If we make it _take_ a `type*`, it won't
|
||||||
|
be able to return its argument if it's already resolved. If we
|
||||||
|
allow it to _return_ `type*`, we won't have an owning
|
||||||
|
reference. We also don't want to duplicate the
|
||||||
|
method just for this one call. Notice, though, how similar
|
||||||
|
`type_var::print`/`lookup` and `resolve` are in terms of execution.
|
||||||
|
|
||||||
|
The change for `llvm_context` requires a little more work.
|
||||||
|
Right now, `ctx.builder` is used a _lot_ in `instruction.cpp`.
|
||||||
|
Since we don't want to forward each of the LLVM builder methods,
|
||||||
|
and since it feels weird to make `llvm_context` extend `llvm::IRBuilder`,
|
||||||
|
we'll just provide a getter for the `builder` field. The
|
||||||
|
same goes for `module`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/llvm_context.hpp" 46 47 >}}
|
||||||
|
|
||||||
|
Here's what some of the code from `instruction.cpp` looks like now:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/instruction.cpp" 144 145 >}}
|
||||||
|
|
||||||
|
Right now, the `ctx` field of the `llvm_context` (which contains
|
||||||
|
the `llvm::LLVMContext`) is only externally used to create
|
||||||
|
instances of `llvm::BasicBlock`. We'll add a proxy method
|
||||||
|
for this functionality:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/llvm_context.cpp" 174 176 >}}
|
||||||
|
|
||||||
|
Finally, `instruction_pushglobal` needs to access the
|
||||||
|
`llvm::Function` instances that we create in the process
|
||||||
|
of compilation. We add a new `get_custom_function` method
|
||||||
|
to support this, which automatically prefixes the function
|
||||||
|
name with `f_`, much like `create_custom_function`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/13/llvm_context.cpp" 292 294 >}}
|
||||||
|
|
||||||
|
I think that's enough. If we chose to turn more compiler
|
||||||
|
data structures into classes, I think we would've quickly drowned
|
||||||
|
in one-line getter and setter methods.
|
||||||
|
|
||||||
|
That's all for the cleanup! We've added locations and more errors
|
||||||
|
to the compiler, stopped throwing `0` in favor of proper exceptions
|
||||||
|
or assertions, made name mangling more reasonable, fixed a bug with
|
||||||
|
accidentally shadowing default functions, organized our compilation
|
||||||
|
process into a `compiler` class, and made more things into classes.
|
||||||
|
In the next post, I hope to tackle __strings__ and __Input/Output__.
|
||||||
|
I also think that implementing __modules__ would be a good idea,
|
||||||
|
though at the moment I don't know too much on the subject. I hope
|
||||||
|
you'll join me in my future writing!
|
||||||
|
|
||||||
|
### Appendix: Optimization
|
||||||
|
When I started working on the compiler after the previous post,
|
||||||
|
I went a little overboard. I started working on optimizing the generated programs,
|
||||||
|
but eventually decided I wasn't doing a
|
||||||
|
{{< sidenote "right" "good-note" "good enough" >}}
|
||||||
|
I think authors should feel a certain degree of responsibility
|
||||||
|
for the content they create. If I do something badly, somebody
|
||||||
|
else trusts me and learns from it, who knows how much damage I've done.
|
||||||
|
I try not to do damage.<br>
|
||||||
|
<br>
|
||||||
|
If anyone reads what I write, anyway!
|
||||||
|
{{< /sidenote >}} job to present it to others,
|
||||||
|
and scrapped that part of the compiler altogether. I'm not
|
||||||
|
sure if I will try again in the near future. But,
|
||||||
|
if you're curious about optimization, here are a few avenues
|
||||||
|
I've explored or thought about:
|
||||||
|
|
||||||
|
* __Unboxing numbers__. Right now, numbers are allocated and garbage
|
||||||
|
collected just like the rest of the graph nodes. This is far from ideal.
|
||||||
|
We could use pointers to represent numbers, by tagging their most significant
|
||||||
|
bits on 64-bit CPUs. Rather than allocating a node, the runtime will just
|
||||||
|
cast a number to a pointer, tag it, and push it on the stack.
|
||||||
|
* __Converting enumeration data types to numbers__. If no constructor
|
||||||
|
of a data type takes any arguments, then the tag uniquely identifies
|
||||||
|
each constructor. Combined with unboxed numbers, this can save unnecessary
|
||||||
|
allocations and memory accesses.
|
||||||
|
* __Special treatment for global constants__. It makes sense for
|
||||||
|
global functions to be converted into LLVM functions, but the
|
||||||
|
same is not the case for
|
||||||
|
{{< sidenote "right" "constant-note" "constants." >}}
|
||||||
|
Yeah, yeah, a constant is just a nullary function. Get
|
||||||
|
out of here with your pedantry!
|
||||||
|
{{< /sidenote >}} We can find a way to
|
||||||
|
initialize global constants once, which would save some work. To
|
||||||
|
make more constants suitable for this, we could employ
|
||||||
|
[monomorphism restriction](https://wiki.haskell.org/Monomorphism_restriction).
|
||||||
|
* __Optimizing stack operations.__ If you read through the LLVM IR
|
||||||
|
we produce, you can see a lot of code that peeks at something twice,
|
||||||
|
or pops-then-pushes the same value, or does other absurd things. LLVM
|
||||||
|
isn't aware of the semantics of our stacks, but perhaps we could write an
|
||||||
|
optimization pass to deal with some of the more blatant instances of
|
||||||
|
this issue.
|
||||||
|
|
||||||
|
If you attempt any of these, let me know how it goes, please!
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
title: "How Many Values Does a Boolean Have?"
|
title: "How Many Values Does a Boolean Have?"
|
||||||
date: 2020-08-21T23:05:55-07:00
|
date: 2020-08-21T23:05:55-07:00
|
||||||
tags: ["Java", "Haskell", "C and C++"]
|
tags: ["Java", "Haskell", "C and C++"]
|
||||||
|
favorite: true
|
||||||
---
|
---
|
||||||
|
|
||||||
A friend of mine recently had an interview for a software
|
A friend of mine recently had an interview for a software
|
||||||
|
|||||||
BIN
content/blog/codelines/example.png
Normal file
BIN
content/blog/codelines/example.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 42 KiB |
268
content/blog/codelines/index.md
Normal file
268
content/blog/codelines/index.md
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
---
|
||||||
|
title: "Pleasant Code Includes with Hugo"
|
||||||
|
date: 2021-01-13T21:31:29-08:00
|
||||||
|
tags: ["Hugo"]
|
||||||
|
---
|
||||||
|
|
||||||
|
Ever since I started [the compiler series]({{< relref "00_compiler_intro.md" >}}),
|
||||||
|
I began to include more and more fragments of code into my blog.
|
||||||
|
I didn't want to be copy-pasting my code between my project
|
||||||
|
and my Markdown files, so I quickly wrote up a Hugo [shortcode](https://gohugo.io/content-management/shortcodes/)
|
||||||
|
to pull in other files in the local directory. I've since improved on this
|
||||||
|
some more, so I thought I'd share what I created with others.
|
||||||
|
|
||||||
|
### Including Entire Files and Lines
|
||||||
|
My needs for snippets were modest at first. For the most part,
|
||||||
|
I had a single code file that I wanted to present, so it was
|
||||||
|
acceptable to plop it in the middle of my post in one piece.
|
||||||
|
The shortcode for that was quite simple:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ highlight (readFile (printf "code/%s" (.Get 1))) (.Get 0) "" }}
|
||||||
|
```
|
||||||
|
|
||||||
|
This leverages Hugo's built-in [`highlight`](https://gohugo.io/functions/highlight/)
|
||||||
|
function to provide syntax highlighting to the included snippet. Hugo
|
||||||
|
doesn't guess at the language of the code, so you have to manually provide
|
||||||
|
it. Calling this shortcode looks as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{</* codeblock "C++" "compiler/03/type.hpp" */>}}
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that this implicitly adds the `code/` prefix to all
|
||||||
|
the files I include. This is a personal convention: I want
|
||||||
|
all my code to be inside a dedicated directory.
|
||||||
|
|
||||||
|
Of course, including entire files only takes you so far.
|
||||||
|
What if you only need to discuss a small part of your code?
|
||||||
|
Alternatively, what if you want to present code piece-by-piece,
|
||||||
|
in the style of literate programming? I quickly ran into the
|
||||||
|
need to do this, for which I wrote another shortcode:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ $s := (readFile (printf "code/%s" (.Get 1))) }}
|
||||||
|
{{ $t := split $s "\n" }}
|
||||||
|
{{ if not (eq (int (.Get 2)) 1) }}
|
||||||
|
{{ .Scratch.Set "u" (after (sub (int (.Get 2)) 1) $t) }}
|
||||||
|
{{ else }}
|
||||||
|
{{ .Scratch.Set "u" $t }}
|
||||||
|
{{ end }}
|
||||||
|
{{ $v := first (add (sub (int (.Get 3)) (int (.Get 2))) 1) (.Scratch.Get "u") }}
|
||||||
|
{{ if (.Get 4) }}
|
||||||
|
{{ .Scratch.Set "opts" (printf ",%s" (.Get 4)) }}
|
||||||
|
{{ else }}
|
||||||
|
{{ .Scratch.Set "opts" "" }}
|
||||||
|
{{ end }}
|
||||||
|
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
|
||||||
|
```
|
||||||
|
|
||||||
|
This shortcode takes a language and a filename as before, but it also takes
|
||||||
|
the numbers of the first and last lines indicating the part of the code that should be included. After
|
||||||
|
splitting the contents of the file into lines, it throws away all lines before and
|
||||||
|
after the window of code that you want to include. It seems to me (from my commit history)
|
||||||
|
that Hugo's [`after`](https://gohugo.io/functions/after/) function (which should behave
|
||||||
|
similarly to Haskell's `drop`) doesn't like to be given an argument of `0`.
|
||||||
|
I had to add a special case for when this would occur, where I simply do not invoke `after` at all.
|
||||||
|
The shortcode can be used as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{</* codelines "C++" "compiler/04/ast.cpp" 19 22 */>}}
|
||||||
|
```
|
||||||
|
|
||||||
|
To support a fuller range of Hugo's functionality, I also added an optional argument that
|
||||||
|
accepts Hugo's Chroma settings. This way, I can do things like highlight certain
|
||||||
|
lines in my code snippet, which is done as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{</* codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 31 39 "hl_lines=7 8 9" */>}}
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the `hl_lines` field doesn't seem to work properly with `linenostart`, which means
|
||||||
|
that the highlighted lines are counted from 1 no matter what. This is why in the above snippet,
|
||||||
|
although I include lines 31 through 39, I feed lines 7, 8, and 9 to `hl_lines`. It's unusual,
|
||||||
|
but hey, it works!
|
||||||
|
|
||||||
|
### Linking to Referenced Code
|
||||||
|
Some time after implementing my initial system for including lines of code,
|
||||||
|
I got an email from a reader who pointed out that it was hard for them to find
|
||||||
|
the exact file I was referencing, and to view the surrounding context of the
|
||||||
|
presented lines. To address this, I decided that I'd include the link
|
||||||
|
to the file in question. After all, my website and all the associated
|
||||||
|
code is on a [Git server I host](https://dev.danilafe.com/Web-Projects/blog-static),
|
||||||
|
so any local file I'm referencing should -- assuming it was properly committed --
|
||||||
|
show up there, too. I hardcoded the URL of the `code` directory on the web interface,
|
||||||
|
and appended the relative path of each included file to it. The shortcode came out as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ $s := (readFile (printf "code/%s" (.Get 1))) }}
|
||||||
|
{{ $t := split $s "\n" }}
|
||||||
|
{{ if not (eq (int (.Get 2)) 1) }}
|
||||||
|
{{ .Scratch.Set "u" (after (sub (int (.Get 2)) 1) $t) }}
|
||||||
|
{{ else }}
|
||||||
|
{{ .Scratch.Set "u" $t }}
|
||||||
|
{{ end }}
|
||||||
|
{{ $v := first (add (sub (int (.Get 3)) (int (.Get 2))) 1) (.Scratch.Get "u") }}
|
||||||
|
{{ if (.Get 4) }}
|
||||||
|
{{ .Scratch.Set "opts" (printf ",%s" (.Get 4)) }}
|
||||||
|
{{ else }}
|
||||||
|
{{ .Scratch.Set "opts" "" }}
|
||||||
|
{{ end }}
|
||||||
|
<div class="highlight-group">
|
||||||
|
<div class="highlight-label">From <a href="https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/{{ .Get 1 }}">{{ path.Base (.Get 1) }}</a>,
|
||||||
|
{{ if eq (.Get 2) (.Get 3) }}line {{ .Get 2 }}{{ else }} lines {{ .Get 2 }} through {{ .Get 3 }}{{ end }}</div>
|
||||||
|
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
|
||||||
|
</div>
|
||||||
|
```
|
||||||
|
|
||||||
|
This results in code blocks like the one in the image below. The image
|
||||||
|
is the result of the `codelines` call for the Idris language, presented above.
|
||||||
|
|
||||||
|
{{< figure src="example.png" caption="An example of how the code looks." class="medium" >}}
|
||||||
|
|
||||||
|
I got a lot of mileage out of this setup . . . until I wanted to include code from _other_ git repositories.
|
||||||
|
For instance, I wanted to talk about my [Advent of Code](https://adventofcode.com/) submissions,
|
||||||
|
without having to copy-paste the code into my blog repository!
|
||||||
|
|
||||||
|
### Code from Submodules
|
||||||
|
My first thought when including code from other repositories was to use submodules.
|
||||||
|
This has the added advantage of "pinning" the version of the code I'm talking about,
|
||||||
|
which means that even if I push significant changes to the other repository, the code
|
||||||
|
in my blog will remain the same. This, in turn, means that all of my `codelines`
|
||||||
|
shortcodes will work as intended.
|
||||||
|
|
||||||
|
The problem is, most Git web interfaces (my own included) don't display paths corresponding
|
||||||
|
to submodules. Thus, even if all my code is checked out and Hugo correctly
|
||||||
|
pulls the selected lines into its HTML output, the _links to the file_ remain
|
||||||
|
broken!
|
||||||
|
|
||||||
|
There's no easy way to address this, particularly because _different submodules
|
||||||
|
can be located on different hosts_! The Git URL used for a submodule is
|
||||||
|
not known to Hugo (since, to the best of my knowledge, it can't run
|
||||||
|
shell commands), and it could reside on `dev.danilafe.com`, or `github.com`,
|
||||||
|
or elsewhere. Fortunately, it's fairly easy to tell when a file is part
|
||||||
|
of a submodule, and which submodule that is. It's sufficient to find
|
||||||
|
the longest submodule path that matches the selected file. If no
|
||||||
|
submodule path matches, then the file is part of the blog repository,
|
||||||
|
and no special action is needed.
|
||||||
|
|
||||||
|
Of course, this means that Hugo needs to be made aware of the various
|
||||||
|
submodules in my repository. It also needs to be aware of the submodules
|
||||||
|
_inside_ those submodules, and so on: it needs to be recursive. Git
|
||||||
|
has a command to list all submodules recursively:
|
||||||
|
|
||||||
|
```Bash
|
||||||
|
git submodule status --recursive
|
||||||
|
```
|
||||||
|
|
||||||
|
However, this only prints the commit, submodule path, and the upstream branch.
|
||||||
|
I don't think there's a way to list the remotes' URLs with this command; however,
|
||||||
|
we do _need_ the URLs, since that's how we create links to the Git web interfaces.
|
||||||
|
|
||||||
|
There's another issue: how do we let Hugo know about the various submodules,
|
||||||
|
even if we can find them? Hugo can read files, but doing any serious
|
||||||
|
text processing is downright impractical. However, Hugo
|
||||||
|
itself is not able to run commands, so it needs to be able to read in
|
||||||
|
the output of another command that _can_ find submodules.
|
||||||
|
|
||||||
|
I settled on using Hugo's `params` configuration option. This
|
||||||
|
allows users to communicate arbitrary properties to Hugo themes
|
||||||
|
and templates. In my case, I want to communicate a collection
|
||||||
|
of submodules. I didn't know about TOML's inline tables, so
|
||||||
|
I decided to represent this collection as a map of (meaningless)
|
||||||
|
submodule names to tables:
|
||||||
|
|
||||||
|
```TOML
|
||||||
|
[params]
|
||||||
|
[params.submoduleLinks]
|
||||||
|
[params.submoduleLinks.aoc2020]
|
||||||
|
url = "https://dev.danilafe.com/Advent-of-Code/AdventOfCode-2020/src/commit/7a8503c3fe1aa7e624e4d8672aa9b56d24b4ba82"
|
||||||
|
path = "aoc-2020"
|
||||||
|
```
|
||||||
|
|
||||||
|
Since it was seemingly impossible to wrangle Git into outputting
|
||||||
|
all of this information using one command, I decided
|
||||||
|
to write a quick Ruby script to generate a list of submodules
|
||||||
|
as follows. I had to use `cd` in one of my calls to Git
|
||||||
|
because Git's `--git-dir` option doesn't seem to work
|
||||||
|
with submodules, treating them like a "bare" checkout.
|
||||||
|
I also chose to use an allowlist of remote URLs,
|
||||||
|
since the URL format for linking to files in a
|
||||||
|
particular repository differs from service to service.
|
||||||
|
For now, I only use my own Git server, so only `dev.danilafe.com`
|
||||||
|
is allowed; however, just by adding `elsif`s to my code,
|
||||||
|
I can add other services in the future.
|
||||||
|
|
||||||
|
```Ruby
|
||||||
|
puts "[params]"
|
||||||
|
puts " [params.submoduleLinks]"
|
||||||
|
|
||||||
|
def each_submodule(base_path)
|
||||||
|
`cd #{base_path} && git submodule status`.lines do |line|
|
||||||
|
hash, path = line[1..].split " "
|
||||||
|
full_path = "#{base_path}/#{path}"
|
||||||
|
url = `git config --file #{base_path}/.gitmodules --get 'submodule.#{path}.url'`.chomp.delete_suffix(".git")
|
||||||
|
safe_name = full_path.gsub(/\/|-|_\./, "")
|
||||||
|
|
||||||
|
if url =~ /dev.danilafe.com/
|
||||||
|
file_url = "#{url}/src/commit/#{hash}"
|
||||||
|
else
|
||||||
|
raise "Submodule URL #{url.dump} not in a known format!"
|
||||||
|
end
|
||||||
|
|
||||||
|
yield ({ :path => full_path, :url => file_url, :name => safe_name })
|
||||||
|
each_submodule(full_path) { |m| yield m }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
each_submodule(".") do |m|
|
||||||
|
next unless m[:path].start_with? "./code/"
|
||||||
|
puts " [params.submoduleLinks.#{m[:name].delete_prefix(".code")}]"
|
||||||
|
puts " url = #{m[:url].dump}"
|
||||||
|
puts " path = #{m[:path].delete_prefix("./code/").dump}"
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
I pipe the output of this script into a separate configuration file
|
||||||
|
called `config-gen.toml`, and then run Hugo as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
hugo --config config.toml,config-gen.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, I had to modify my shortcode to find and handle the longest submodule prefix.
|
||||||
|
Here's the relevant portion, and you can
|
||||||
|
[view the entire file here](https://dev.danilafe.com/Web-Projects/blog-static/src/commit/bfeae89ab52d1696c4a56768b7f0c6682efaff82/themes/vanilla/layouts/shortcodes/codelines.html).
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ .Scratch.Set "bestLength" -1 }}
|
||||||
|
{{ .Scratch.Set "bestUrl" (printf "https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/%s" (.Get 1)) }}
|
||||||
|
{{ $filePath := (.Get 1) }}
|
||||||
|
{{ $scratch := .Scratch }}
|
||||||
|
{{ range $module, $props := .Site.Params.submoduleLinks }}
|
||||||
|
{{ $path := index $props "path" }}
|
||||||
|
{{ $bestLength := $scratch.Get "bestLength" }}
|
||||||
|
{{ if and (le $bestLength (len $path)) (hasPrefix $filePath $path) }}
|
||||||
|
{{ $scratch.Set "bestLength" (len $path) }}
|
||||||
|
{{ $scratch.Set "bestUrl" (printf "%s%s" (index $props "url") (strings.TrimPrefix $path $filePath)) }}
|
||||||
|
{{ end }}
|
||||||
|
{{ end }}
|
||||||
|
```
|
||||||
|
|
||||||
|
And that's what I'm using at the time of writing!
|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
My current system for code includes allows me to do the following
|
||||||
|
things:
|
||||||
|
|
||||||
|
* Include entire files or sections of files into the page. This
|
||||||
|
saves me from having to copy and paste code manually, which
|
||||||
|
is error prone and can cause inconsistencies.
|
||||||
|
* Provide links to the files I reference on my Git interface.
|
||||||
|
This allows users to easily view the entire file that I'm talking about.
|
||||||
|
* Correctly link to files in repositories other than my blog
|
||||||
|
repository, when they are included using submodules. This means
|
||||||
|
I don't need to manually copy and update code from other projects.
|
||||||
|
|
||||||
|
I hope some of these shortcodes and script come in handy for someone else.
|
||||||
|
Thank you for reading!
|
||||||
BIN
content/blog/haskell_lazy_evaluation/lazy-fix.zip
Normal file
BIN
content/blog/haskell_lazy_evaluation/lazy-fix.zip
Normal file
Binary file not shown.
BIN
content/blog/haskell_lazy_evaluation/lazy.zip
Normal file
BIN
content/blog/haskell_lazy_evaluation/lazy.zip
Normal file
Binary file not shown.
79
content/blog/hugo_functions.md
Normal file
79
content/blog/hugo_functions.md
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
---
|
||||||
|
title: "Approximating Custom Functions in Hugo"
|
||||||
|
date: 2021-01-17T18:44:53-08:00
|
||||||
|
tags: ["Hugo"]
|
||||||
|
---
|
||||||
|
|
||||||
|
This will be an uncharacteristically short post. Recently,
|
||||||
|
I wrote about my experience with [including code from local files]({{< relref "codelines" >}}).
|
||||||
|
After I wrote that post, I decided to expand upon my setup. In particular,
|
||||||
|
I wanted to display links to the files I'm referring to, in three
|
||||||
|
different cases: when I'm referring to an entire code file, to an entire raw (non-highlighted)
|
||||||
|
file, or to a portion of a code file.
|
||||||
|
|
||||||
|
The problem was that in all three cases, I needed to determine the
|
||||||
|
correct file URL to link to. The process for doing so is identical: it
|
||||||
|
really only depends on the path to the file I'm including. However,
|
||||||
|
many other aspects of each case are different. In the "entire code file"
|
||||||
|
case, I simply need to read in a file. In the "portion of a code file"
|
||||||
|
case, I have to perform some processing to extract the specific lines I want to include.
|
||||||
|
Whenever I include a code file -- entirely or partially -- I need to invoke the `highlight`
|
||||||
|
function to perform syntax highlighting; however, I don't want to do that when including a raw file.
|
||||||
|
It would be difficult to write a single shortcode or partial to handle all of these different cases.
|
||||||
|
|
||||||
|
However, computing the target URL is a simple transformation
|
||||||
|
of a path and a list of submodules into a link. More plainly,
|
||||||
|
it is a function. Hugo doesn't really have support for
|
||||||
|
custom functions, at least according to this [Discourse post](https://discourse.gohugo.io/t/adding-custom-functions/14164). The only approach to add a _real_ function to Hugo is to edit the Go-based
|
||||||
|
source code, and recompile the thing. However, your own custom functions
|
||||||
|
would then not be included in normal Hugo distributions, and any websites
|
||||||
|
using these functions would not be portable. _Really_ adding your own functions
|
||||||
|
is not viable.
|
||||||
|
|
||||||
|
However, we can approximate functions using Hugo's
|
||||||
|
[scratchpad feature](https://gohugo.io/functions/scratch/).
|
||||||
|
By feeding a
|
||||||
|
{{< sidenote "right" "mutable-note" "scratchpad" >}}
|
||||||
|
In reality, any mutable container will work. The scratchpad
|
||||||
|
just seems like the perfect tool for this purpose.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
to a partial, and expecting the partial to modify the
|
||||||
|
scratchpad in some way, we can effectively recover data.
|
||||||
|
For instance, in my `geturl` partial, I have something like
|
||||||
|
the following:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ .scratch.Set "bestUrl" theUrl }}
|
||||||
|
```
|
||||||
|
|
||||||
|
Once this partial executes, and the rendering engine is back to its call site,
|
||||||
|
the scratchpad will contain `bestUrl`. To call this partial while providing inputs
|
||||||
|
(like the file path, for example), we can use Hugo's `dict` function. An (abridged)
|
||||||
|
example:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ partial "geturl.html" (dict "scratch" .Scratch "path" filePath) }}
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, from inside the partial, we'll be able to access the two variables using `.scratch` and `.path`.
|
||||||
|
Once we've called our partial, we simply extract the returned data from the scratchpad and use it:
|
||||||
|
|
||||||
|
```
|
||||||
|
{{ partial "geturl.html" (dict "scratch" .Scratch "path" filePath) }}
|
||||||
|
{{ $bestUrl := .Scratch.Get "bestUrl" }}
|
||||||
|
{{ ... do stuff with $bestUrl ... }}
|
||||||
|
```
|
||||||
|
|
||||||
|
Thus, although it's a little bit tedious, we're able to use `geturl` like a function,
|
||||||
|
thereby refraining from duplicating its definition everywhere the same logic is needed. A few
|
||||||
|
closing thoughts:
|
||||||
|
|
||||||
|
* __Why not just use a real language?__ It's true that I wrote a Ruby script to
|
||||||
|
do some of the work involved with linking submodules. However, generating the same
|
||||||
|
information for all calls to `codelines` would complicate the process of rendering
|
||||||
|
the blog, and make live preview impossible. In general, by limiting the use of external
|
||||||
|
scripts, it's easier to make `hugo` the only "build tool" for the site.
|
||||||
|
* __Is there an easier way?__ I _think_ that calls to `partial` return a string. If you
|
||||||
|
simply wanted to return a string, you could probably do without using a scratchpad.
|
||||||
|
However, if you wanted to do something more complicated (say, return a map or list),
|
||||||
|
you'd probably want the scratchpad method after all.
|
||||||
476
content/blog/typesafe_imperative_lang.md
Normal file
476
content/blog/typesafe_imperative_lang.md
Normal file
@@ -0,0 +1,476 @@
|
|||||||
|
---
|
||||||
|
title: "A Typesafe Representation of an Imperative Language"
|
||||||
|
date: 2020-11-02T01:07:21-08:00
|
||||||
|
tags: ["Idris"]
|
||||||
|
---
|
||||||
|
|
||||||
|
A recent homework assignment for my university's programming languages
|
||||||
|
course was to encode the abstract syntax for a small imperative language
|
||||||
|
into Haskell data types. The language consisted of very few constructs, and was very much a "toy".
|
||||||
|
On the expression side of things, it had three registers (`A`, `B`, and `R`),
|
||||||
|
numbers, addition, comparison using "less than", and logical negation. It also
|
||||||
|
included a statement for storing the result of an expression into
|
||||||
|
a register, `if/else`, and an infinite loop construct with an associated `break` operation.
|
||||||
|
A sample program in the language which computes the product of two
|
||||||
|
numbers is as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
A := 7
|
||||||
|
B := 9
|
||||||
|
R := 0
|
||||||
|
do
|
||||||
|
if A <= 0 then
|
||||||
|
break
|
||||||
|
else
|
||||||
|
R := R + B;
|
||||||
|
A := A + -1;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
The homework notes that type errors may arise in the little imperative language.
|
||||||
|
We could, for instance, try to add a boolean to a number: `3 + (1 < 2)`. Alternatively,
|
||||||
|
we could try to use a number in the condition of an `if/else` expression. A "naive"
|
||||||
|
encoding of the abstract syntax would allow for such errors.
|
||||||
|
|
||||||
|
However, assuming that registers could only store integers and not booleans, it is fairly easy to
|
||||||
|
separate the expression grammar into two nonterminals, yielding boolean
|
||||||
|
and integer expressions respectively. Since registers can only store integers,
|
||||||
|
the `(:=)` operation will always require an integer expression, and an `if/else`
|
||||||
|
statement will always require a boolean expression. A matching Haskell encoding
|
||||||
|
would not allow "invalid" programs to compile. That is, the programs would be
|
||||||
|
type-correct by construction.
|
||||||
|
|
||||||
|
Then, a question arose in the ensuing discussion: what if registers _could_
|
||||||
|
contain booleans? It would be impossible to create such a "correct-by-construction"
|
||||||
|
representation then, wouldn't it?
|
||||||
|
{{< sidenote "right" "haskell-note" "Although I don't know about Haskell," >}}
|
||||||
|
I am pretty certain that a similar encoding in Haskell is possible. However,
|
||||||
|
Haskell wasn't originally created for that kind of abuse of its type system,
|
||||||
|
so it would probably not look very good.
|
||||||
|
{{< /sidenote >}} I am sure that it _is_ possible to do this
|
||||||
|
in Idris, a dependently typed programming language. In this post I will
|
||||||
|
talk about how to do that.
|
||||||
|
|
||||||
|
### Registers and Expressions
|
||||||
|
Let's start by encoding registers. Since we only have three registers, we
|
||||||
|
can encode them using a simple data type declaration, much the same as we
|
||||||
|
would in Haskell:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 1 1 >}}
|
||||||
|
|
||||||
|
Now that registers can store either integers or booleans (and only those two),
|
||||||
|
we need to know which one is which. For this purpose, we can declare another
|
||||||
|
data type:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 3 3 >}}
|
||||||
|
|
||||||
|
At any point in the (hypothetical) execution of our program, each
|
||||||
|
of the registers will have a type, either boolean or integer. The
|
||||||
|
combined state of the three registers would then be the combination
|
||||||
|
of these three states; we can represent this using a 3-tuple:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 5 6 >}}
|
||||||
|
|
||||||
|
Let's say that the first element of the tuple will be the type of the register
|
||||||
|
`A`, the second the type of `B`, and the third the type of `R`. Then,
|
||||||
|
we can define two helper functions, one for retrieving the type of
|
||||||
|
a register, and one for changing it:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 8 16 >}}
|
||||||
|
|
||||||
|
Now, it's time to talk about expressions. We know now that an expression
|
||||||
|
can evaluate to either a boolean or an integer value (because a register
|
||||||
|
can contain either of those types of values). Perhaps we can specify
|
||||||
|
the type that an expression evaluates to in the expression's own type:
|
||||||
|
`Expr IntTy` would evaluate to integers, and `Expr BoolTy` would evaluate
|
||||||
|
to booleans. Then, we could have constructors as follows:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
Lit : Int -> Expr IntTy
|
||||||
|
Not : Expr BoolTy -> Expr BoolTy
|
||||||
|
```
|
||||||
|
|
||||||
|
Sounds good! But what about loading a register?
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
Load : Reg -> Expr IntTy -- no; what if the register is a boolean?
|
||||||
|
Load : Reg -> Expr BoolTy -- no; what if the register is an integer?
|
||||||
|
Load : Reg -> Expr a -- no; a register access can't be either!
|
||||||
|
```
|
||||||
|
|
||||||
|
The type of an expression that loads a register depends on the current
|
||||||
|
state of the program! If we last stored an integer into a register,
|
||||||
|
then loading from that register would give us an integer. But if we
|
||||||
|
last stored a boolean into a register, then reading from it would
|
||||||
|
give us a boolean. Our expressions need to be aware of the current
|
||||||
|
types of each register. To do so, we add the state as a parameter to
|
||||||
|
our `Expr` type. This would lead to types like the following:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
-- An expression that produces a boolean when all the registers
|
||||||
|
-- are integers.
|
||||||
|
Expr (IntTy, IntTy, IntTy) BoolTy
|
||||||
|
|
||||||
|
-- An expression that produces an integer when A and B are integers,
|
||||||
|
-- and R is a boolean.
|
||||||
|
Expr (IntTy, IntTy, BoolTy) IntTy
|
||||||
|
```
|
||||||
|
|
||||||
|
In Idris, the whole definition becomes:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 18 23 >}}
|
||||||
|
|
||||||
|
The only "interesting" constructor is `Load`, which, given a register `r`,
|
||||||
|
creates an expression that has `r`'s type in the current state `s`.
|
||||||
|
|
||||||
|
### Statements
|
||||||
|
Statements are a bit different. Unlike expressions, they don't evaluate to
|
||||||
|
anything; rather, they do something. That "something" may very well be changing
|
||||||
|
the current state. We could, for instance, set `A` to be a boolean, while it was
|
||||||
|
previously an integer. This suggests equipping our `Stmt` type with two
|
||||||
|
arguments: the initial state (before the statement's execution), and the final
|
||||||
|
state (after the statement's execution). This would lead to types like this:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
-- Statement that, when run while all registers contain integers,
|
||||||
|
-- terminates with registers B and R having been assigned boolean values.
|
||||||
|
Stmt (IntTy, IntTy, IntTy) (IntTy, BoolTy, BoolTy)
|
||||||
|
```
|
||||||
|
|
||||||
|
However, there's a problem with `loop` and `break`. When we run a loop,
|
||||||
|
we will require that the state at the end of one iteration is the
|
||||||
|
same as the state at its beginning. Otherwise, it would be possible
|
||||||
|
for a loop to keep changing the types of registers every iteration,
|
||||||
|
and it would become impossible for us to infer the final state
|
||||||
|
without actually running the program. In itself, this restriction
|
||||||
|
isn't a problem; most static type systems require both branches
|
||||||
|
of an `if/else` expression to be of the same type for a similar
|
||||||
|
reason. The problem comes from the interaction with `break`.
|
||||||
|
|
||||||
|
By itself, the would-be type of `break` seems innocent enough. It
|
||||||
|
doesn't change any registers, so we could call it `Stmt s s`.
|
||||||
|
But consider the following program:
|
||||||
|
|
||||||
|
```
|
||||||
|
A := 0
|
||||||
|
B := 0
|
||||||
|
R := 0
|
||||||
|
do
|
||||||
|
if 5 <= A then
|
||||||
|
B := 1 <= 1
|
||||||
|
break
|
||||||
|
B := 0
|
||||||
|
else
|
||||||
|
A := A + 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
The loop starts with all registers having integer values.
|
||||||
|
As per our aforementioned loop requirement, the body
|
||||||
|
of the loop must terminate with all registers _still_ having
|
||||||
|
integer values. For the first five iterations that's exactly
|
||||||
|
what will happen. However, after we increment `A` the fifth time,
|
||||||
|
we will set `B` to a boolean value -- using a valid statement --
|
||||||
|
and then `break`. The `break` statement will be accepted by
|
||||||
|
the typechecker, and so will the whole `then` branch. After all,
|
||||||
|
it seems as though we reset `B` back to an integer value.
|
||||||
|
But that won't be the case. We will have jumped to the end
|
||||||
|
of the loop, where we are expected to have an all-integer type,
|
||||||
|
which we will not have.
|
||||||
|
|
||||||
|
The solution I came up with to address this issue was to
|
||||||
|
add a _third_ argument to `Stmt`, which contains the "context"
|
||||||
|
type. That is, it contains the type of the innermost loop surrounding
|
||||||
|
the statement. A `break` statement would only be permissible
|
||||||
|
if the current type matches the loop type. With this, we finally
|
||||||
|
write down a definition of `Stmt`:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 26 30 >}}
|
||||||
|
|
||||||
|
The `Store` constructor takes a register `r` and an expression producing some type `t` in state `s`.
|
||||||
|
From these, it creates a statement that starts in `s`, and finishes
|
||||||
|
in a state similar to `s`, but with `r` now having type `t`. The loop
|
||||||
|
type `l` remains unaffected and unused; we are free to assign any register
|
||||||
|
any value.
|
||||||
|
|
||||||
|
The `If` constructor takes a condition `Expr`, which starts in state `s` and _must_ produce
|
||||||
|
a boolean. It also takes two programs (sequences of statements), each of which
|
||||||
|
starts in `s` and finishes in another state `n`. This results in
|
||||||
|
a statement that starts in state `s`, and finishes in state `n`. Conceptually,
|
||||||
|
each branch of the `if/else` statement must result in the same final state (in terms of types);
|
||||||
|
otherwise, we wouldn't know which of the states to pick when deciding the final
|
||||||
|
state of the `If` itself. As with `Store`, the loop type `l` is untouched here.
|
||||||
|
Individual statements are free to modify the state however they wish.
|
||||||
|
|
||||||
|
The `Loop` constructor is very restrictive. It takes a single program (the sequence
|
||||||
|
of instructions that it will be repeating). As we discussed above, this program
|
||||||
|
must start _and_ end in the same state `s`. Furthermore, this program's loop
|
||||||
|
type must also be `s`, since the loop we're constructing will be surrounding the
|
||||||
|
program. The resulting loop itself still has an arbitrary loop type `l`, since
|
||||||
|
it doesn't surround itself.
|
||||||
|
|
||||||
|
Finally, `Break` can only be constructed when the loop state matches the current
|
||||||
|
state. Since we'll be jumping to the end of the innermost loop, the final state
|
||||||
|
is also the same as the loop state.
|
||||||
|
|
||||||
|
These are all the constructors we'll be needing. It's time to move on to
|
||||||
|
whole programs!
|
||||||
|
|
||||||
|
### Programs
|
||||||
|
A program is simply a list of statements. However, we can't use a regular Idris list,
|
||||||
|
because a regular list wouldn't be able to represent the relationship between
|
||||||
|
each successive statement. In our program, we want the final state of one
|
||||||
|
statement to be the initial state of the following one, since they'll
|
||||||
|
be executed in sequence. To represent this, we have to define our own
|
||||||
|
list-like GADT. The definition of the type turns out fairly straightforward:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 32 34 >}}
|
||||||
|
|
||||||
|
The `Nil` constructor represents an empty program (much like the built-in `Nil` represents an empty list).
|
||||||
|
Since no actions are done, it creates a `Prog` that starts and ends in the same state: `s`.
|
||||||
|
The `(::)` constructor, much like the built-in `(::)` constructor, takes a statement
|
||||||
|
and another program. The statement begins in state `s` and ends in state `n`; the program after
|
||||||
|
that statement must then start in state `n`, and end in some other state `m`.
|
||||||
|
The combination of the statement and the program starts in state `s`, and finishes in state `m`.
|
||||||
|
Thus, `(::)` yields `Prog s m`. None of the constructors affect the loop type `l`: we
|
||||||
|
are free to sequence any statements that we want, and it is impossible for us
|
||||||
|
to construct statements using `l` that cause runtime errors.
|
||||||
|
|
||||||
|
This should be all! Let's try out some programs.
|
||||||
|
|
||||||
|
### Trying it Out
|
||||||
|
The following (type-correct) program compiles just fine:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 36 47 >}}
|
||||||
|
|
||||||
|
First, it loads a boolean into register `A`; then,
|
||||||
|
inside the `if/else` statement, it stores an integer into `A`. Finally,
|
||||||
|
it stores another integer into `B`, and adds them into `R`. Even though
|
||||||
|
`A` was a boolean at first, the type checker can deduce that it
|
||||||
|
was reset back to an integer after the `if/else`, and the program is accepted.
|
||||||
|
On the other hand, had we forgotten to set `A` to a boolean first:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
[ If (Load A)
|
||||||
|
[ Store A (Lit 1) ]
|
||||||
|
[ Store A (Lit 2) ]
|
||||||
|
, Store B (Lit 2)
|
||||||
|
, Store R (Add (Load A) (Load B))
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
We would get a type error:
|
||||||
|
|
||||||
|
```
|
||||||
|
Type mismatch between getRegTy A (IntTy, IntTy, IntTy) and BoolTy
|
||||||
|
```
|
||||||
|
|
||||||
|
The type of register `A` (that is, `IntTy`) is incompatible
|
||||||
|
with `BoolTy`. Our `initialState` says that `A` starts out as
|
||||||
|
an integer, so it can't be used in an `if/else` right away!
|
||||||
|
Similar errors occur if we make one of the branches of
|
||||||
|
the `if/else` empty, or if we set `B` to a boolean.
|
||||||
|
|
||||||
|
We can also encode the example program from the beginning
|
||||||
|
of this post:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 49 61 >}}
|
||||||
|
|
||||||
|
This program compiles just fine, too! It is a little reminiscent of
|
||||||
|
the program we used to demonstrate how `break` could break things
|
||||||
|
if we weren't careful. So, let's go ahead and try `break` in an invalid
|
||||||
|
state:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
[ Store A (Lit 7)
|
||||||
|
, Store B (Lit 9)
|
||||||
|
, Store R (Lit 0)
|
||||||
|
, Loop
|
||||||
|
[ If (Load A `Leq` Lit 0)
|
||||||
|
[ Store B (Lit 1 `Leq` Lit 1), Break, Store B (Lit 0) ]
|
||||||
|
[ Store R (Load R `Add` Load B)
|
||||||
|
, Store A (Load A `Add` Lit (-1))
|
||||||
|
]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Again, the type checker complains:
|
||||||
|
|
||||||
|
```
|
||||||
|
Type mismatch between IntTy and BoolTy
|
||||||
|
```
|
||||||
|
|
||||||
|
And so, we have an encoding of our language that allows registers to
|
||||||
|
be either integers or booleans, while still preventing
|
||||||
|
type-incorrect programs!
|
||||||
|
|
||||||
|
### Building an Interpreter
|
||||||
|
A good test of such an encoding is the implementation
|
||||||
|
of an interpreter. It should be possible to convince the
|
||||||
|
typechecker that our interpreter doesn't need to
|
||||||
|
handle type errors in the toy language, since they
|
||||||
|
cannot occur.
|
||||||
|
|
||||||
|
Let's start with something simple. First of all, suppose
|
||||||
|
we have an expression of type `Expr IntTy`. In our toy
|
||||||
|
language, it produces an integer. Our interpreter, then,
|
||||||
|
will probably want to use Idris' type `Int`. Similarly,
|
||||||
|
an expression of type `Expr BoolTy` will produce a boolean
|
||||||
|
in our toy language, which in Idris we can represent using
|
||||||
|
the built-in `Bool` type. Despite the similar naming, though,
|
||||||
|
there's no connection between Idris' `Bool` and our own `BoolTy`.
|
||||||
|
We need to define a conversion from our own types -- which are
|
||||||
|
values of type `Ty` -- into Idris types that result from evaluating
|
||||||
|
expressions. We do so as follows:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 63 65 >}}
|
||||||
|
|
||||||
|
Similarly, we want to convert our `TypeState` (a tuple describing the _types_
|
||||||
|
of our registers) into a tuple that actually holds the values of each
|
||||||
|
register, which we will call `State`. The value of each register at
|
||||||
|
any point depends on its type. My first thought was to define
|
||||||
|
`State` as a function from `TypeState` to an Idris `Type`:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
State : TypeState -> Type
|
||||||
|
State (a, b, c) = (repr a, repr b, repr c)
|
||||||
|
```
|
||||||
|
|
||||||
|
Unfortunately, this doesn't quite cut it. The problem is that this
|
||||||
|
function technically doesn't give Idris any guarantees that `State`
|
||||||
|
will be a tuple. The most Idris knows is that `State` will be some
|
||||||
|
`Type`, which could be `Int`, `Bool`, or anything else! This
|
||||||
|
becomes a problem when we try to pattern match on states to get
|
||||||
|
the contents of a particular register. Instead, we have to define
|
||||||
|
a new data type:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 67 68 >}}
|
||||||
|
|
||||||
|
In this snippet, `State` is still a (type level) function from `TypeState` to `Type`.
|
||||||
|
However, by using a GADT, we guarantee that there's only one way to construct
|
||||||
|
a `State (a,b,c)`: using a corresponding tuple. Now, Idris will accept our
|
||||||
|
pattern matching:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 70 78 >}}
|
||||||
|
|
||||||
|
The `getReg` function will take out the value of the corresponding
|
||||||
|
register, returning `Int` or `Bool` depending on the `TypeState`.
|
||||||
|
What's important is that if the `TypeState` is known, then so
|
||||||
|
is the type of `getReg`: no `Either` is involved here, and we
|
||||||
|
can directly use the integer or boolean stored in the
|
||||||
|
register. This is exactly what we do:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 80 85 >}}
|
||||||
|
|
||||||
|
This is pretty concise. Idris knows that `Lit i` is of type `Expr IntTy`,
|
||||||
|
and it knows that `repr IntTy = Int`, so it also knows that
|
||||||
|
`eval (Lit i)` produces an `Int`. Similarly, we wrote
|
||||||
|
`Load r` to have type `Expr s (getRegTy r s)`. Since `getReg`
|
||||||
|
returns `repr (getRegTy r s)`, things check out here, too.
|
||||||
|
A similar logic applies to the rest of the cases.
|
||||||
|
|
||||||
|
The situation with statements is somewhat different. As we said, a statement
|
||||||
|
doesn't return a value; it changes state. A good initial guess would
|
||||||
|
be that to evaluate a statement that starts in state `s` and terminates in state `n`,
|
||||||
|
we would take as input `State s` and return `State n`. However, things are not
|
||||||
|
quite as simple, thanks to `Break`. Not only does `Break` require
|
||||||
|
special case logic to return control to the end of the `Loop`, but
|
||||||
|
it also requires some additional consideration: in a statement
|
||||||
|
of type `Stmt l s n`, evaluating `Break` can return `State l`.
|
||||||
|
|
||||||
|
To implement this, we'll use the `Either` type. The `Left` constructor
|
||||||
|
will contain the state at the time of evaluating a `Break`,
|
||||||
|
and will indicate to the interpreter that we're breaking out of a loop.
|
||||||
|
On the other hand, the `Right` constructor will contain the state
|
||||||
|
as produced by all other statements, which would be considered
|
||||||
|
{{< sidenote "right" "left-right-note" "the \"normal\" case." >}}
|
||||||
|
We use <code>Left</code> for the "abnormal" case because of
|
||||||
|
Idris' (and Haskell's) convention to use it as such. For
|
||||||
|
instance, the two languages define a <code>Monad</code>
|
||||||
|
instance for <code>Either a</code> where <code>(>>=)</code>
|
||||||
|
behaves very much like it does for <code>Maybe</code>, with
|
||||||
|
<code>Left</code> being treated as <code>Nothing</code>,
|
||||||
|
and <code>Right</code> as <code>Just</code>. We will
|
||||||
|
use this instance to clean up some of our computations.
|
||||||
|
{{< /sidenote >}} Note that this doesn't indicate an error:
|
||||||
|
we need to represent the two states (breaking out of a loop
|
||||||
|
and normal execution) to define our language's semantics.
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 88 95 >}}
|
||||||
|
|
||||||
|
First, note the type. We return an `Either` value, which will
|
||||||
|
contain `State l` (in the `Left` constructor) if a `Break`
|
||||||
|
was evaluated, and `State n` (in the `Right` constructor)
|
||||||
|
if execution went on without breaking.
|
||||||
|
|
||||||
|
The `Store` case is rather simple. We use `setReg` to update the result
|
||||||
|
of the register `r` with the result of evaluating `e`. Because
|
||||||
|
a store doesn't cause us to start breaking out of a loop,
|
||||||
|
we use `Right` to wrap the new state.
|
||||||
|
|
||||||
|
The `If` case is also rather simple. Its condition is guaranteed
|
||||||
|
to evaluate to a boolean, so it's sufficient for us to use
|
||||||
|
Idris' `if` expression. We use the `prog` function here, which
|
||||||
|
implements the evaluation of a whole program. We'll get to it
|
||||||
|
momentarily.
|
||||||
|
|
||||||
|
`Loop` is the most interesting case. We start by evaluating
|
||||||
|
the program `p` serving as the loop body. The result of this
|
||||||
|
computation will be either a state after a break,
|
||||||
|
held in `Left`, or the normal execution state, held
|
||||||
|
in `Right`. The `(>>=)` operation will do nothing in
|
||||||
|
the first case, and feed the resulting (normal) state
|
||||||
|
to `stmt (Loop p)` in the second case. This is exactly
|
||||||
|
what we want: if we broke out of the current iteration
|
||||||
|
of the loop, we shouldn't continue on to the next iteration.
|
||||||
|
At the end of evaluating both `p` and the recursive call to
|
||||||
|
`stmt`, we'll either have exited normally, or via breaking
|
||||||
|
out. We don't want to continue breaking out further,
|
||||||
|
so we return the final state wrapped in `Right` in both cases.
|
||||||
|
Finally, `Break` returns the current state wrapped in `Left`,
|
||||||
|
beginning the process of breaking out.
|
||||||
|
|
||||||
|
The task of `prog` is simply to sequence several statements
|
||||||
|
together. The monadic bind operator, `(>>=)`, is again perfect
|
||||||
|
for this task, since it "stops" when it hits a `Left`, but
|
||||||
|
continues otherwise. This is the implementation:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 97 99 >}}
|
||||||
|
|
||||||
|
Awesome! Let's try it out, shall we? I defined a quick `run` function
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
{{< codelines "Idris" "typesafe-imperative/TypesafeImp.idr" 101 102 >}}
|
||||||
|
|
||||||
|
We then have:
|
||||||
|
|
||||||
|
```
|
||||||
|
*TypesafeImp> run prodProg (MkState (0,0,0))
|
||||||
|
MkState (0, 9, 63) : State (IntTy, IntTy, IntTy)
|
||||||
|
```
|
||||||
|
|
||||||
|
This seems correct! The program multiplies seven by nine,
|
||||||
|
and stops when register `A` reaches zero. Our test program
|
||||||
|
runs, too:
|
||||||
|
|
||||||
|
```
|
||||||
|
*TypesafeImp> run testProg (MkState (0,0,0))
|
||||||
|
MkState (1, 2, 3) : State (IntTy, IntTy, IntTy)
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the correct answer: `A` ends up being set to
|
||||||
|
`1` in the `then` branch of the conditional statement,
|
||||||
|
`B` is set to 2 right after, and `R`, the sum of `A`
|
||||||
|
and `B`, is rightly `3`.
|
||||||
|
|
||||||
|
As you can see, we didn't have to write any error handling
|
||||||
|
code! This is because the typechecker _knows_ that type errors
|
||||||
|
aren't possible: our programs are guaranteed to be
|
||||||
|
{{< sidenote "right" "termination-note" "type correct." >}}
|
||||||
|
Our programs <em>aren't</em> guaranteed to terminate:
|
||||||
|
we're lucky that Idris' totality checker is turned off by default.
|
||||||
|
{{< /sidenote >}} This was a fun exercise, and I hope you enjoyed reading along!
|
||||||
|
I hope to see you in my future posts.
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
title: Meaningfully Typechecking a Language in Idris, Revisited
|
title: Meaningfully Typechecking a Language in Idris, Revisited
|
||||||
date: 2020-07-22T14:37:35-07:00
|
date: 2020-07-22T14:37:35-07:00
|
||||||
tags: ["Idris"]
|
tags: ["Idris"]
|
||||||
|
favorite: true
|
||||||
---
|
---
|
||||||
|
|
||||||
Some time ago, I wrote a post titled [Meaningfully Typechecking a Language in Idris]({{< relref "typesafe_interpreter.md" >}}). The gist of the post was as follows:
|
Some time ago, I wrote a post titled [Meaningfully Typechecking a Language in Idris]({{< relref "typesafe_interpreter.md" >}}). The gist of the post was as follows:
|
||||||
|
|||||||
9
content/favorites.md
Normal file
9
content/favorites.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
---
|
||||||
|
title: Favorites
|
||||||
|
type: "favorites"
|
||||||
|
description: Posts from Daniel's personal blog that he has enjoyed writing the most, or that he thinks turned out very well.
|
||||||
|
---
|
||||||
|
The amount of content on this blog is monotonically increasing. Thus, as time goes on, it's becoming
|
||||||
|
harder and harder to see at a glance what kind of articles I write. To address this, I've curated
|
||||||
|
a small selection of articles from this site that I've particularly enjoyed writing, or that I think
|
||||||
|
turned out especially well. They're listed below, most recent first.
|
||||||
14
content/search.md
Normal file
14
content/search.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
title: Search
|
||||||
|
type: "search"
|
||||||
|
description: Interactive search for posts on Daniel's personal site.
|
||||||
|
---
|
||||||
|
|
||||||
|
Here's a [Stork](https://github.com/jameslittle230/stork)-powered search for all articles on
|
||||||
|
this site. Stork takes some time to load on slower devices, which is why this isn't on
|
||||||
|
every page (or even on the index page). Because the LaTeX rendering occurs _after_
|
||||||
|
the search indexing, you may see raw LaTeX code in the content of the presented
|
||||||
|
articles, like `\beta`. This does, however, also mean that you can search for mathematical
|
||||||
|
symbols using only the English alphabet!
|
||||||
|
|
||||||
|
If you're just browsing, you could alternatively check out [all posts](/blog), or perhaps my [favorite articles](/favorites) from this blog.
|
||||||
2
layouts/shortcodes/donate_css.html
Normal file
2
layouts/shortcodes/donate_css.html
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
{{ $style := resources.Get "scss/donate.scss" | resources.ToCSS | resources.Minify }}
|
||||||
|
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||||
4
layouts/shortcodes/donation_method.html
Normal file
4
layouts/shortcodes/donation_method.html
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<tr>
|
||||||
|
<td>{{ .Get 0 }}</td>
|
||||||
|
<td><code>{{ .Get 1 }}</code></td>
|
||||||
|
</tr>
|
||||||
3
layouts/shortcodes/donation_methods.html
Normal file
3
layouts/shortcodes/donation_methods.html
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
<table class="donation-methods">
|
||||||
|
{{ .Inner }}
|
||||||
|
</table>
|
||||||
9
layouts/shortcodes/gt_assumption.html
Normal file
9
layouts/shortcodes/gt_assumption.html
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{{ $n := .Page.Scratch.Get "gt-assumption-count" }}
|
||||||
|
{{ $newN := add 1 (int $n) }}
|
||||||
|
{{ .Page.Scratch.Set "gt-assumption-count" $newN }}
|
||||||
|
{{ .Page.Scratch.SetInMap "gt-assumptions" (.Get 0) $newN }}
|
||||||
|
<div class="assumption">
|
||||||
|
<span id="gt-assumption-{{ .Get 0 }}" class="assumption-number">
|
||||||
|
Assumption {{ $newN }} ({{ .Get 1 }}).
|
||||||
|
</span> {{ .Inner }}
|
||||||
|
</div>
|
||||||
2
layouts/shortcodes/gt_css.html
Normal file
2
layouts/shortcodes/gt_css.html
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
{{ $style := resources.Get "scss/gametheory.scss" | resources.ToCSS | resources.Minify }}
|
||||||
|
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||||
1
layouts/shortcodes/gt_link.html
Normal file
1
layouts/shortcodes/gt_link.html
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<a href="#gt-assumption-{{ .Get 0 }}">assumption {{ index (.Page.Scratch.Get "gt-assumptions") (.Get 0) }}</a>
|
||||||
Binary file not shown.
BIN
static/index.st
Normal file
BIN
static/index.st
Normal file
Binary file not shown.
27
submodule-links.rb
Normal file
27
submodule-links.rb
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
puts "[params]"
|
||||||
|
puts " [params.submoduleLinks]"
|
||||||
|
|
||||||
|
def each_submodule(base_path)
|
||||||
|
`cd #{base_path} && git submodule status`.lines do |line|
|
||||||
|
hash, path = line[1..].split " "
|
||||||
|
full_path = "#{base_path}/#{path}"
|
||||||
|
url = `git config --file #{base_path}/.gitmodules --get 'submodule.#{path}.url'`.chomp.delete_suffix(".git")
|
||||||
|
safe_name = full_path.gsub(/\/|-|_\./, "")
|
||||||
|
|
||||||
|
if url =~ /dev.danilafe.com/
|
||||||
|
file_url = "#{url}/src/commit/#{hash}"
|
||||||
|
else
|
||||||
|
raise "Submodule URL #{url.dump} not in a known format!"
|
||||||
|
end
|
||||||
|
|
||||||
|
yield ({ :path => full_path, :url => file_url, :name => safe_name })
|
||||||
|
each_submodule(full_path) { |m| yield m }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
each_submodule(".") do |m|
|
||||||
|
next unless m[:path].start_with? "./code/"
|
||||||
|
puts " [params.submoduleLinks.#{m[:name].delete_prefix(".code")}]"
|
||||||
|
puts " url = #{m[:url].dump}"
|
||||||
|
puts " path = #{m[:path].delete_prefix("./code/").dump}"
|
||||||
|
end
|
||||||
1
themes/vanilla
Submodule
1
themes/vanilla
Submodule
Submodule themes/vanilla added at b56ac908f6
@@ -1,20 +0,0 @@
|
|||||||
The MIT License (MIT)
|
|
||||||
|
|
||||||
Copyright (c) 2019 YOUR_NAME_HERE
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
||||||
this software and associated documentation files (the "Software"), to deal in
|
|
||||||
the Software without restriction, including without limitation the rights to
|
|
||||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
||||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
|
||||||
subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
||||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
||||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
||||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
+++
|
|
||||||
+++
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 376 B |
Binary file not shown.
|
Before Width: | Height: | Size: 536 B |
@@ -1,79 +0,0 @@
|
|||||||
@import "variables.scss";
|
|
||||||
|
|
||||||
$code-color-lineno: grey;
|
|
||||||
$code-color-keyword: black;
|
|
||||||
$code-color-type: black;
|
|
||||||
$code-color-comment: grey;
|
|
||||||
|
|
||||||
code {
|
|
||||||
font-family: $font-code;
|
|
||||||
background-color: $code-color;
|
|
||||||
border: $code-border;
|
|
||||||
padding: 0 0.25rem 0 0.25rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
pre code {
|
|
||||||
display: block;
|
|
||||||
box-sizing: border-box;
|
|
||||||
padding: 0.5rem;
|
|
||||||
overflow: auto;
|
|
||||||
}
|
|
||||||
|
|
||||||
.chroma {
|
|
||||||
.lntable {
|
|
||||||
border-spacing: 0;
|
|
||||||
padding: 0.5rem 0 0.5rem 0;
|
|
||||||
background-color: $code-color;
|
|
||||||
border-radius: 0;
|
|
||||||
border: $code-border;
|
|
||||||
display: block;
|
|
||||||
overflow: auto;
|
|
||||||
margin-bottom: 1rem;
|
|
||||||
|
|
||||||
td {
|
|
||||||
padding: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
code {
|
|
||||||
border: none;
|
|
||||||
padding: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
pre {
|
|
||||||
margin: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.lntd:last-child {
|
|
||||||
width: 100%;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.lntr {
|
|
||||||
display: table-row;
|
|
||||||
}
|
|
||||||
|
|
||||||
.lnt {
|
|
||||||
display: block;
|
|
||||||
padding: 0 1rem 0 1rem;
|
|
||||||
color: $code-color-lineno;
|
|
||||||
}
|
|
||||||
|
|
||||||
.hl {
|
|
||||||
display: block;
|
|
||||||
background-color: #fffd99;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.kr, .k {
|
|
||||||
font-weight: bold;
|
|
||||||
color: $code-color-keyword;
|
|
||||||
}
|
|
||||||
|
|
||||||
.kt {
|
|
||||||
font-weight: bold;
|
|
||||||
color: $code-color-type;
|
|
||||||
}
|
|
||||||
|
|
||||||
.c, .c1 {
|
|
||||||
color: $code-color-comment;
|
|
||||||
}
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
@import "variables.scss";
|
|
||||||
@import "mixins.scss";
|
|
||||||
|
|
||||||
$margin-width: 30rem;
|
|
||||||
$margin-inner-offset: 0.5rem;
|
|
||||||
$margin-outer-offset: 1rem;
|
|
||||||
|
|
||||||
@mixin below-two-margins {
|
|
||||||
@media screen and
|
|
||||||
(max-width: $container-width-threshold +
|
|
||||||
2 * ($margin-width + $margin-inner-offset + $margin-outer-offset)) {
|
|
||||||
@content;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@mixin below-one-margin {
|
|
||||||
@media screen and
|
|
||||||
(max-width: $container-width-threshold +
|
|
||||||
($margin-width + $margin-inner-offset + $margin-outer-offset)) {
|
|
||||||
@content;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@mixin margin-content {
|
|
||||||
display: block;
|
|
||||||
position: absolute;
|
|
||||||
width: $margin-width;
|
|
||||||
box-sizing: border-box;
|
|
||||||
}
|
|
||||||
|
|
||||||
@mixin margin-content-left {
|
|
||||||
left: 0;
|
|
||||||
margin-left: -($margin-width + $container-min-padding + $margin-inner-offset);
|
|
||||||
|
|
||||||
@include below-two-margins {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@mixin margin-content-right {
|
|
||||||
right: 0;
|
|
||||||
margin-right: -($margin-width + $container-min-padding + $margin-inner-offset);
|
|
||||||
|
|
||||||
@include below-one-margin {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
@import "variables.scss";
|
|
||||||
|
|
||||||
@mixin bordered-block {
|
|
||||||
border: $standard-border;
|
|
||||||
border-radius: .2rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
@mixin below-container-width {
|
|
||||||
@media screen and (max-width: $container-width-threshold){
|
|
||||||
@content;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,75 +0,0 @@
|
|||||||
@import "variables.scss";
|
|
||||||
@import "mixins.scss";
|
|
||||||
@import "margin.scss";
|
|
||||||
|
|
||||||
$sidenote-padding: 1rem;
|
|
||||||
$sidenote-highlight-border-width: .2rem;
|
|
||||||
|
|
||||||
.sidenote {
|
|
||||||
&:hover {
|
|
||||||
.sidenote-label {
|
|
||||||
background-color: $primary-color;
|
|
||||||
color: white;
|
|
||||||
}
|
|
||||||
|
|
||||||
.sidenote-content {
|
|
||||||
border: $sidenote-highlight-border-width dashed;
|
|
||||||
padding: $sidenote-padding -
|
|
||||||
($sidenote-highlight-border-width - $standard-border-width);
|
|
||||||
border-color: $primary-color;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.sidenote-label {
|
|
||||||
border-bottom: .2rem dashed $primary-color;
|
|
||||||
}
|
|
||||||
|
|
||||||
.sidenote-checkbox {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.sidenote-content {
|
|
||||||
@include margin-content;
|
|
||||||
@include bordered-block;
|
|
||||||
margin-top: -1.5rem;
|
|
||||||
padding: $sidenote-padding;
|
|
||||||
text-align: left;
|
|
||||||
|
|
||||||
&.sidenote-right {
|
|
||||||
@include margin-content-right;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.sidenote-left {
|
|
||||||
@include margin-content-left;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.sidenote-delimiter {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
@mixin hidden-sidenote {
|
|
||||||
position: static;
|
|
||||||
margin-top: 1rem;
|
|
||||||
margin-bottom: 1rem;
|
|
||||||
width: 100%;
|
|
||||||
|
|
||||||
.sidenote-checkbox:checked ~ & {
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@include below-two-margins {
|
|
||||||
.sidenote-content.sidenote-left {
|
|
||||||
@include hidden-sidenote;
|
|
||||||
margin-left: 0rem;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@include below-one-margin {
|
|
||||||
.sidenote-content.sidenote-right {
|
|
||||||
@include hidden-sidenote;
|
|
||||||
margin-right: 0rem;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,235 +0,0 @@
|
|||||||
@import "variables.scss";
|
|
||||||
@import "mixins.scss";
|
|
||||||
@import "margin.scss";
|
|
||||||
@import "toc.scss";
|
|
||||||
|
|
||||||
body {
|
|
||||||
font-family: $font-body;
|
|
||||||
font-size: 1.0rem;
|
|
||||||
line-height: 1.5;
|
|
||||||
margin-bottom: 1rem;
|
|
||||||
text-align: justify;
|
|
||||||
|
|
||||||
@include below-container-width {
|
|
||||||
text-align: left;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
h1, h2, h3, h4, h5, h6 {
|
|
||||||
margin-bottom: .1rem;
|
|
||||||
margin-top: .5rem;
|
|
||||||
font-family: $font-heading;
|
|
||||||
font-weight: normal;
|
|
||||||
text-align: center;
|
|
||||||
|
|
||||||
a {
|
|
||||||
border-bottom: none;
|
|
||||||
|
|
||||||
&:hover {
|
|
||||||
color: $primary-color;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.container {
|
|
||||||
position: relative;
|
|
||||||
margin: auto;
|
|
||||||
width: 100%;
|
|
||||||
max-width: $container-width;
|
|
||||||
box-sizing: border-box;
|
|
||||||
|
|
||||||
@include below-container-width {
|
|
||||||
padding: 0 $container-min-padding 0 $container-min-padding;
|
|
||||||
margin: 0;
|
|
||||||
max-width: $container-width + 2 * $container-min-padding;
|
|
||||||
}
|
|
||||||
|
|
||||||
@include below-two-margins {
|
|
||||||
left: -($margin-width + $margin-inner-offset + $margin-outer-offset)/2;
|
|
||||||
}
|
|
||||||
|
|
||||||
@include below-one-margin {
|
|
||||||
left: 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.button, input[type="submit"] {
|
|
||||||
padding: 0.5rem;
|
|
||||||
background-color: $primary-color;
|
|
||||||
border: none;
|
|
||||||
color: white;
|
|
||||||
transition: color 0.25s, background-color 0.25s;
|
|
||||||
text-align: left;
|
|
||||||
|
|
||||||
&:focus {
|
|
||||||
outline: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
&:hover, &:focus {
|
|
||||||
background-color: white;
|
|
||||||
color: $primary-color;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nav {
|
|
||||||
width: 100%;
|
|
||||||
margin: 0rem 0rem 1rem 0rem;
|
|
||||||
|
|
||||||
.container {
|
|
||||||
display: flex;
|
|
||||||
justify-content: center;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
}
|
|
||||||
|
|
||||||
a {
|
|
||||||
padding: 0.25rem 0.75rem 0.25rem .75rem;
|
|
||||||
text-decoration: none;
|
|
||||||
color: black;
|
|
||||||
display: inline-block;
|
|
||||||
border-bottom: none;
|
|
||||||
white-space: nowrap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.post-subscript {
|
|
||||||
color: #8f8f8f;
|
|
||||||
text-align: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
.post-content {
|
|
||||||
margin-top: .5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
h1 {
|
|
||||||
font-size: 3.0rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
h2 {
|
|
||||||
font-size: 2.6rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
h3 {
|
|
||||||
font-size: 2.2rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
h4 {
|
|
||||||
font-size: 1.8rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
h5 {
|
|
||||||
font-size: 1.4rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
h6 {
|
|
||||||
font-size: 1.0rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
a {
|
|
||||||
color: black;
|
|
||||||
text-decoration: none;
|
|
||||||
border-bottom: .2rem solid $primary-color;
|
|
||||||
transition: color 0.25s;
|
|
||||||
|
|
||||||
&:hover {
|
|
||||||
color: $primary-color;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
img {
|
|
||||||
max-width: 100%
|
|
||||||
}
|
|
||||||
|
|
||||||
table {
|
|
||||||
@include bordered-block;
|
|
||||||
margin: auto;
|
|
||||||
padding: 0.5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
tr {
|
|
||||||
@include below-container-width {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
td {
|
|
||||||
@include below-container-width {
|
|
||||||
overflow-x: auto;
|
|
||||||
}
|
|
||||||
padding: 0.5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
div.highlight tr {
|
|
||||||
display: table-row;
|
|
||||||
}
|
|
||||||
|
|
||||||
hr.header-divider {
|
|
||||||
background-color: $primary-color;
|
|
||||||
height: 0.3rem;
|
|
||||||
border: none;
|
|
||||||
border-radius: 0.15rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
ul.post-list {
|
|
||||||
list-style: none;
|
|
||||||
padding: 0;
|
|
||||||
|
|
||||||
li {
|
|
||||||
@include bordered-block;
|
|
||||||
margin-bottom: 1rem;
|
|
||||||
padding: 1rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
p {
|
|
||||||
margin: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
a.post-title {
|
|
||||||
border-bottom: none;
|
|
||||||
font-size: 1.4rem;
|
|
||||||
font-family: $font-heading;
|
|
||||||
text-align: center;
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
|
|
||||||
p.post-wordcount {
|
|
||||||
text-align: center;
|
|
||||||
margin-bottom: 0.6rem;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.katex-html {
|
|
||||||
white-space: nowrap;
|
|
||||||
}
|
|
||||||
|
|
||||||
figure {
|
|
||||||
img {
|
|
||||||
max-width: 70%;
|
|
||||||
display: block;
|
|
||||||
margin: auto;
|
|
||||||
|
|
||||||
@include below-container-width {
|
|
||||||
max-width: 100%;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
figcaption {
|
|
||||||
text-align: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.tiny img {
|
|
||||||
max-height: 15rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.small img {
|
|
||||||
max-height: 20rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.medium img {
|
|
||||||
max-height: 30rem;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.twitter-tweet {
|
|
||||||
margin: auto;
|
|
||||||
}
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
@import "variables.scss";
|
|
||||||
@import "mixins.scss";
|
|
||||||
|
|
||||||
$toc-color: $code-color;
|
|
||||||
$toc-border-color: $code-border-color;
|
|
||||||
|
|
||||||
.table-of-contents {
|
|
||||||
@include margin-content;
|
|
||||||
@include margin-content-left;
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
align-items: flex-end;
|
|
||||||
margin-bottom: 1rem;
|
|
||||||
|
|
||||||
em {
|
|
||||||
font-style: normal;
|
|
||||||
font-weight: bold;
|
|
||||||
font-size: 1.2em;
|
|
||||||
display: block;
|
|
||||||
margin-bottom: 0.5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
#TableOfContents > ul {
|
|
||||||
padding-left: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
nav {
|
|
||||||
margin: 0px;
|
|
||||||
}
|
|
||||||
|
|
||||||
ul {
|
|
||||||
list-style: none;
|
|
||||||
padding-left: 2rem;
|
|
||||||
margin: 0px;
|
|
||||||
}
|
|
||||||
|
|
||||||
a {
|
|
||||||
padding: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
div.wrapper {
|
|
||||||
@include bordered-block;
|
|
||||||
padding: 1rem;
|
|
||||||
background-color: $toc-color;
|
|
||||||
border-color: $toc-border-color;
|
|
||||||
box-sizing: border-box;
|
|
||||||
max-width: 100%;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
$container-width: 45rem;
|
|
||||||
$container-min-padding: 1rem;
|
|
||||||
$container-width-threshold: $container-width + 2 * $container-min-padding;
|
|
||||||
$standard-border-width: .075rem;
|
|
||||||
|
|
||||||
$primary-color: #36e281;
|
|
||||||
$border-color: #bfbfbf;
|
|
||||||
$code-color: #f0f0f0;
|
|
||||||
$code-border-color: darken($code-color, 10%);
|
|
||||||
|
|
||||||
$font-heading: "Lora", serif;
|
|
||||||
$font-body: "Raleway", serif;
|
|
||||||
$font-code: "Inconsolata", monospace;
|
|
||||||
|
|
||||||
$standard-border: $standard-border-width solid $border-color;
|
|
||||||
$code-border: $standard-border-width solid $code-border-color;
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
<!DOCTYPE html>
|
|
||||||
<html lang="{{ .Site.Language.Lang }}">
|
|
||||||
{{- partial "head.html" . -}}
|
|
||||||
<body>
|
|
||||||
{{- partial "header.html" . -}}
|
|
||||||
<div class="container"><hr class="header-divider"></div>
|
|
||||||
<main class="container">
|
|
||||||
{{- block "main" . }}{{- end }}
|
|
||||||
</main>
|
|
||||||
{{- partial "footer.html" . -}}
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
{{ define "main" }}
|
|
||||||
<h2>{{ .Title }}</h2>
|
|
||||||
|
|
||||||
<ul class="post-list">
|
|
||||||
{{ range .Pages.ByDate.Reverse }}
|
|
||||||
{{ partial "post.html" . }}
|
|
||||||
{{ end }}
|
|
||||||
</ul>
|
|
||||||
{{ end }}
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
{{ define "main" }}
|
|
||||||
<h2>{{ .Title }}</h2>
|
|
||||||
{{ .Content }}
|
|
||||||
{{ end }}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
{{ define "main" }}
|
|
||||||
<h2>{{ .Title }}</h2>
|
|
||||||
<div class="post-subscript">
|
|
||||||
<p>
|
|
||||||
{{ range .Params.tags }}
|
|
||||||
<a class="button" href="{{ $.Site.BaseURL }}/tags/{{ . | urlize }}">{{ . }}</a>
|
|
||||||
{{ end }}
|
|
||||||
</p>
|
|
||||||
<p>Posted on {{ .Date.Format "January 2, 2006" }}.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="post-content">
|
|
||||||
{{ if not (eq .TableOfContents "<nav id=\"TableOfContents\"></nav>") }}
|
|
||||||
<div class="table-of-contents">
|
|
||||||
<div class="wrapper">
|
|
||||||
<em>Table of Contents</em>
|
|
||||||
{{ .TableOfContents }}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{{ end }}
|
|
||||||
{{ .Content }}
|
|
||||||
</div>
|
|
||||||
{{ end }}
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
{{ define "main" }}
|
|
||||||
{{ .Content }}
|
|
||||||
|
|
||||||
Recent posts:
|
|
||||||
<ul class="post-list">
|
|
||||||
{{ range first 10 (where (where .Site.Pages.ByDate.Reverse "Section" "blog") ".Kind" "!=" "section") }}
|
|
||||||
{{ partial "post.html" . }}
|
|
||||||
{{ end }}
|
|
||||||
</ul>
|
|
||||||
{{ end }}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
<head>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
||||||
<meta name="theme-color" content="#1dc868">
|
|
||||||
{{ if .Description }}
|
|
||||||
<meta name="description" content="{{ .Description }}">
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inconsolata:wght@400;700&family=Raleway&family=Lora&display=block" media="screen">
|
|
||||||
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css" media="screen">
|
|
||||||
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
|
|
||||||
{{ $sidenotes := resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
|
|
||||||
{{ $code := resources.Get "scss/code.scss" | resources.ToCSS | resources.Minify }}
|
|
||||||
{{ $icon := resources.Get "img/favicon.png" }}
|
|
||||||
{{- partial "sidenotes.html" . -}}
|
|
||||||
<link rel="stylesheet" href="{{ $style.Permalink }}" media="screen">
|
|
||||||
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}" media="screen">
|
|
||||||
<link rel="stylesheet" href="{{ $code.Permalink }}" media="screen">
|
|
||||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" integrity="sha384-zB1R0rpPzHqg7Kpt0Aljp8JPLqbXI3bhnPWROx27a9N0Ll6ZP/+DiW/UqRcLbRjq" crossorigin="anonymous" media="screen">
|
|
||||||
<link rel="icon" type="image/png" href="{{ $icon.Permalink }}">
|
|
||||||
|
|
||||||
<title>{{ .Title }}</title>
|
|
||||||
</head>
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
<div class="container">
|
|
||||||
<h1>Daniel's Blog</h1>
|
|
||||||
</div>
|
|
||||||
<nav>
|
|
||||||
<div class="container">
|
|
||||||
<a href="/">Home</a>
|
|
||||||
<a href="/about">About</a>
|
|
||||||
<a href="https://github.com/DanilaFe">GitHub</a>
|
|
||||||
<a href="/Resume-Danila-Fedorin.pdf">Resume</a>
|
|
||||||
<a href="/tags">Tags</a>
|
|
||||||
<a href="/blog">All Posts</a>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
<li>
|
|
||||||
<a href="{{ .Permalink }}" class="post-title">{{ .Title }}</a>
|
|
||||||
<p class="post-wordcount">{{ .WordCount }} words, about {{ .ReadingTime }} minutes to read.</p>
|
|
||||||
<p class="post-preview">{{ .Summary }} . . .</p>
|
|
||||||
</li>
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
<style>
|
|
||||||
.sidenote-checkbox {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
{{- $pctx := . -}}
|
|
||||||
{{- if .IsHome -}}{{ $pctx = .Site }}{{- end -}}
|
|
||||||
{{- $pages := slice -}}
|
|
||||||
{{- if or $.IsHome $.IsSection -}}
|
|
||||||
{{- $pages = $pctx.RegularPages -}}
|
|
||||||
{{- else -}}
|
|
||||||
{{- $pages = $pctx.Pages -}}
|
|
||||||
{{- end -}}
|
|
||||||
{{- $limit := .Site.Config.Services.RSS.Limit -}}
|
|
||||||
{{- if ge $limit 1 -}}
|
|
||||||
{{- $pages = $pages | first $limit -}}
|
|
||||||
{{- end -}}
|
|
||||||
{{- printf "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>" | safeHTML }}
|
|
||||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
|
||||||
<channel>
|
|
||||||
<title>{{ if eq .Title .Site.Title }}{{ .Site.Title }}{{ else }}{{ with .Title }}{{.}} on {{ end }}{{ .Site.Title }}{{ end }}</title>
|
|
||||||
<link>{{ .Permalink }}</link>
|
|
||||||
<description>Recent content {{ if ne .Title .Site.Title }}{{ with .Title }}in {{.}} {{ end }}{{ end }}on {{ .Site.Title }}</description>
|
|
||||||
<generator>Hugo -- gohugo.io</generator>{{ with .Site.LanguageCode }}
|
|
||||||
<language>{{.}}</language>{{end}}{{ with .Site.Author.email }}
|
|
||||||
<managingEditor>{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}</managingEditor>{{end}}{{ with .Site.Author.email }}
|
|
||||||
<webMaster>{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}</webMaster>{{end}}{{ with .Site.Copyright }}
|
|
||||||
<copyright>{{.}}</copyright>{{end}}{{ if not .Date.IsZero }}
|
|
||||||
<lastBuildDate>{{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}</lastBuildDate>{{ end }}
|
|
||||||
{{ with .OutputFormats.Get "RSS" }}
|
|
||||||
{{ printf "<atom:link href=%q rel=\"self\" type=%q />" .Permalink .MediaType | safeHTML }}
|
|
||||||
{{ end }}
|
|
||||||
{{ range $pages }}
|
|
||||||
<item>
|
|
||||||
<title>{{ .Title }}</title>
|
|
||||||
<link>{{ .Permalink }}</link>
|
|
||||||
<pubDate>{{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}</pubDate>
|
|
||||||
{{ with .Site.Author.email }}<author>{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}</author>{{end}}
|
|
||||||
<guid>{{ .Permalink }}</guid>
|
|
||||||
<description>{{ .Content | html }}</description>
|
|
||||||
</item>
|
|
||||||
{{ end }}
|
|
||||||
</channel>
|
|
||||||
</rss>
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{{ highlight (readFile (printf "code/%s" (.Get 1))) (.Get 0) "" }}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
{{ $s := (readFile (printf "code/%s" (.Get 1))) }}
|
|
||||||
{{ $t := split $s "\n" }}
|
|
||||||
{{ if not (eq (int (.Get 2)) 1) }}
|
|
||||||
{{ .Scratch.Set "u" (after (sub (int (.Get 2)) 1) $t) }}
|
|
||||||
{{ else }}
|
|
||||||
{{ .Scratch.Set "u" $t }}
|
|
||||||
{{ end }}
|
|
||||||
{{ $v := first (add (sub (int (.Get 3)) (int (.Get 2))) 1) (.Scratch.Get "u") }}
|
|
||||||
{{ if (.Get 4) }}
|
|
||||||
{{ .Scratch.Set "opts" (printf ",%s" (.Get 4)) }}
|
|
||||||
{{ else }}
|
|
||||||
{{ .Scratch.Set "opts" "" }}
|
|
||||||
{{ end }}
|
|
||||||
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
$$
|
|
||||||
{{ .Inner }}
|
|
||||||
$$
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
{{ .Page.Scratch.Add "numbernote-id" 1 }}
|
|
||||||
{{ $id := .Page.Scratch.Get "numbernote-id" }}
|
|
||||||
<span class="sidenote">
|
|
||||||
<label class="sidenote-label" for="numbernote-{{ $id }}">({{ $id }})</label>
|
|
||||||
<input class="sidenote-checkbox" type="checkbox" id="numbernote-{{ $id }}"></input>
|
|
||||||
<span class="sidenote-content sidenote-{{ .Get 0 }}">
|
|
||||||
<span class="sidenote-delimiter">[note:</span>
|
|
||||||
{{ .Inner }}
|
|
||||||
<span class="sidenote-delimiter">]</span>
|
|
||||||
</span>
|
|
||||||
</span>
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
<pre><code>{{ readFile (printf "code/%s" (.Get 0)) }}</code></pre>
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
<span class="sidenote">
|
|
||||||
<label class="sidenote-label" for="{{ .Get 1 }}">{{ .Get 2 }}</label>
|
|
||||||
<input class="sidenote-checkbox" type="checkbox" id="{{ .Get 1 }}"></input>
|
|
||||||
<span class="sidenote-content sidenote-{{ .Get 0 }}">
|
|
||||||
<span class="sidenote-delimiter">[note:</span>
|
|
||||||
{{ .Inner }}
|
|
||||||
<span class="sidenote-delimiter">]</span>
|
|
||||||
</span>
|
|
||||||
</span>
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
<div style="background-color: tomato; color: white; padding: 10px;">
|
|
||||||
<em>TODO: </em>{{ .Inner }}
|
|
||||||
</div>
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
{{ define "main" }}
|
|
||||||
<h2>Tagged "{{ .Title }}"</h2>
|
|
||||||
|
|
||||||
<ul class="post-list">
|
|
||||||
{{ range .Pages.ByDate.Reverse }}
|
|
||||||
{{ partial "post.html" . }}
|
|
||||||
{{ end }}
|
|
||||||
</ul>
|
|
||||||
{{ end }}
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
{{ define "main" }}
|
|
||||||
<h2>{{ .Title }}</h2>
|
|
||||||
Below is a list of all the tags ever used on this site.
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
{{ range sort .Pages "Title" }}
|
|
||||||
<li><a href="{{ .Permalink }}">{{ .Title }}</a></li>
|
|
||||||
{{ end }}
|
|
||||||
</ul>
|
|
||||||
{{ end }}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
# theme.toml template for a Hugo theme
|
|
||||||
# See https://github.com/gohugoio/hugoThemes#themetoml for an example
|
|
||||||
|
|
||||||
name = "Vanilla"
|
|
||||||
license = "MIT"
|
|
||||||
# licenselink = "https://github.com/yourname/yourtheme/blob/master/LICENSE"
|
|
||||||
# description = ""
|
|
||||||
# homepage = "http://example.com/"
|
|
||||||
# tags = []
|
|
||||||
# features = []
|
|
||||||
min_version = "0.41"
|
|
||||||
|
|
||||||
[author]
|
|
||||||
name = "Danila Fedorin"
|
|
||||||
homepage = "https://danilafe.com"
|
|
||||||
|
|
||||||
# If porting an existing theme
|
|
||||||
# [original]
|
|
||||||
# name = ""
|
|
||||||
# homepage = ""
|
|
||||||
# repo = ""
|
|
||||||
Reference in New Issue
Block a user