67 Commits

Author SHA1 Message Date
49469bdf12 Fix issues in typesafe interpreter article. 2020-08-12 15:43:22 -07:00
020417e971 Add draft of new Idris typechecking post.
This one uses line highlights!
2020-08-12 01:38:38 -07:00
eff0de5330 Allow the codelines shortcode to use hl_lines. 2020-08-12 01:37:55 -07:00
b219f6855e Change highlight color for code. 2020-08-12 01:37:39 -07:00
65215ccdd6 Start working on improving color handling in code. 2020-08-11 19:29:55 -07:00
3e9f6a14f2 Fix single-line scroll bug 2020-08-11 17:43:59 -07:00
7623787b1c Mention Kai's help in time traveling article. 2020-07-30 02:05:43 -07:00
e15daa8f6d Make the detailed time traveling example a subsection. 2020-07-30 01:09:30 -07:00
298cf6599c Publish time traveling post. 2020-07-30 00:58:48 -07:00
841930a8ef Add time traveling code. 2020-07-30 00:57:47 -07:00
9b37e496cb Add figure size classes to global CSS. 2020-07-30 00:57:27 -07:00
58e6ad9e79 Update lazy evaluation post with images and more. 2020-07-30 00:49:35 -07:00
3aa2a6783e Add images to time traveling post. 2020-07-29 20:09:32 -07:00
d64a0d1fcd Add version of typesafe interpreter with tuples. 2020-07-23 16:38:54 -07:00
ba141031dd Remove the tweet shortcode. 2020-07-23 13:50:09 -07:00
ebdc63f5a0 Make small edit to DELL post. 2020-07-23 13:45:24 -07:00
5af0a09714 Publish DELL post. 2020-07-23 13:41:33 -07:00
8a2bc2660c Update date on typesafe interpreter. 2020-07-22 14:38:01 -07:00
e59b8cf403 Edit and publish typesafe interpreter. 2020-07-22 14:35:19 -07:00
b078ef9a22 Remove implicit arguments from TypsafeIntrV2. 2020-07-22 14:30:47 -07:00
fdaec6d5a9 Make small adjustments to backend math post. 2020-07-21 15:34:46 -07:00
b631346379 Publish the mathematics post. 2020-07-21 14:55:52 -07:00
e9f2378b47 Resume working on the draft of time traveling. 2020-07-20 22:32:14 -07:00
7d2f78d25c Add links and make small clarifications. 2020-07-20 13:56:07 -07:00
1f734a613c Add the second part of the typechecking post. 2020-07-19 22:56:44 -07:00
a3c299b057 Start working on the improved type-safe interpreter. 2020-07-19 17:16:31 -07:00
12aedfce92 Make small fixes to math rendering code. 2020-07-19 14:09:24 -07:00
65645346a2 Adjust title in DELL post. 2020-07-18 20:47:38 -07:00
cb65e89e53 Add math rendering draft. 2020-07-18 20:47:16 -07:00
6a2fec8ef4 Update the about page. 2020-07-17 19:39:43 -07:00
aa59c90810 Add the draft of the DELL post. 2020-07-17 19:39:35 -07:00
2b317930a0 Add resume link. 2020-07-15 15:09:37 -07:00
e7d56dd4bd Clean up some styles. 2020-07-15 13:56:03 -07:00
a4fedb276d Adjust margin spacing. 2020-07-15 13:18:34 -07:00
277c0a2ce6 Rework sidenote spacing and TOC. 2020-07-15 13:13:47 -07:00
ef3c61e9e6 Make table of contents dark. 2020-06-30 22:15:22 -07:00
1908126607 Add border to code. 2020-06-30 21:31:16 -07:00
2d77f8489f Move hiding code into margin SCSS. 2020-06-30 21:22:19 -07:00
0371651fdd Fix headings on Starbound post. 2020-06-24 23:01:35 -07:00
01734d24f7 Get started on tables of contents. 2020-06-24 22:46:22 -07:00
71fc0546e0 Move move code into common 'margin node' mixin. 2020-06-24 22:06:08 -07:00
871a745702 Extract margin variables and mixins into separate file. 2020-06-24 14:21:56 -07:00
3f0df8ae0d Add links for 12th part of compiler series. 2020-06-21 22:21:43 -07:00
1746011c16 Publish 12th part of compiler series. 2020-06-21 00:51:04 -07:00
7c4cfbf3d4 Fix typechecking of mutually recursive functions. 2020-06-21 00:47:26 -07:00
8524e098a8 Make proofreading-based fixes. 2020-06-20 23:50:26 -07:00
971f58da9b Finish draft of part 12 of compiler series. 2020-06-20 22:03:57 -07:00
c496be1031 Finish implementation description in part 12. 2020-06-20 20:46:54 -07:00
21851e3a9c Add more content to part 12. 2020-06-19 02:22:08 -07:00
600d5b91ea Remove unneeded parent class. 2020-06-18 23:06:13 -07:00
09b90c3bbc Add line numbers to codelines shortcode. 2020-06-18 22:30:01 -07:00
f6ca13d6dc Add more implementation content to part 12. 2020-06-18 22:29:38 -07:00
9c4d7c514f Add more content to post 12 draft. 2020-06-16 23:32:09 -07:00
ad1946e9fb Add first draft of lambdas. 2020-06-14 02:00:20 -07:00
68910458e8 Properly handle null types in pattern typechecking. 2020-06-14 00:43:39 -07:00
240e87eca4 Use mangled names in variable environments. 2020-06-13 23:43:52 -07:00
6b5f7e25b7 Maybe finish the let/in code? 2020-06-01 00:23:41 -07:00
e7229e644f Start working on translation. 2020-05-31 18:52:52 -07:00
08c8aca144 Start working on a lifted version of a definition. 2020-05-31 14:37:33 -07:00
7f8dae74ac Adjust type output. 2020-05-31 00:50:58 -07:00
08503116ff Mark some definitions as global, so as not to capture them. 2020-05-31 00:34:12 -07:00
a1d679a59d No longer destroy the list of free variables.
It so happens that this list will tell us which variables
need to be captured.
2020-05-30 23:29:36 -07:00
4586bd0188 Check for free variables in the environment before generalizing. 2020-05-30 16:40:27 -07:00
a97b50f497 Add parsing of let/in. 2020-05-28 14:44:12 -07:00
c84ff11d0d Add typechecking to let/in expressions. 2020-05-26 00:52:54 -07:00
e966e74487 Extract ordering functionality into definition group. 2020-05-25 23:58:56 -07:00
3865abfb4d Add a struct to contain groups of mutually recursive definitions. 2020-05-25 22:11:45 -07:00
74 changed files with 4039 additions and 397 deletions

View File

@@ -32,6 +32,7 @@ add_executable(compiler
binop.cpp binop.hpp
instruction.cpp instruction.hpp
graph.cpp graph.hpp
global_scope.cpp global_scope.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp

View File

@@ -3,6 +3,7 @@
#include "binop.hpp"
#include "error.hpp"
#include "type_env.hpp"
#include "env.hpp"
static void print_indent(int n, std::ostream& to) {
while(n--) to << " ";
@@ -13,14 +14,19 @@ void ast_int::print(int indent, std::ostream& to) const {
to << "INT: " << value << std::endl;
}
void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
void ast_int::find_free(std::set<std::string>& into) {
}
type_ptr ast_int::typecheck(type_mgr& mgr) {
type_ptr ast_int::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
return type_ptr(new type_app(env->lookup_type("Int")));
}
void ast_int::translate(global_scope& scope) {
}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
@@ -30,20 +36,25 @@ void ast_lid::print(int indent, std::ostream& to) const {
to << "LID: " << id << std::endl;
}
void ast_lid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
if(env->lookup(id) == nullptr) into.insert(id);
void ast_lid::find_free(std::set<std::string>& into) {
into.insert(id);
}
type_ptr ast_lid::typecheck(type_mgr& mgr) {
type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
return env->lookup(id)->instantiate(mgr);
}
void ast_lid::translate(global_scope& scope) {
}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
auto mangled_name = this->env->get_mangled_name(id);
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
(instruction*) new instruction_push(env->get_offset(mangled_name)) :
(instruction*) new instruction_pushglobal(mangled_name)));
}
void ast_uid::print(int indent, std::ostream& to) const {
@@ -51,16 +62,22 @@ void ast_uid::print(int indent, std::ostream& to) const {
to << "UID: " << id << std::endl;
}
void ast_uid::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
void ast_uid::find_free(std::set<std::string>& into) {
}
type_ptr ast_uid::typecheck(type_mgr& mgr) {
type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
return env->lookup(id)->instantiate(mgr);
}
void ast_uid::translate(global_scope& scope) {
}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
into.push_back(instruction_ptr(
new instruction_pushglobal(this->env->get_mangled_name(id))));
}
void ast_binop::print(int indent, std::ostream& to) const {
@@ -70,15 +87,15 @@ void ast_binop::print(int indent, std::ostream& to) const {
right->print(indent + 1, to);
}
void ast_binop::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
left->find_free(mgr, env, into);
right->find_free(mgr, env, into);
void ast_binop::find_free(std::set<std::string>& into) {
left->find_free(into);
right->find_free(into);
}
type_ptr ast_binop::typecheck(type_mgr& mgr) {
type_ptr ltype = left->typecheck(mgr);
type_ptr rtype = right->typecheck(mgr);
type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
@@ -90,6 +107,11 @@ type_ptr ast_binop::typecheck(type_mgr& mgr) {
return return_type;
}
void ast_binop::translate(global_scope& scope) {
left->translate(scope);
right->translate(scope);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
@@ -106,15 +128,15 @@ void ast_app::print(int indent, std::ostream& to) const {
right->print(indent + 1, to);
}
void ast_app::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
left->find_free(mgr, env, into);
right->find_free(mgr, env, into);
void ast_app::find_free(std::set<std::string>& into) {
left->find_free(into);
right->find_free(into);
}
type_ptr ast_app::typecheck(type_mgr& mgr) {
type_ptr ltype = left->typecheck(mgr);
type_ptr rtype = right->typecheck(mgr);
type_ptr ast_app::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr return_type = mgr.new_type();
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
@@ -122,6 +144,11 @@ type_ptr ast_app::typecheck(type_mgr& mgr) {
return return_type;
}
void ast_app::translate(global_scope& scope) {
left->translate(scope);
right->translate(scope);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
@@ -139,24 +166,30 @@ void ast_case::print(int indent, std::ostream& to) const {
}
}
void ast_case::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into) {
this->env = env;
of->find_free(mgr, env, into);
void ast_case::find_free(std::set<std::string>& into) {
of->find_free(into);
for(auto& branch : branches) {
type_env_ptr new_env = type_scope(env);
branch->pat->insert_bindings(mgr, new_env);
branch->expr->find_free(mgr, new_env, into);
std::set<std::string> free_in_branch;
std::set<std::string> pattern_variables;
branch->pat->find_variables(pattern_variables);
branch->expr->find_free(free_in_branch);
for(auto& free : free_in_branch) {
if(pattern_variables.find(free) == pattern_variables.end())
into.insert(free);
}
}
}
type_ptr ast_case::typecheck(type_mgr& mgr) {
type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck(mgr), var);
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
type_ptr branch_type = mgr.new_type();
for(auto& branch : branches) {
branch->pat->typecheck(case_type, mgr, branch->expr->env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr);
type_env_ptr new_env = type_scope(env);
branch->pat->typecheck(case_type, mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
mgr.unify(branch_type, curr_branch_type);
}
@@ -170,6 +203,13 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
return branch_type;
}
void ast_case::translate(global_scope& scope) {
of->translate(scope);
for(auto& branch : branches) {
branch->expr->translate(scope);
}
}
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
@@ -199,7 +239,7 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
env_ptr new_env = env;
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
new_env = env_ptr(new env_var(branch->expr->env->get_mangled_name(*it), new_env));
}
branch_instructions.push_back(instruction_ptr(new instruction_split(
@@ -226,16 +266,145 @@ void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) c
}
}
void ast_let::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "LET: " << std::endl;
in->print(indent + 1, to);
}
void ast_let::find_free(std::set<std::string>& into) {
definitions.find_free(into);
std::set<std::string> all_free;
in->find_free(all_free);
for(auto& free_var : all_free) {
if(definitions.defs_defn.find(free_var) == definitions.defs_defn.end())
into.insert(free_var);
}
}
type_ptr ast_let::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
definitions.typecheck(mgr, env);
return in->typecheck(mgr, definitions.env);
}
void ast_let::translate(global_scope& scope) {
for(auto& def : definitions.defs_data) {
def.second->into_globals(scope);
}
for(auto& def : definitions.defs_defn) {
size_t original_params = def.second->params.size();
std::string original_name = def.second->name;
auto& global_definition = def.second->into_global(scope);
size_t captured = global_definition.params.size() - original_params;
type_env_ptr mangled_env = type_scope(env);
mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
mangled_env->set_mangled_name(def.first, global_definition.name);
ast_ptr global_app(new ast_lid(original_name));
global_app->env = mangled_env;
for(auto& param : global_definition.params) {
if(!(captured--)) break;
ast_ptr new_arg(new ast_lid(param));
new_arg->env = env;
global_app = ast_ptr(new ast_app(std::move(global_app), std::move(new_arg)));
global_app->env = env;
}
translated_definitions.push_back({ def.first, std::move(global_app) });
}
in->translate(scope);
}
void ast_let::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_alloc(translated_definitions.size())));
env_ptr new_env = env;
for(auto& def : translated_definitions) {
new_env = env_ptr(new env_var(definitions.env->get_mangled_name(def.first), std::move(new_env)));
}
int offset = translated_definitions.size() - 1;
for(auto& def : translated_definitions) {
def.second->compile(new_env, into);
into.push_back(instruction_ptr(new instruction_update(offset--)));
}
in->compile(new_env, into);
into.push_back(instruction_ptr(new instruction_slide(translated_definitions.size())));
}
void ast_lambda::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "LAMBDA";
for(auto& param : params) {
to << " " << param;
}
to << std::endl;
body->print(indent+1, to);
}
void ast_lambda::find_free(std::set<std::string>& into) {
body->find_free(free_variables);
for(auto& param : params) {
free_variables.erase(param);
}
into.insert(free_variables.begin(), free_variables.end());
}
type_ptr ast_lambda::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
var_env = type_scope(env);
type_ptr return_type = mgr.new_type();
type_ptr full_type = return_type;
for(auto it = params.rbegin(); it != params.rend(); it++) {
type_ptr param_type = mgr.new_type();
var_env->bind(*it, param_type);
full_type = type_ptr(new type_arr(std::move(param_type), full_type));
}
mgr.unify(return_type, body->typecheck(mgr, var_env));
return full_type;
}
void ast_lambda::translate(global_scope& scope) {
std::vector<std::string> function_params;
for(auto& free_variable : free_variables) {
if(env->is_global(free_variable)) continue;
function_params.push_back(free_variable);
}
size_t captured_count = function_params.size();
function_params.insert(function_params.end(), params.begin(), params.end());
auto& new_function = scope.add_function("lambda", std::move(function_params), std::move(body));
type_env_ptr mangled_env = type_scope(env);
mangled_env->bind("lambda", type_scheme_ptr(nullptr), visibility::global);
mangled_env->set_mangled_name("lambda", new_function.name);
ast_ptr new_application = ast_ptr(new ast_lid("lambda"));
new_application->env = mangled_env;
for(auto& param : new_function.params) {
if(!(captured_count--)) break;
ast_ptr new_arg = ast_ptr(new ast_lid(param));
new_arg->env = env;
new_application = ast_ptr(new ast_app(std::move(new_application), std::move(new_arg)));
new_application->env = env;
}
translated = std::move(new_application);
}
void ast_lambda::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
translated->compile(env, into);
}
void pattern_var::print(std::ostream& to) const {
to << var;
}
void pattern_var::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
env->bind(var, mgr.new_type());
void pattern_var::find_variables(std::set<std::string>& into) const {
into.insert(var);
}
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
mgr.unify(env->lookup(var)->instantiate(mgr), t);
env->bind(var, t);
}
void pattern_constr::print(std::ostream& to) const {
@@ -245,23 +414,22 @@ void pattern_constr::print(std::ostream& to) const {
}
}
void pattern_constr::insert_bindings(type_mgr& mgr, type_env_ptr& env) const {
for(auto& param : params) {
env->bind(param, mgr.new_type());
}
void pattern_constr::find_variables(std::set<std::string>& into) const {
into.insert(params.begin(), params.end());
}
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
type_ptr constructor_type = env->lookup(constr)->instantiate(mgr);
if(!constructor_type) {
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
if(!constructor_type_scheme) {
throw type_error(std::string("pattern using unknown constructor ") + constr);
}
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
for(auto& param : params) {
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
if(!arr) throw type_error("too many parameters in constructor pattern");
mgr.unify(env->lookup(param)->instantiate(mgr), arr->left);
env->bind(param, arr->left);
constructor_type = arr->right;
}

View File

@@ -7,6 +7,8 @@
#include "binop.hpp"
#include "instruction.hpp"
#include "env.hpp"
#include "definition.hpp"
#include "global_scope.hpp"
struct ast {
type_env_ptr env;
@@ -14,9 +16,9 @@ struct ast {
virtual ~ast() = default;
virtual void print(int indent, std::ostream& to) const = 0;
virtual void find_free(type_mgr& mgr,
type_env_ptr& env, std::set<std::string>& into) = 0;
virtual type_ptr typecheck(type_mgr& mgr) = 0;
virtual void find_free(std::set<std::string>& into) = 0;
virtual type_ptr typecheck(type_mgr& mgr, type_env_ptr& env) = 0;
virtual void translate(global_scope& scope) = 0;
virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
};
@@ -27,7 +29,7 @@ struct pattern {
virtual ~pattern() = default;
virtual void print(std::ostream& to) const = 0;
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const = 0;
virtual void find_variables(std::set<std::string>& into) const = 0;
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
};
@@ -50,8 +52,9 @@ struct ast_int : public ast {
: value(v) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@@ -62,8 +65,9 @@ struct ast_lid : public ast {
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@@ -74,8 +78,9 @@ struct ast_uid : public ast {
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@@ -88,8 +93,9 @@ struct ast_binop : public ast {
: op(o), left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@@ -101,8 +107,9 @@ struct ast_app : public ast {
: left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@@ -115,8 +122,46 @@ struct ast_case : public ast {
: of(std::move(o)), branches(std::move(b)) {}
void print(int indent, std::ostream& to) const;
void find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr);
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_let : public ast {
using basic_definition = std::pair<std::string, ast_ptr>;
definition_group definitions;
ast_ptr in;
std::vector<basic_definition> translated_definitions;
ast_let(definition_group g, ast_ptr i)
: definitions(std::move(g)), in(std::move(i)) {}
void print(int indent, std::ostream& to) const;
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_lambda : public ast {
std::vector<std::string> params;
ast_ptr body;
type_env_ptr var_env;
std::set<std::string> free_variables;
ast_ptr translated;
ast_lambda(std::vector<std::string> ps, ast_ptr b)
: params(std::move(ps)), body(std::move(b)) {}
void print(int indent, std::ostream& to) const;
void find_free(std::set<std::string>& into);
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
void translate(global_scope& scope);
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
@@ -127,7 +172,7 @@ struct pattern_var : public pattern {
: var(std::move(v)) {}
void print(std::ostream &to) const;
void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
void find_variables(std::set<std::string>& into) const;
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
};
@@ -139,6 +184,6 @@ struct pattern_constr : public pattern {
: constr(std::move(c)), params(std::move(p)) {}
void print(std::ostream &to) const;
virtual void insert_bindings(type_mgr& mgr, type_env_ptr& env) const;
void find_variables(std::set<std::string>& into) const;
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
};

View File

@@ -5,13 +5,20 @@
#include "llvm_context.hpp"
#include "type.hpp"
#include "type_env.hpp"
#include "graph.hpp"
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>
void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
this->env = env;
void definition_defn::find_free() {
body->find_free(free_variables);
for(auto& param : params) {
free_variables.erase(param);
}
}
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v) {
this->env = env;
var_env = type_scope(env);
return_type = mgr.new_type();
full_type = return_type;
@@ -21,39 +28,24 @@ void definition_defn::find_free(type_mgr& mgr, type_env_ptr& env) {
full_type = type_ptr(new type_arr(param_type, full_type));
var_env->bind(*it, param_type);
}
body->find_free(mgr, var_env, free_variables);
}
void definition_defn::insert_types(type_mgr& mgr) {
env->bind(name, full_type);
env->bind(name, full_type, v);
}
void definition_defn::typecheck(type_mgr& mgr) {
type_ptr body_type = body->typecheck(mgr);
type_ptr body_type = body->typecheck(mgr, var_env);
mgr.unify(return_type, body_type);
}
void definition_defn::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
void definition_defn::declare_llvm(llvm_context& ctx) {
generated_function = ctx.create_custom_function(name, params.size());
}
void definition_defn::generate_llvm(llvm_context& ctx) {
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, generated_function);
global_function& definition_defn::into_global(global_scope& scope) {
std::vector<std::string> all_params;
for(auto& free : free_variables) {
if(env->is_global(free)) continue;
all_params.push_back(free);
}
ctx.builder.CreateRetVoid();
all_params.insert(all_params.end(), params.begin(), params.end());
body->translate(scope);
return scope.add_function(name, std::move(all_params), std::move(body));
}
void definition_data::insert_types(type_env_ptr& env) {
@@ -91,19 +83,63 @@ void definition_data::insert_constructors() const {
}
}
void definition_data::generate_llvm(llvm_context& ctx) {
void definition_data::into_globals(global_scope& scope) {
for(auto& constructor : constructors) {
auto new_function =
ctx.create_custom_function(constructor->name, constructor->types.size());
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(
new instruction_pack(constructor->tag, constructor->types.size())
));
instructions.push_back(instruction_ptr(new instruction_update(0)));
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
for (auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
}
ctx.builder.CreateRetVoid();
global_constructor& c = scope.add_constructor(
constructor->name, constructor->tag, constructor->types.size());
env->set_mangled_name(constructor->name, c.name);
}
}
void definition_group::find_free(std::set<std::string>& into) {
for(auto& def_pair : defs_defn) {
def_pair.second->find_free();
for(auto& free_var : def_pair.second->free_variables) {
if(defs_defn.find(free_var) == defs_defn.end()) {
into.insert(free_var);
} else {
def_pair.second->nearby_variables.insert(free_var);
}
}
}
}
void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
this->env = type_scope(env);
for(auto& def_data : defs_data) {
def_data.second->insert_types(this->env);
}
for(auto& def_data : defs_data) {
def_data.second->insert_constructors();
}
function_graph dependency_graph;
for(auto& def_defn : defs_defn) {
def_defn.second->find_free();
dependency_graph.add_function(def_defn.second->name);
for(auto& dependency : def_defn.second->nearby_variables) {
if(defs_defn.find(dependency) == defs_defn.end())
throw 0;
dependency_graph.add_edge(def_defn.second->name, dependency);
}
}
std::vector<group_ptr> groups = dependency_graph.compute_order();
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
auto& group = *it;
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->insert_types(mgr, this->env, vis);
}
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->typecheck(mgr);
}
for(auto& def_defnn_name : group->members) {
this->env->generalize(def_defnn_name, *group, mgr);
}
}
}

View File

@@ -1,11 +1,13 @@
#pragma once
#include <memory>
#include <vector>
#include <map>
#include <set>
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parsed_type.hpp"
#include "type_env.hpp"
#include "global_scope.hpp"
struct ast;
using ast_ptr = std::unique_ptr<ast>;
@@ -29,24 +31,20 @@ struct definition_defn {
type_env_ptr env;
type_env_ptr var_env;
std::set<std::string> free_variables;
std::set<std::string> nearby_variables;
type_ptr full_type;
type_ptr return_type;
std::vector<instruction_ptr> instructions;
llvm::Function* generated_function;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
}
void find_free(type_mgr& mgr, type_env_ptr& env);
void insert_types(type_mgr& mgr);
void find_free();
void insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
void typecheck(type_mgr& mgr);
void compile();
void declare_llvm(llvm_context& ctx);
void generate_llvm(llvm_context& ctx);
global_function& into_global(global_scope& scope);
};
using definition_defn_ptr = std::unique_ptr<definition_defn>;
@@ -66,7 +64,20 @@ struct definition_data {
void insert_types(type_env_ptr& env);
void insert_constructors() const;
void generate_llvm(llvm_context& ctx);
void into_globals(global_scope& scope);
};
using definition_data_ptr = std::unique_ptr<definition_data>;
struct definition_group {
std::map<std::string, definition_data_ptr> defs_data;
std::map<std::string, definition_defn_ptr> defs_defn;
visibility vis;
type_env_ptr env;
definition_group(visibility v = visibility::local) : vis(v) {}
void find_free(std::set<std::string>& into);
void typecheck(type_mgr& mgr, type_env_ptr& env);
};

View File

@@ -15,7 +15,7 @@ struct env_var : public env {
std::string name;
env_ptr parent;
env_var(std::string& n, env_ptr p)
env_var(std::string n, env_ptr p)
: name(std::move(n)), parent(std::move(p)) {}
int get_offset(const std::string& name) const;

View File

@@ -0,0 +1,17 @@
data List a = { Nil, Cons a (List a) }
defn fix f = { let { defn x = { f x } } in { x } }
defn fixpointOnes fo = { Cons 1 fo }
defn sumTwo l = {
case l of {
Nil -> { 0 }
Cons x xs -> {
x + case xs of {
Nil -> { 0 }
Cons y ys -> { y }
}
}
}
}
defn main = { sumTwo (fix fixpointOnes) }

View File

@@ -0,0 +1,19 @@
data List a = { Nil, Cons a (List a) }
defn sum l = {
case l of {
Nil -> { 0 }
Cons x xs -> { x + sum xs}
}
}
defn map f l = {
case l of {
Nil -> { Nil }
Cons x xs -> { Cons (f x) (map f xs) }
}
}
defn main = {
sum (map \x -> { x * x } (map (\x -> { x + x }) (Cons 1 (Cons 2 (Cons 3 Nil)))))
}

View File

@@ -0,0 +1,47 @@
data Bool = { True, False }
data List a = { Nil, Cons a (List a) }
defn if c t e = {
case c of {
True -> { t }
False -> { e }
}
}
defn mergeUntil l r p = {
let {
defn mergeLeft nl nr = {
case nl of {
Nil -> { Nil }
Cons x xs -> { if (p x) (Cons x (mergeRight xs nr)) Nil }
}
}
defn mergeRight nl nr = {
case nr of {
Nil -> { Nil }
Cons x xs -> { if (p x) (Cons x (mergeLeft nl xs)) Nil }
}
}
} in {
mergeLeft l r
}
}
defn const x y = { x }
defn sum l = {
case l of {
Nil -> { 0 }
Cons x xs -> { x + sum xs }
}
}
defn main = {
let {
defn firstList = { Cons 1 (Cons 3 (Cons 5 Nil)) }
defn secondList = { Cons 2 (Cons 4 (Cons 6 Nil)) }
} in {
sum (mergeUntil firstList secondList (const True))
}
}

View File

@@ -0,0 +1,23 @@
data Pair a b = { Pair a b }
defn packer = {
let {
data Packed a = { Packed a }
defn pack a = { Packed a }
defn unpack p = {
case p of {
Packed a -> { a }
}
}
} in {
Pair pack unpack
}
}
defn main = {
case packer of {
Pair pack unpack -> {
unpack (pack 3)
}
}
}

View File

@@ -0,0 +1,83 @@
#include "global_scope.hpp"
#include "ast.hpp"
void global_function::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
// Creates the LLVM function prototype and remembers it, so that
// generate_llvm can fill in its body later (and so other bodies can
// reference this function before it is generated).
void global_function::declare_llvm(llvm_context& ctx) {
    generated_function = ctx.create_custom_function(name, params.size());
}
// Emits the LLVM body for this function: positions the builder at the
// function's entry block, lowers each G-machine instruction in order,
// then terminates the block with a void return.
void global_function::generate_llvm(llvm_context& ctx) {
    ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
    for(auto& instruction : instructions) {
        instruction->gen_llvm(ctx, generated_function);
    }
    ctx.builder.CreateRetVoid();
}
// Emits the LLVM function for a data constructor. Constructors need no
// AST compilation step: their body is always the same two instructions,
// pack the arguments into a tagged node, then update the redex root.
void global_constructor::generate_llvm(llvm_context& ctx) {
    auto packing_function = ctx.create_custom_function(name, arity);
    std::vector<instruction_ptr> body;
    body.reserve(2);
    body.push_back(instruction_ptr(new instruction_pack(tag, arity)));
    body.push_back(instruction_ptr(new instruction_update(0)));
    ctx.builder.SetInsertPoint(&packing_function->getEntryBlock());
    for(auto& inst : body) {
        inst->gen_llvm(ctx, packing_function);
    }
    ctx.builder.CreateRetVoid();
}
// Creates a new top-level function with a uniquely mangled name, takes
// ownership of it, and returns a reference to it. The reference stays
// valid across later insertions because the function is heap-allocated.
global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
    functions.push_back(global_function_ptr(
        new global_function(mangle_name(n), std::move(ps), std::move(b))));
    return *functions.back();
}
// Creates a new top-level data constructor with a uniquely mangled
// name, takes ownership of it, and returns a reference to it.
global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
    constructors.push_back(global_constructor_ptr(
        new global_constructor(mangle_name(n), t, a)));
    return *constructors.back();
}
// Compiles every global function's AST body into G-machine instructions.
// Constructors need no compile step: their instructions are created
// directly in global_constructor::generate_llvm.
void global_scope::compile() {
    for(auto& function : functions) {
        function->compile();
    }
}
// Emits LLVM IR for the whole program. All functions are declared in a
// first pass so that bodies generated in the second pass can reference
// any function regardless of definition order.
void global_scope::generate_llvm(llvm_context& ctx) {
    for(auto& constructor : constructors) {
        constructor->generate_llvm(ctx);
    }
    // Pass 1: create every function prototype.
    for(auto& function : functions) {
        function->declare_llvm(ctx);
    }
    // Pass 2: fill in the bodies.
    for(auto& function : functions) {
        function->generate_llvm(ctx);
    }
}
std::string global_scope::mangle_name(const std::string& n) {
auto occurence_it = occurence_count.find(n);
int occurence = 0;
if(occurence_it != occurence_count.end()) {
occurence = occurence_it->second + 1;
}
occurence_count[n] = occurence;
std::string final_name = n;
if (occurence != 0) {
final_name += "_";
final_name += std::to_string(occurence);
}
return final_name;
}

View File

@@ -0,0 +1,55 @@
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <llvm/IR/Function.h>
#include "instruction.hpp"
struct ast;
using ast_ptr = std::unique_ptr<ast>;
// A top-level function: its parameters, AST body, the G-machine
// instructions produced from the body, and the LLVM function generated
// from those instructions.
struct global_function {
    std::string name; // mangled, unique name (assigned by global_scope)
    std::vector<std::string> params;
    ast_ptr body;
    std::vector<instruction_ptr> instructions; // filled in by compile()
    llvm::Function* generated_function;        // set by declare_llvm()
    global_function(std::string n, std::vector<std::string> ps, ast_ptr b)
        : name(std::move(n)), params(std::move(ps)), body(std::move(b)) {}
    // Lowers the AST body into G-machine instructions.
    void compile();
    // Creates the LLVM prototype (must run before generate_llvm).
    void declare_llvm(llvm_context& ctx);
    // Emits the LLVM body from the compiled instructions.
    void generate_llvm(llvm_context& ctx);
};
using global_function_ptr = std::unique_ptr<global_function>;
// A data constructor lowered to a top-level function that packs its
// arguments into a tagged node.
struct global_constructor {
    std::string name; // mangled, unique name (assigned by global_scope)
    int8_t tag;       // constructor tag used by instruction_pack
    size_t arity;     // number of arguments the constructor packs
    global_constructor(std::string n, int8_t t, size_t a)
        : name(std::move(n)), tag(t), arity(a) {}
    // Emits the LLVM function implementing this constructor.
    void generate_llvm(llvm_context& ctx);
};
using global_constructor_ptr = std::unique_ptr<global_constructor>;
// Owns every function and constructor pulled to the top level of the
// program, and hands out unique (mangled) names for them.
struct global_scope {
    // How many times each base name has been requested; drives mangle_name.
    std::map<std::string, int> occurence_count;
    std::vector<global_function_ptr> functions;
    std::vector<global_constructor_ptr> constructors;
    // Create and take ownership of a new global; the returned reference
    // remains valid because the objects are heap-allocated.
    global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
    global_constructor& add_constructor(std::string n, int8_t t, size_t a);
    // Compiles all function bodies to G-machine instructions.
    void compile();
    // Emits LLVM IR for all constructors and functions.
    void generate_llvm(llvm_context& ctx);
    private:
    // Returns `n` made unique: n, then n_1, n_2, ...
    std::string mangle_name(const std::string& n);
};

View File

@@ -7,7 +7,6 @@
#include <map>
#include <memory>
#include <vector>
#include <iostream>
using function = std::string;

View File

@@ -21,12 +21,10 @@ void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl;
}
extern std::map<std::string, definition_data_ptr> defs_data;
extern std::map<std::string, definition_defn_ptr> defs_defn;
extern definition_group global_defs;
void typecheck_program(
const std::map<std::string, definition_data_ptr>& defs_data,
const std::map<std::string, definition_defn_ptr>& defs_defn,
definition_group& defs,
type_mgr& mgr, type_env_ptr& env) {
type_ptr int_type = type_ptr(new type_base("Int"));
env->bind_type("Int", int_type);
@@ -35,63 +33,32 @@ void typecheck_program(
type_ptr binop_type = type_ptr(new type_arr(
int_type_app,
type_ptr(new type_arr(int_type_app, int_type_app))));
env->bind("+", binop_type);
env->bind("-", binop_type);
env->bind("*", binop_type);
env->bind("/", binop_type);
env->bind("+", binop_type, visibility::global);
env->bind("-", binop_type, visibility::global);
env->bind("*", binop_type, visibility::global);
env->bind("/", binop_type, visibility::global);
for(auto& def_data : defs_data) {
def_data.second->insert_types(env);
}
for(auto& def_data : defs_data) {
def_data.second->insert_constructors();
}
std::set<std::string> free;
defs.find_free(free);
defs.typecheck(mgr, env);
function_graph dependency_graph;
for(auto& def_defn : defs_defn) {
def_defn.second->find_free(mgr, env);
dependency_graph.add_function(def_defn.second->name);
for(auto& dependency : def_defn.second->free_variables) {
if(defs_defn.find(dependency) == defs_defn.end())
throw 0;
dependency_graph.add_edge(def_defn.second->name, dependency);
}
}
std::vector<group_ptr> groups = dependency_graph.compute_order();
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
auto& group = *it;
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->insert_types(mgr);
}
for(auto& def_defnn_name : group->members) {
auto& def_defn = defs_defn.find(def_defnn_name)->second;
def_defn->typecheck(mgr);
}
for(auto& def_defnn_name : group->members) {
env->generalize(def_defnn_name, mgr);
}
}
for(auto& pair : env->names) {
for(auto& pair : defs.env->names) {
std::cout << pair.first << ": ";
pair.second->print(mgr, std::cout);
pair.second.type->print(mgr, std::cout);
std::cout << std::endl;
}
}
void compile_program(const std::map<std::string, definition_defn_ptr>& defs_defn) {
for(auto& def_defn : defs_defn) {
def_defn.second->compile();
for(auto& instruction : def_defn.second->instructions) {
instruction->print(0, std::cout);
}
std::cout << std::endl;
global_scope translate_program(definition_group& group) {
global_scope scope;
for(auto& data : group.defs_data) {
data.second->into_globals(scope);
}
for(auto& defn : group.defs_defn) {
auto& function = defn.second->into_global(scope);
function.body->env->parent->set_mangled_name(defn.first, function.name);
}
return scope;
}
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
@@ -151,24 +118,14 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
}
}
void gen_llvm(
const std::map<std::string, definition_data_ptr>& defs_data,
const std::map<std::string, definition_defn_ptr>& defs_defn) {
void gen_llvm(global_scope& scope) {
llvm_context ctx;
gen_llvm_internal_op(ctx, PLUS);
gen_llvm_internal_op(ctx, MINUS);
gen_llvm_internal_op(ctx, TIMES);
gen_llvm_internal_op(ctx, DIVIDE);
for(auto& def_data : defs_data) {
def_data.second->generate_llvm(ctx);
}
for(auto& def_defn : defs_defn) {
def_defn.second->declare_llvm(ctx);
}
for(auto& def_defn : defs_defn) {
def_defn.second->generate_llvm(ctx);
}
scope.generate_llvm(ctx);
ctx.module.print(llvm::outs(), nullptr);
output_llvm(ctx, "program.o");
@@ -180,7 +137,7 @@ int main() {
type_env_ptr env(new type_env);
parser.parse();
for(auto& def_defn : defs_defn) {
for(auto& def_defn : global_defs.defs_defn) {
std::cout << def_defn.second->name;
for(auto& param : def_defn.second->params) std::cout << " " << param;
std::cout << ":" << std::endl;
@@ -188,9 +145,10 @@ int main() {
def_defn.second->body->print(1, std::cout);
}
try {
typecheck_program(defs_data, defs_defn, mgr, env);
compile_program(defs_defn);
gen_llvm(defs_data, defs_defn);
typecheck_program(global_defs, mgr, env);
global_scope scope = translate_program(global_defs);
scope.compile();
gen_llvm(scope);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";

View File

@@ -7,13 +7,13 @@
#include "parser.hpp"
#include "parsed_type.hpp"
std::map<std::string, definition_data_ptr> defs_data;
std::map<std::string, definition_defn_ptr> defs_defn;
definition_group global_defs;
extern yy::parser::symbol_type yylex();
%}
%token BACKSLASH
%token PLUS
%token TIMES
%token MINUS
@@ -23,6 +23,8 @@ extern yy::parser::symbol_type yylex();
%token DATA
%token CASE
%token OF
%token LET
%token IN
%token OCURLY
%token CCURLY
%token OPAREN
@@ -41,8 +43,9 @@ extern yy::parser::symbol_type yylex();
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <std::vector<parsed_type_ptr>> typeList
%type <definition_group> definitions
%type <parsed_type_ptr> type nonArrowType typeListElement
%type <ast_ptr> aAdd aMul case app appBase
%type <ast_ptr> aAdd aMul case let lambda app appBase
%type <definition_data_ptr> data
%type <definition_defn_ptr> defn
%type <branch_ptr> branch
@@ -54,17 +57,13 @@ extern yy::parser::symbol_type yylex();
%%
program
: definitions { }
: definitions { global_defs = std::move($1); global_defs.vis = visibility::global; }
;
definitions
: definitions definition { }
| definition { }
;
definition
: defn { auto name = $1->name; defs_defn[name] = std::move($1); }
| data { auto name = $1->name; defs_data[name] = std::move($1); }
: definitions defn { $$ = std::move($1); auto name = $2->name; $$.defs_defn[name] = std::move($2); }
| definitions data { $$ = std::move($1); auto name = $2->name; $$.defs_data[name] = std::move($2); }
| %empty { $$ = definition_group(); }
;
defn
@@ -101,6 +100,18 @@ appBase
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
| OPAREN aAdd CPAREN { $$ = std::move($2); }
| case { $$ = std::move($1); }
| let { $$ = std::move($1); }
| lambda { $$ = std::move($1); }
;
let
: LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7))); }
;
lambda
: BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5))); }
;
case

View File

@@ -13,6 +13,7 @@
%%
[ \n]+ {}
\\ { return yy::parser::make_BACKSLASH(); }
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
@@ -22,6 +23,8 @@ defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
let { return yy::parser::make_LET(); }
in { return yy::parser::make_IN(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }

View File

@@ -5,6 +5,8 @@
#include <vector>
#include "error.hpp"
// By default a type is not a function type; type_var and type_arr override this.
bool type::is_arrow(const type_mgr& mgr) const { return false; }
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
if(forall.size() != 0) {
to << "forall ";
@@ -34,20 +36,35 @@ void type_var::print(const type_mgr& mgr, std::ostream& to) const {
}
}
// A type variable counts as an arrow exactly when the current
// substitution maps it to a type that is an arrow; an unbound
// variable is not an arrow.
bool type_var::is_arrow(const type_mgr& mgr) const {
    auto binding = mgr.types.find(name);
    if(binding == mgr.types.end()) return false;
    return binding->second->is_arrow(mgr);
}
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
to << name;
}
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
bool print_parenths = left->is_arrow(mgr);
if(print_parenths) to << "(";
left->print(mgr, to);
to << " -> (";
if(print_parenths) to << ")";
to << " -> ";
right->print(mgr, to);
to << ")";
}
// A function (arrow) type is, by definition, an arrow.
bool type_arr::is_arrow(const type_mgr& mgr) const {
    return true;
}
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
constructor->print(mgr, to);
to << "* ";
to << "*";
for(auto& arg : arguments) {
to << " ";
arg->print(mgr, to);
@@ -185,3 +202,18 @@ void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
for(auto& arg : app->arguments) find_free(arg, into);
}
}
// Collects the free type variables of a type scheme into `into`.
// Variables quantified by the scheme's `forall` are bound, not free,
// so they must neither appear in the result nor be chased through the
// current substitution.
void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
    std::set<std::string> monotype_free;
    // Build a manager whose substitution omits bindings for the
    // quantified variables, so resolution stops at them.
    type_mgr limited_mgr;
    for(auto& binding : types) {
        auto existing_position = std::find(t->forall.begin(), t->forall.end(), binding.first);
        if(existing_position != t->forall.end()) continue;
        limited_mgr.types[binding.first] = binding.second;
    }
    limited_mgr.find_free(t->monotype, monotype_free);
    // Drop the quantified names themselves: they are bound by the scheme.
    for(auto& not_free : t->forall) {
        monotype_free.erase(not_free);
    }
    into.insert(monotype_free.begin(), monotype_free.end());
}

View File

@@ -11,6 +11,7 @@ struct type {
virtual ~type() = default;
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
virtual bool is_arrow(const type_mgr& mgr) const;
};
using type_ptr = std::shared_ptr<type>;
@@ -34,6 +35,7 @@ struct type_var : public type {
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
bool is_arrow(const type_mgr& mgr) const;
};
struct type_base : public type {
@@ -65,6 +67,7 @@ struct type_arr : public type {
: left(std::move(l)), right(std::move(r)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
bool is_arrow(const type_mgr& mgr) const;
};
struct type_app : public type {
@@ -92,4 +95,5 @@ struct type_mgr {
type_ptr resolve(type_ptr t, type_var*& var) const;
void bind(const std::string& s, type_ptr t);
void find_free(const type_ptr& t, std::set<std::string>& into) const;
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
};

View File

@@ -1,13 +1,49 @@
#include "type_env.hpp"
#include "type.hpp"
// Collects the free type variables of every binding in this environment
// and all of its parent environments into `into`.
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
    if(parent != nullptr) parent->find_free(mgr, into);
    for(auto& binding : names) {
        mgr.find_free(binding.second.type, into);
    }
}
// Like find_free, but skips bindings whose names are members of the
// group `avoid` — used during generalization so that a definition
// group's own (not-yet-generalized) bindings do not count as occurring
// free in the environment.
// NOTE(review): parents are walked with plain find_free, so the
// avoid-set only applies to this innermost scope — presumably because
// group members are only ever bound here; confirm.
void type_env::find_free_except(const type_mgr& mgr, const group& avoid,
    std::set<std::string>& into) const {
    if(parent != nullptr) parent->find_free(mgr, into);
    for(auto& binding : names) {
        if(avoid.members.find(binding.first) != avoid.members.end()) continue;
        mgr.find_free(binding.second.type, into);
    }
}
type_scheme_ptr type_env::lookup(const std::string& name) const {
auto it = names.find(name);
if(it != names.end()) return it->second;
if(it != names.end()) return it->second.type;
if(parent) return parent->lookup(name);
return nullptr;
}
// Reports whether `name` is bound with global visibility, consulting
// parent environments when it is not bound in this one. Unbound names
// are not global.
bool type_env::is_global(const std::string& name) const {
    auto found = names.find(name);
    if(found != names.end())
        return found->second.vis == visibility::global;
    return parent ? parent->is_global(name) : false;
}
// Records the global (mangled) name for a variable bound in this exact
// environment; silently does nothing when the variable is not bound
// here (parents are not consulted).
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
    auto it = names.find(name);
    if(it != names.end()) it->second.mangled_name = mangled;
}
// Returns the mangled name recorded for `name`, searching parent
// environments as needed. Falls back to `name` itself when the binding
// has no mangled name, or when the name is not bound at all.
const std::string& type_env::get_mangled_name(const std::string& name) const {
    auto found = names.find(name);
    if(found != names.end()) {
        const std::string& mangled = found->second.mangled_name;
        return mangled.empty() ? name : mangled;
    }
    return parent ? parent->get_mangled_name(name) : name;
}
type_ptr type_env::lookup_type(const std::string& name) const {
auto it = type_names.find(name);
if(it != type_names.end()) return it->second;
@@ -15,12 +51,13 @@ type_ptr type_env::lookup_type(const std::string& name) const {
return nullptr;
}
void type_env::bind(const std::string& name, type_ptr t) {
names[name] = type_scheme_ptr(new type_scheme(t));
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
names[name] = variable_data(std::move(new_scheme), v, "");
}
void type_env::bind(const std::string& name, type_scheme_ptr t) {
names[name] = t;
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
names[name] = variable_data(std::move(t), v, "");
}
void type_env::bind_type(const std::string& type_name, type_ptr t) {
@@ -28,15 +65,18 @@ void type_env::bind_type(const std::string& type_name, type_ptr t) {
type_names[type_name] = t;
}
void type_env::generalize(const std::string& name, type_mgr& mgr) {
void type_env::generalize(const std::string& name, const group& grp, type_mgr& mgr) {
auto names_it = names.find(name);
if(names_it == names.end()) throw 0;
if(names_it->second->forall.size() > 0) throw 0;
if(names_it->second.type->forall.size() > 0) throw 0;
std::set<std::string> free_variables;
mgr.find_free(names_it->second->monotype, free_variables);
for(auto& free : free_variables) {
names_it->second->forall.push_back(free);
std::set<std::string> free_in_type;
std::set<std::string> free_in_env;
mgr.find_free(names_it->second.type->monotype, free_in_type);
find_free_except(mgr, grp, free_in_env);
for(auto& free : free_in_type) {
if(free_in_env.find(free) != free_in_env.end()) continue;
names_it->second.type->forall.push_back(free);
}
}

View File

@@ -1,25 +1,48 @@
#pragma once
#include <map>
#include <string>
#include <set>
#include "graph.hpp"
#include "type.hpp"
struct type_env;
using type_env_ptr = std::shared_ptr<type_env>;
enum class visibility { global,local };
struct type_env {
// Everything the type environment tracks about one bound variable:
// its (possibly polymorphic) type, whether the binding is global or
// local, and the mangled global name it was given ("" when none).
struct variable_data {
    type_scheme_ptr type;
    visibility vis;
    std::string mangled_name;
    // Default construction is required by std::map::operator[].
    variable_data()
        : variable_data(nullptr, visibility::local, "") {}
    variable_data(type_scheme_ptr t, visibility v, std::string n)
        : type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
};
type_env_ptr parent;
std::map<std::string, type_scheme_ptr> names;
std::map<std::string, variable_data> names;
std::map<std::string, type_ptr> type_names;
type_env(type_env_ptr p) : parent(std::move(p)) {}
type_env() : type_env(nullptr) {}
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
void find_free_except(const type_mgr& mgr, const group& avoid,
std::set<std::string>& into) const;
type_scheme_ptr lookup(const std::string& name) const;
bool is_global(const std::string& name) const;
void set_mangled_name(const std::string& name, const std::string& mangled);
const std::string& get_mangled_name(const std::string& name) const;
type_ptr lookup_type(const std::string& name) const;
void bind(const std::string& name, type_ptr t);
void bind(const std::string& name, type_scheme_ptr t);
void bind(const std::string& name, type_ptr t,
visibility v = visibility::local);
void bind(const std::string& name, type_scheme_ptr t,
visibility v = visibility::local);
void bind_type(const std::string& type_name, type_ptr t);
void generalize(const std::string& name, type_mgr& mgr);
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
};

View File

@@ -0,0 +1,21 @@
-- Returns (maximum of the traversed prefix, the prefix itself), cutting
-- the traversal short as soon as an element equal to `m` is seen.
takeUntilMax :: [Int] -> Int -> (Int, [Int])
takeUntilMax [] m = (m, [])
takeUntilMax [x] _ = (x, [x])
takeUntilMax (x:xs) m
  | x == m = (x, [x])
  | otherwise =
      let (m', xs') = takeUntilMax xs m
      in (max m' x, x:xs')

-- Ties the knot: the maximum produced by takeUntilMax is fed back in as
-- the very value to stop at, relying on laziness ("time traveling").
doTakeUntilMax :: [Int] -> [Int]
doTakeUntilMax l = l'
  where (m, l') = takeUntilMax l m

-- Variant whose stop case computes `maximum (x:xs)`, forcing the rest
-- of the list. NOTE(review): presumably the article's example of a
-- version too strict to use circularly — confirm against the post.
takeUntilMax' :: [Int] -> Int -> (Int, [Int])
takeUntilMax' [] m = (m, [])
takeUntilMax' [x] _ = (x, [x])
takeUntilMax' (x:xs) m
  | x == m = (maximum (x:xs), [x])
  | otherwise =
      let (m', xs') = takeUntilMax' xs m
      in (max m' x, x:xs')

View File

@@ -0,0 +1,28 @@
import Data.Map as Map
import Data.Maybe
import Control.Applicative

data Element = A | B | C | D
  deriving (Eq, Ord, Show)

-- Increments an element's count, inserting 1 when it was absent.
addElement :: Element -> Map Element Int -> Map Element Int
addElement = alter ((<|> Just 1) . fmap (+1))

-- Score of an element: the reciprocal of its count; 1.0 when absent.
getScore :: Element -> Map Element Int -> Float
getScore e m = fromMaybe 1.0 $ ((1.0/) . fromIntegral) <$> Map.lookup e m

data BinaryTree a = Empty | Node a (BinaryTree a) (BinaryTree a) deriving Show
type ElementTree = BinaryTree Element
type ScoredElementTree = BinaryTree (Element, Float)

-- Single pass that both counts the elements of the tree and annotates
-- each node with a score read from `m` — which doAssignScores supplies
-- circularly as the final counts.
assignScores :: ElementTree -> Map Element Int -> (Map Element Int, ScoredElementTree)
assignScores Empty m = (Map.empty, Empty)
assignScores (Node e t1 t2) m = (m', Node (e, getScore e m) t1' t2')
  where
    (m1, t1') = assignScores t1 m
    (m2, t2') = assignScores t2 m
    m' = addElement e $ unionWith (+) m1 m2

-- Ties the knot: the count map produced by the traversal is passed back
-- in as the map the scores are read from.
doAssignScores :: ElementTree -> ScoredElementTree
doAssignScores t = t'
  where (m, t') = assignScores t m

View File

@@ -0,0 +1,99 @@
-- The three types our little expression language supports.
data ExprType
  = IntType
  | BoolType
  | StringType

-- Maps an ExprType to the Idris type used to represent its values.
repr : ExprType -> Type
repr IntType = Int
repr BoolType = Bool
repr StringType = String

-- Proofs that distinct ExprTypes cannot be equal; used for the `No`
-- cases of decEq below.
intBoolImpossible : IntType = BoolType -> Void
intBoolImpossible Refl impossible

intStringImpossible : IntType = StringType -> Void
intStringImpossible Refl impossible

boolStringImpossible : BoolType = StringType -> Void
boolStringImpossible Refl impossible

-- Decidable equality of expression types: either a proof that the two
-- types are equal, or a proof that they cannot be.
decEq : (a : ExprType) -> (b : ExprType) -> Dec (a = b)
decEq IntType IntType = Yes Refl
decEq BoolType BoolType = Yes Refl
decEq StringType StringType = Yes Refl
decEq IntType BoolType = No intBoolImpossible
decEq BoolType IntType = No $ intBoolImpossible . sym
decEq IntType StringType = No intStringImpossible
decEq StringType IntType = No $ intStringImpossible . sym
decEq BoolType StringType = No boolStringImpossible
decEq StringType BoolType = No $ boolStringImpossible . sym
-- Binary arithmetic operators.
data Op
  = Add
  | Subtract
  | Multiply
  | Divide

-- Untyped (surface) expressions, as a parser would produce them.
data Expr
  = IntLit Int
  | BoolLit Bool
  | StringLit String
  | BinOp Op Expr Expr
  | IfElse Expr Expr Expr

-- Typed expressions: the index tracks the expression's type, so only
-- well-typed expressions can be constructed.
data SafeExpr : ExprType -> Type where
  IntLiteral : Int -> SafeExpr IntType
  BoolLiteral : Bool -> SafeExpr BoolType
  StringLiteral : String -> SafeExpr StringType
  BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
  IfThenElse : SafeExpr BoolType -> SafeExpr t -> SafeExpr t -> SafeExpr t
-- Resolves an operator applied to operands of types a and b: yields the
-- result type c paired with the function implementing the operator.
-- Only Int operands are accepted.
typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
typecheckOp Add IntType IntType = Right (IntType ** (+))
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
typecheckOp Divide IntType IntType = Right (IntType ** div)
typecheckOp _ _ _ = Left "Invalid binary operator application"

-- Succeeds only when the typechecked expression turned out boolean.
requireBool : (n : ExprType ** SafeExpr n) -> Either String (SafeExpr BoolType)
requireBool (BoolType ** e) = Right e
requireBool _ = Left "Not a boolean."

-- Typechecks an untyped expression, producing its type paired with the
-- corresponding SafeExpr, or a type-error message.
typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
typecheck (IntLit i) = Right (_ ** IntLiteral i)
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
typecheck (StringLit s) = Right (_ ** StringLiteral s)
typecheck (BinOp o l r) = do
  (lt ** le) <- typecheck l
  (rt ** re) <- typecheck r
  (ot ** f) <- typecheckOp o lt rt
  pure (_ ** BinOperation f le re)
typecheck (IfElse c t e) =
  do
    ce <- typecheck c >>= requireBool
    (tt ** te) <- typecheck t
    (et ** ee) <- typecheck e
    case decEq tt et of
      -- The equality proof coerces the then-branch to the else-branch's type.
      Yes p => pure (_ ** IfThenElse ce (replace p te) ee)
      No _ => Left "Incompatible branch types."
-- Evaluates a well-typed expression; this cannot fail.
eval : SafeExpr t -> repr t
eval (IntLiteral i) = i
eval (BoolLiteral b) = b
eval (StringLiteral s) = s
eval (BinOperation f l r) = f (eval l) (eval r)
eval (IfThenElse c t e) = if (eval c) then (eval t) else (eval e)

-- Renders a result; matching on the implicit type index selects the
-- appropriate show.
resultStr : {t : ExprType} -> repr t -> String
resultStr {t=IntType} i = show i
resultStr {t=BoolType} b = show b
resultStr {t=StringType} s = show s

-- Typechecks then evaluates, rendering either the error or the result.
tryEval : Expr -> String
tryEval ex =
  case typecheck ex of
    Left err => "Type error: " ++ err
    Right (t ** e) => resultStr $ eval {t} e

main : IO ()
main = putStrLn $ tryEval $ BinOp Add (IfElse (BoolLit True) (IntLit 6) (IntLit 7)) (BinOp Multiply (IntLit 160) (IntLit 2))

View File

@@ -0,0 +1,120 @@
-- Expression types, now including pairs.
data ExprType
  = IntType
  | BoolType
  | StringType
  | PairType ExprType ExprType

-- Maps an ExprType to the Idris type used to represent its values.
repr : ExprType -> Type
repr IntType = Int
repr BoolType = Bool
repr StringType = String
repr (PairType t1 t2) = Pair (repr t1) (repr t2)

-- Equality check returning Maybe rather than Dec: Just a proof when the
-- types match, Nothing otherwise (no impossibility proofs required).
decEq : (a : ExprType) -> (b : ExprType) -> Maybe (a = b)
decEq IntType IntType = Just Refl
decEq BoolType BoolType = Just Refl
decEq StringType StringType = Just Refl
decEq (PairType lt1 lt2) (PairType rt1 rt2) = do
  subEq1 <- decEq lt1 rt1
  subEq2 <- decEq lt2 rt2
  -- Rewrite one pair component at a time to build the full equality.
  let firstEqual = replace {P = \t1 => PairType lt1 lt2 = PairType t1 lt2} subEq1 Refl
  let secondEqual = replace {P = \t2 => PairType lt1 lt2 = PairType rt1 t2} subEq2 firstEqual
  pure secondEqual
decEq _ _ = Nothing
-- Binary arithmetic operators.
data Op
  = Add
  | Subtract
  | Multiply
  | Divide

-- Untyped (surface) expressions, extended with pair construction and
-- projections.
data Expr
  = IntLit Int
  | BoolLit Bool
  | StringLit String
  | BinOp Op Expr Expr
  | IfElse Expr Expr Expr
  | Pair Expr Expr
  | Fst Expr
  | Snd Expr

-- Typed expressions, indexed by their type; NewPair/First/Second mirror
-- Pair/Fst/Snd with the pair structure tracked in the index.
data SafeExpr : ExprType -> Type where
  IntLiteral : Int -> SafeExpr IntType
  BoolLiteral : Bool -> SafeExpr BoolType
  StringLiteral : String -> SafeExpr StringType
  BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
  IfThenElse : SafeExpr BoolType -> SafeExpr t -> SafeExpr t -> SafeExpr t
  NewPair : SafeExpr t1 -> SafeExpr t2 -> SafeExpr (PairType t1 t2)
  First : SafeExpr (PairType t1 t2) -> SafeExpr t1
  Second : SafeExpr (PairType t1 t2) -> SafeExpr t2
-- Resolves an operator applied to operands of types a and b; only Int
-- operands are accepted.
typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
typecheckOp Add IntType IntType = Right (IntType ** (+))
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
typecheckOp Divide IntType IntType = Right (IntType ** div)
typecheckOp _ _ _ = Left "Invalid binary operator application"

-- Succeeds only when the typechecked expression turned out boolean.
requireBool : (n : ExprType ** SafeExpr n) -> Either String (SafeExpr BoolType)
requireBool (BoolType ** e) = Right e
requireBool _ = Left "Not a boolean."

-- Typechecks an untyped expression, producing its type paired with the
-- corresponding SafeExpr, or a type-error message.
typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
typecheck (IntLit i) = Right (_ ** IntLiteral i)
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
typecheck (StringLit s) = Right (_ ** StringLiteral s)
typecheck (BinOp o l r) = do
  (lt ** le) <- typecheck l
  (rt ** re) <- typecheck r
  (ot ** f) <- typecheckOp o lt rt
  pure (_ ** BinOperation f le re)
typecheck (IfElse c t e) =
  do
    ce <- typecheck c >>= requireBool
    (tt ** te) <- typecheck t
    (et ** ee) <- typecheck e
    case decEq tt et of
      Just p => pure (_ ** IfThenElse ce (replace p te) ee)
      Nothing => Left "Incompatible branch types."
typecheck (Pair l r) =
  do
    (lt ** le) <- typecheck l
    (rt ** re) <- typecheck r
    pure (_ ** NewPair le re)
typecheck (Fst p) =
  do
    (pt ** pe) <- typecheck p
    -- Projections require the operand's type to be a pair.
    case pt of
      PairType _ _ => pure $ (_ ** First pe)
      _ => Left "Applying fst to non-pair."
typecheck (Snd p) =
  do
    (pt ** pe) <- typecheck p
    case pt of
      PairType _ _ => pure $ (_ ** Second pe)
      _ => Left "Applying snd to non-pair."
-- Evaluates a well-typed expression; this cannot fail.
eval : SafeExpr t -> repr t
eval (IntLiteral i) = i
eval (BoolLiteral b) = b
eval (StringLiteral s) = s
eval (BinOperation f l r) = f (eval l) (eval r)
eval (IfThenElse c t e) = if (eval c) then (eval t) else (eval e)
eval (NewPair l r) = (eval l, eval r)
eval (First p) = fst (eval p)
eval (Second p) = snd (eval p)

-- Renders a result, recursing into pair components.
resultStr : {t : ExprType} -> repr t -> String
resultStr {t=IntType} i = show i
resultStr {t=BoolType} b = show b
resultStr {t=StringType} s = show s
resultStr {t=PairType t1 t2} (l,r) = "(" ++ resultStr l ++ ", " ++ resultStr r ++ ")"

-- Typechecks then evaluates, rendering either the error or the result.
tryEval : Expr -> String
tryEval ex =
  case typecheck ex of
    Left err => "Type error: " ++ err
    Right (t ** e) => resultStr $ eval {t} e

main : IO ()
main = putStrLn $ tryEval $ BinOp Add (Fst (IfElse (BoolLit True) (Pair (IntLit 6) (BoolLit True)) (Pair (IntLit 7) (BoolLit False)))) (BinOp Multiply (IntLit 160) (IntLit 2))

View File

@@ -3,5 +3,11 @@ languageCode = "en-us"
title = "Daniel's Blog"
theme = "vanilla"
pygmentsCodeFences = true
pygmentsStyle = "github"
pygmentsUseClasses = true
summaryLength = 20
[markup]
[markup.tableOfContents]
endLevel = 4
ordered = false
startLevel = 3

View File

@@ -1,8 +1,8 @@
---
title: About
---
I'm Daniel, a Computer Science student currently in my third (and final) undergraduate year at Oregon State University.
Due my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
I'm Daniel, a Computer Science student currently working towards my Master's Degree at Oregon State University.
Due to my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
group, gaining some experience in formal verification, domain-specific languages, and explainable computing.
For work, school, and hobby projects, I use a variety of programming languages, most commonly C/C++,

View File

@@ -144,3 +144,5 @@ Here are the posts that I've written so far for this series:
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})

View File

@@ -396,4 +396,5 @@ Result: 4
This looks good! We have added support for polymorphic data types to our compiler.
We are now free to move on to `let/in` expressions, __lambda functions__, and __Input/Output__,
as promised! I'll see you then!
as promised, starting with [part 12]({{< relref "12_compiler_let_in_lambda/index.md" >}}) - `let/in`
and lambdas!

View File

@@ -1,14 +1,13 @@
---
title: Compiling a Functional Language Using C++, Part 12 - Let/In and Lambdas
date: 2020-04-20T20:15:16-07:00
date: 2020-06-21T00:50:07-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we extend our language with let/in expressions and lambda functions."
draft: true
---
Now that our language's type system is more fleshed out and pleasant to use, it's time to shift our focus to the ergonomics of the language itself. I've been mentioning `let/in` expressions and __lambda expressions__ for a while now. The former will let us create names for expressions that are limited to a certain scope (without having to create global variable bindings), while the latter will allow us to create functions without giving them any name at all.
Now that our language's type system is more fleshed out and pleasant to use, it's time to shift our focus to the ergonomics of the language itself. I've been mentioning `let/in` and __lambda__ expressions for a while now. The former will let us create names for expressions that are limited to a certain scope (without having to create global variable bindings), while the latter will allow us to create functions without giving them any name at all.
Let's take a look at `let/in` expressions first, to make sure we're all on the same page about what it is we're trying to implement. Let's start with some rather basic examples, and then move on to more complex examples. The most basic use of a `let/in` expression is, in Haskell:
Let's take a look at `let/in` expressions first, to make sure we're all on the same page about what it is we're trying to implement. Let's start with some rather basic examples, and then move on to more complex ones. A very basic use of a `let/in` expression is, in Haskell:
```Haskell
let x = 5 in x + x
@@ -93,7 +92,7 @@ addSingle6 x = 6 + x
-- ... and so on ...
```
But now, we end up creating several functions with almost identical bodies, with the exception of the free variables themselves. Wouldn't it be better to perform the well-known strategy of reducing code duplication by factoring out parameters, and leaving only instance of the repeated code? We would end up with:
But now, we end up creating several functions with almost identical bodies, with the exception of the free variables themselves. Wouldn't it be better to perform the well-known strategy of reducing code duplication by factoring out parameters, and leaving only one instance of the repeated code? We would end up with:
```Haskell {linenos=table}
addToAll n xs = map (addSingle n) xs
@@ -106,4 +105,884 @@ Wait a moment, didn't we just talk about nested polymorphic definitions, and how
This is true, but why should we perform transformations on a malformed program? Typechecking before pulling functions to the global scope will help us save the work, and breaking down one dependency-searching problem (which is \(O(n^3)\) thanks to Warshall's) into smaller, independent problems may even lead to better performance. Furthermore, typechecking before program transformations will help us come up with more helpful error messages.
{{< /sidenote >}} and can be transformed into a sequence of instructions just like any other global function. It has been pulled from its `where` (which, by the way, is pretty much equivalent to a `let/in`) to the top level.
Now, see how `addSingle` became `(addSingle n)`? If we chose to rewrite the
program this way, we'd have to find-and-replace every instance of `addSingle`
in the function body, which would be tedious and require us to keep
track of shadowed variables and the like. Also, what if we used a local
definition twice in the original piece of code? How about something like this:
```Haskell {linenos=table}
fourthPower x = square * square
where
square = x * x
```
Applying the strategy we saw above, we get:
```Haskell {linenos=table}
fourthPower x = (square x) * (square x)
square x = x * x
```
This is valid, except that in our evaluation model, the two instances
of `(square x)` will be built independently of one another, and thus,
will not be shared. This, in turn, will mean that `square` will be called
twice, which is not what we would expect from looking at the original program.
This isn't good. Instead, why don't we keep the `where`, but modify it
as follows:
```Haskell {linenos=table}
fourthPower x = square * square
where square = square' x
square' x = x * x
```
This time, assuming we can properly implement `where`, the call to
`square' x` should only occur once. Though I've been using `where`,
which leads to less clutter in Haskell code, the exact same approach applies
to `let/in`, and that's what we'll be using in our language.
This technique of replacing captured variables with arguments, and pulling closures into the global scope to aid compilation, is called [Lambda Lifting](https://en.wikipedia.org/wiki/Lambda_lifting). Its name is no coincidence - lambda functions need to undergo the same kind of transformation as our nested definitions (unlike nested definitions, though, lambda functions need to be named). This is why they are included in this post together with `let/in`!
What are lambda functions, by the way? A lambda function is just a function
expression that doesn't have a name. For example, if we had Haskell code like
this:
```Haskell
double x = x + x
doubleList xs = map double xs
```
We could rewrite it using a lambda function as follows:
```Haskell
doubleList xs = map (\x -> x + x) xs
```
As you can see, a lambda is an expression in the form `\x -> y` where `x` can
be any variable and `y` can be any expression (including another lambda).
This represents a function that, when applied to a value `x`, will perform
the computation given by `y`. Lambdas are useful when creating single-use
functions that we don't want to make globally available.
Lifting lambda functions will effectively rewrite our program in the
opposite direction to the one shown, replacing the lambda with a reference
to a global declaration which will hold the function's body. Just like
with `let/in`, we will represent captured variables using arguments
and partial application. For instance, when starting with:
```Haskell
addToAll n xs = map (\x -> n + x) xs
```
We would output the following:
```Haskell
addToAll n xs = map (lambda n) xs
lambda n x = n + x
```
### Implementation
Now that we understand what we have to do, it's time to jump straight into
doing it. First, we need to refactor our current code to allow for the changes
we're going to make; then, we will use the new tools we defined to implement `let/in` expressions and lambda functions.
#### Infrastructure Changes
When finding captured variables, the notion of _free variables_ once again
becomes important. Recall that a free variable in an expression is a variable
that is defined outside of that expression. Consider, for example, the
expression:
```Haskell
let x = 5 in x + y
```
In this expression, `x` is _not_ a free variable, since it's defined
in the `let/in` expression. On the other hand, `y` _is_ a free variable,
since it's not defined locally.
The algorithm that we used for computing free variables was rather biased.
Previously, we only cared about the difference between a local variable
(defined somewhere in a function's body, or referring to one of the function's
parameters) and a global variable (referring to a global function).
This shows in our code for `find_free`. Consider, for example, this snippet:
{{< codelines "C++" "compiler/11/ast.cpp" 33 36 >}}
We created bindings in our type environment whenever we saw a new variable
being introduced, which led us to only count variables that we did not bind
_anywhere_ as 'free'. This approach is no longer sufficient. Consider,
for example, the following Haskell code:
```Haskell {linenos=table}
someFunction x =
let
y = x + 5
in
x*y
```
We can see that the variable `x` is introduced on line 1.
Thus, our current algorithm will happily store `x` in an environment,
and not count it as free. But clearly, the definition of `y` on line 3
captures `x`! If we were to lift `y` into global scope, we would need
to pass `x` to it as an argument. To fix this, we have to separate the creation
and assignment of type environments from free variable detection. Why
don't we start with `ast` and its descendants? Our signatures become:
```C++
void ast::find_free(std::set<std::string>& into);
type_ptr ast::typecheck(type_mgr& mgr, type_env_ptr& env);
```
For the most part, the code remains unchanged. We avoid
using `env` (and `this->env`), and default to marking
any variable as a free variable:
{{< codelines "C++" "compiler/12/ast.cpp" 39 41 >}}
Since we no longer use the environment, we resort to an
alternative method of removing bound variables. Here's
`ast_case::find_free`:
{{< codelines "C++" "compiler/12/ast.cpp" 169 181 >}}
For each branch, we find the free variables. However, we
want to avoid marking variables that were introduced through
pattern matching as free (they are not). Thus, we use `pattern::find_variables`
to see which of the variables were bound by that pattern,
and remove them from the list of free variables. We
can then safely add the list of free variables in the pattern to the overall
list of free variables. Other `ast` descendants experience largely
cosmetic changes (such as the removal of the `env` parameter).
Of course, we must implement `find_variables` for each of our `pattern`
subclasses. Here's what I got for `pattern_var`:
{{< codelines "C++" "compiler/12/ast.cpp" 402 404 >}}
And here's an equally terse implementation for `pattern_constr`:
{{< codelines "C++" "compiler/12/ast.cpp" 417 419 >}}
We also want to update `definition_defn` with this change. Our signatures
become:
```C++
void definition_defn::find_free();
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
```
We'll get to the `visibility` parameter later. The implementations
are fairly simple. Just like `ast_case`, we want to erase each function's
parameters from its list of free variables:
{{< codelines "C++" "compiler/12/definition.cpp" 13 18 >}}
Since `find_free` no longer creates any type bindings or environments,
this functionality is shouldered by `insert_types`:
{{< codelines "C++" "compiler/12/definition.cpp" 20 32 >}}
Now that free variables are properly computed, we are able to move on
to bigger and better things.
#### Nested Definitions
At present, our code for typechecking the whole program is located in
`main.cpp`:
{{< codelines "C++" "compiler/11/main.cpp" 43 61 >}}
This piece of code goes on. We now want this to be more general. Soon, `let/in`
expressions will bring with them definitions that are inside other definitions,
which will not be reachable at the top level. The fundamental topological
sorting algorithm, though, will remain the same. We can abstract a series
of definitions that need to be ordered and then typechecked into a new struct,
`definition_group`:
{{< codelines "C++" "compiler/12/definition.hpp" 73 83 >}}
This will be exactly like a list of `defn`/`data` definitions we have at the
top level, except now, it can also occur in other places, like `let/in`
expressions. Once again, ignore for the moment the `visibility` field.
The way we defined function ordering requires some extra work from
`definition_group`. Recall that conceptually, functions can only depend
on other functions defined in the same `let/in` expression, or, more generally,
in the same `definition_group`. This means that we now classify free variables
in definitions into two categories: free variables that refer to "nearby"
definitions (i.e. definitions in the same group) and free variables that refer
to "far away" definitions. The "nearby" variables will be used to do
topological ordering, while the "far away" variables can be passed along
further up, perhaps into an enclosing `let/in` expression (for which "nearby"
variables aren't actually free, since they are bound in the `let`). We
implement this partitioning of variables in `definition_group::find_free`:
{{< codelines "C++" "compiler/12/definition.cpp" 94 105 >}}
Notice that we have added a new `nearby_variables` field to `definition_defn`.
This is used on line 101, and will be once again used in `definition_group::typecheck`. Speaking of `typecheck`, let's look at its definition:
{{< codelines "C++" "compiler/12/definition.cpp" 107 145 >}}
This function is a little long, but conceptually, each `for` loop
contains a step of the process:
* The first loop declares all data types, so that constructors can
be verified to properly reference them.
* The second loop creates all the data type constructors.
* The third loop adds edges to our dependency graph.
* The fourth loop performs typechecking on the now-ordered groups of mutually
recursive functions.
* The first inner loop inserts the types of all the functions into the environment.
* The second inner loop actually performs typechecking.
* The third inner loop makes as many things polymorphic as possible.
We can now adjust our `parser.y` to use a `definition_group` instead of
two global vectors. First, we declare a global `definition_group`:
{{< codelines "C++" "compiler/12/parser.y" 10 10 >}}
Then, we adjust `definitions` to create `definition_group`s:
{{< codelines "text" "compiler/12/parser.y" 59 68 >}}
We can now adjust `main.cpp` to use the global `definition_group`. Among
other changes (such as removing `extern` references to `vector`s, and updating
function signatures) we also update the `typecheck_program` function:
{{< codelines "C++" "compiler/12/main.cpp" 41 49 >}}
Now, our code is ready for typechecking nested definitions, but not for
compiling them. The main thing that we still have to address is the addition
of new definitions to the global scope. Let's take a look at that next.
#### Global Definitions
We want every function (and even non-function definitions that capture surrounding
variables), regardless of whether or not it was declared in the global scope,
to be processed and converted to LLVM code. The LLVM code conversion takes
several steps. First, the function's AST is translated into G-machine
instructions, which we covered in [part 5]({{< relref "05_compiler_execution.md" >}}),
by a process we covered in [part 6]({{< relref "06_compiler_compilation.md" >}}).
Then, an LLVM function is created for every function, and registered globally.
Finally, the G-machine instructions are converted into LLVM IR, which is
inserted into the previously created functions. These things
can't be done in a single pass: at the very least, we can't start translating
G-machine instructions into LLVM IR until functions are globally declared,
because we would otherwise have no means of referencing other functions. It
makes sense to me, then, to pull out all the 'global' definitions into
a single top-level list (perhaps somewhere in `main.cpp`).
Let's start implementing this with a new `global_scope` struct. This
struct will contain all of the global function and constructor definitions:
{{< codelines "C++" "compiler/12/global_scope.hpp" 42 55 >}}
This struct will allow us to keep track of all the global definitions,
emitting them as we go, and then coming back to them as necessary.
There are also signs of another piece of functionality: `occurence_count`
and `mangle_name`. These two will be used to handle duplicate names.
We cannot have two global functions named the same thing, but we can
easily imagine a situation in which two separate `let/in` expressions define
a variable like `x`, which then needs to be lifted to the global scope. We
resolve such conflicts by slightly changing - "mangling" - the name of
one of the resulting global definitions. We allow the first global definition
to be named the same as it was originally (in our example, this would be `x`).
However, if we detect that a global definition `x` already exists (we
track this using `occurence_count`), we rename it to `x_1`. Subsequent
global definitions will end up being named `x_2`, `x_3`, and so on.
Alright, let's take a look at `global_function` and `global_constructor`.
Here's the former:
{{< codelines "C++" "compiler/12/global_scope.hpp" 11 27 >}}
There's nothing really surprising here: all of the fields
are reminiscent of `definition_defn`, though some type-related variables
are missing. We also include the three compilation-related methods,
`compile`, `declare_llvm`, and `generate_llvm`, which were previously in `definition_defn`. Let's look at `global_constructor` now:
{{< codelines "C++" "compiler/12/global_scope.hpp" 29 40 >}}
This maps pretty closely to a single `definition_data::constructor`.
There's a difference here that is not clear at a glance, though. Whereas
the `name` in a `definition_defn` or `definition_data` refers to the
name as given by the user in the code, the `name` of a `global_function`
or `global_constructor` has gone through mangling, and thus, should be
unique.
Let's now look at the implementation of these structs' methods. The methods
`add_function` and `add_constructor` are pretty straightforward. Here's
the former:
{{< codelines "C++" "compiler/12/global_scope.cpp" 39 43 >}}
And here's the latter:
{{< codelines "C++" "compiler/12/global_scope.cpp" 45 49 >}}
In both of these functions, we return a reference to the new global
definition we created. This helps us access the mangled `name` field,
and, in the case of `global_function`, inspect the `ast_ptr` that represents
its body.
Next, we have `global_scope::compile` and `global_scope::generate_llvm`,
which encapsulate these operations on all global definitions. Their
implementations are very straightforward, and are similar to the
`gen_llvm` function we used to have in our `main.cpp`:
{{< codelines "C++" "compiler/12/global_scope.cpp" 51 67 >}}
Finally, we have `mangle`, which takes care of potentially duplicate
variable names:
{{< codelines "C++" "compiler/12/global_scope.cpp" 69 83 >}}
Let's move on to the global definition structs.
The `compile`, `declare_llvm`, and `generate_llvm` methods for
`global_function` are pretty much the same as those that we used to have
in `definition_defn`:
{{< codelines "C++" "compiler/12/global_scope.cpp" 4 24 >}}
The same is true for `global_constructor` and its method `generate_llvm`:
{{< codelines "C++" "compiler/12/global_scope.cpp" 26 37 >}}
Recall that in this case, we need not have two methods for declaring
and generating LLVM, since constructors don't reference other constructors,
and are always generated before any function definitions.
#### Visibility
Should we really be turning _all_ free variables in a function definition
into arguments? Consider the following piece of Haskell code:
```Haskell {linenos=table}
add x y = x + y
mul x y = x * y
something = mul (add 1 3) 3
```
In the definition of `something`, `mul` and `add` occur free.
A very naive lifting algorithm might be tempted to rewrite such a program
as follows:
```Haskell {linenos=table}
add x y = x + y
mul x y = x * y
something' add mul = mul (add 1 3) 3
something = something' add mul
```
But that's absurd! Not only are `add` and `mul` available globally,
but such a rewrite generates another definition with free variables,
which means we didn't really improve our program in any way. From this
example, we can see that we don't want to be turning references to global
variables into function parameters. But how can we tell if a variable
we're trying to operate on is global or not? I propose a flag in our
`type_env`, which we'll augment to be used as a symbol table. To do
this, we update the implementation of `type_env` to map variables to
values of a struct `variable_data`:
{{< codelines "C++" "compiler/12/type_env.hpp" 14 23 >}}
The `visibility` enum is defined as follows:
{{< codelines "C++" "compiler/12/type_env.hpp" 11 11 >}}
As you can see from the above snippet, we also added a `mangled_name` field
to the new `variable_data` struct. We will be using this field shortly. We
also add a few methods to our `type_env`, and end up with the following:
{{< codelines "C++" "compiler/12/type_env.hpp" 32 45 >}}
We will come back to `find_free` and `find_free_except`, as well as
`set_mangled_name` and `get_mangled_name`. For now, we just adjust `bind` to
take a visibility parameter that defaults to `local`, and implement
`is_global`:
{{< codelines "C++" "compiler/12/type_env.cpp" 27 32 >}}
Remember the `visibility::global` in `parser.y`? This is where that comes in.
Specifically, we recall that `definition_defn::insert_types` is responsible
for placing function types into the environment, making them accessible
during typechecking later. At this time, we already need to know whether
or not the definitions are global or local (so that we can create the binding).
Thus, we add `visibility` as a parameter to `insert_types`:
{{< codelines "C++" "compiler/12/definition.hpp" 44 44 >}}
Since we are now moving from manually wrangling definitions towards using
`definition_group`, we make it so that the group itself provides this
argument. To do this, we add the `visibility` field from before to it,
and set it in the parser. One more thing: since constructors never
capture variables, we can always move them straight to the global
scope, and thus, we'll always mark them with `visibility::global`.
#### Managing Mangled Names
Just mangling names is not enough. Consider the following program:
```text {linenos=table}
defn packOne x = {
let {
data Packed a = { Pack a }
} in {
Pack x
}
}
defn packTwo x = {
let {
data Packed a = { Pack a }
} in {
Pack x
}
}
```
{{< sidenote "right" "lifting-types-note" "Lifting the data type declarations" >}}
We are actually not <em>quite</em> doing something like the following snippet.
The reason for this is that we don't mangle the names for types. I pointed
out this potential issue in a sidenote in the previous post. Since the size
of this post is already ballooning, I will not deal with this issue here.
Even at the end of this post, our compiler will not be able to distinguish
between the two <code>Packed</code> types. We will hopefully get to it later.
{{< /sidenote >}} and their constructors into the global
scope gives us something like:
``` {linenos=table}
data Packed a = { Pack a }
data Packed_1 a = { Pack_1 a }
defn packOne x = { Pack x }
defn packTwo x = { Pack_1 x }
```
Notice that we had to rename one of the calls to `Pack` to be a call to
`Pack_1` instead. To actually change our AST to reference `Pack_1`, we'd have
to traverse the whole tree, and make sure to keep track of definitions
that could shadow `Pack` further down. This is cumbersome. Instead, we
can mark a variable as referring to a mangled version of itself, and
access this information when needed. To do this, we add the `mangled_name`
field to the `variable_data` struct as we've seen above, and implement
the `set_mangled_name` and `get_mangled_name` methods. The former:
{{< codelines "C++" "compiler/12/type_env.cpp" 34 37 >}}
And the latter:
{{< codelines "C++" "compiler/12/type_env.cpp" 39 45 >}}
We don't allow `set_mangled_name` to affect variables that are declared
above the receiving `type_env`, and use the empty string as a 'none' value.
Now, when lifting data type constructors, we'll be able to use
`set_mangled_name` to make sure constructor calls are made correctly. We
will also be able to use this in other cases, like the translation
of local function definitions.
#### New AST Nodes
Finally, it's time for us to add new AST nodes to our language.
Specifically, these nodes are `ast_let` (for `let/in` expressions)
and `ast_lambda` for lambda functions. We declare them as follows:
{{< codelines "C++" "compiler/12/ast.hpp" 131 166 >}}
In `ast_let`, the `definitions` field corresponds to the original definitions
given by the user in the program, and the `in` field corresponds to the
expression which uses these definitions. In the process of lifting, though,
we eventually transfer each of the definitions to the global scope, replacing
their right hand sides with partial applications. After this transformation,
all the data type definitions are effectively gone, and all the function
definitions are converted into the simple form `x = f a1 ... an`. We hold
these post-transformation equations in the `translated_definitions` field,
and it's them that we compile in this node's `compile` method.
In `ast_lambda`, we allow multiple parameters (like Haskell's `\x y -> x + y`).
We store these parameters in the `params` field, and we store the lambda's
expression in the `body` field. Just like `definition_defn`,
the `ast_lambda` node maintains a separate environment in which its children
have been bound, and a list of variables that occur freely in its body. The
former is used for typechecking, while the latter is used for lifting.
Finally, the `translated` field holds the lambda function's form
after its body has been transformed into a global function. Similarly to
`ast_let`, this node will be in the form `f a1 ... an`.
The
observant reader will have noticed that we have a new method: `translate`.
This is a new method for all `ast` descendants, and will implement the
steps of moving definitions to the global scope and transforming the
program. Before we get to it, though, let's look at the other relevant
pieces of code for `ast_let` and `ast_lambda`. First, their grammar
rules in `parser.y`:
{{< codelines "text" "compiler/12/parser.y" 107 115 >}}
This is pretty similar to the rest of the grammar, so I will give this no
further explanation. Next, their `find_free` and `typecheck` code.
We can start with `ast_let`:
{{< codelines "C++" "compiler/12/ast.cpp" 275 289 >}}
As you can see, `ast_let::find_free` works in a similar manner to `ast_case::find_free`.
It finds the free variables in the `in` node as well as in each of the definitions
(taking advantage of the fact that `definition_group::find_free` populates the
given set with "far away" free variables). It then filters out any variables bound in
the `let` from the set of free variables in `in`, and returns the result.
Typechecking in `ast_let` relies on `definition_group::typecheck`, which holds
all of the required functionality for checking the new definitions.
Once the definitions are typechecked, we use their type information to
typecheck the `in` part of the expression (passing `definitions.env` to the
call to `typecheck` to make the new definitions visible).
Next, we look at `ast_lambda`:
{{< codelines "C++" "compiler/12/ast.cpp" 344 366 >}}
Again, `ast_lambda::find_free` works similarly to `definition_defn`, stripping
the variables expected by the function from the body's list of free variables.
Also like `definition_defn`, this new node remembers the free variables in
its body, which we will later use for lifting.
Typechecking in this node also proceeds similarly to `definition_defn`. We create
new type variables for each parameter and for the return value, and build up
a function type called `full_type`. We then typecheck the body using the
new environment (which now includes the variables), and return the function type we came up with.
#### Translation
Recalling the transformations we described earlier, we can observe two
major steps to what we have to do:
1. Move the body of the original definition into its own
global definition, adding all the captured variables as arguments.
2. Replace the right hand side of the `let/in` expression with an application
of the global definition to the variables it requires.
We will implement these in a new `translate` method, with the following
signature:
```C++
void ast::translate(global_scope& scope);
```
The `scope` parameter and its `add_function` and `add_constructor` methods will
be used to add definitions to the global scope. Each AST node will also
use this method to implement the second step. Currently, only
`ast_let` and `ast_lambda` will need to modify themselves - all other
nodes will simply recursively call this method on their children. Let's jump
straight into implementing this method for `ast_let`:
{{< codelines "C++" "compiler/12/ast.cpp" 291 316 >}}
Since data type definitions don't really depend on anything else, we process
them first. This amounts to simply calling the `definition_data::into_globals`
method, which itself simply calls `global_scope::add_constructor`:
{{< codelines "C++" "compiler/12/definition.cpp" 86 92 >}}
Note how `into_globals` updates the mangled name of its constructor
via `set_mangled_name`. This will help us decide which global
function to call during code generation. More on that later.
Starting with line 295, we start processing the function definitions
in the `let/in` expression. We remember how many arguments were
explicitly added to the function definition, and then call the
definition's `into_global` method. This method is implemented
as follows:
{{< codelines "C++" "compiler/12/definition.cpp" 40 49 >}}
First, this method collects all the non-global free variables in
its body, which will need to be passed to the global definition
as arguments. It then combines this list with the arguments
the user explicitly added to it, recursively translates
its body, and creates a new global definition using `add_function`.
We return to `ast_let::translate` at line 299. Here,
we determine how many variables ended up being captured, by
subtracting the number of explicit parameters from the total
number of parameters the new global definition has. This number,
combined with the fact that we added all the 'implicit' arguments
to the function to the beginning of the list, will let us
iterate over all implicit arguments, creating a chain of partial
function applications.
But how do we build the application? We could use the mangled name
of the function, but this seems inelegant, especially since we
already keep track of mangling information in `type_env`. Instead,
we create a new, local environment, in which we place an updated
binding for the function, marking it global, and setting
its mangled name to the one generated by `global_scope`. This work is done
on lines 301-303. We create a reference to the global function
using the new environment on lines 305 and 306, and apply it to
all the implicit arguments on lines 307-313. Finally, we
add the new 'basic' equation into `translated_definitions`.
Let's take a look at translating `ast_lambda` next:
{{< codelines "C++" "compiler/12/ast.cpp" 368 392 >}}
Once again, on lines 369-375 we find all the arguments to the
global definition. On lines 377-382 we create a new global
function and a mangled environment, and start creating the
chain of function applications. On lines 384-390, we actually
create the arguments and apply the function to them. Finally,
on line 391, we store this new chain of applications in the
`translated` field.
#### Compilation
There's still another piece of the puzzle missing, and
that's how we're going to compile `let/in` expressions into
G-machine instructions. We have allowed these expressions
to be recursive, and maybe even mutually recursive. This
worked fine with global definitions; instead of specifying
where on the stack we can find the reference to a global
function, we just created a new global node, and called
it good. Things are different now, though, because the definitions
we're referencing aren't _just_ global functions; they are partial
applications of a global function. And to reference themselves,
or their neighbors, they have to have a handle on their own nodes. We do this
using an instruction that we foreshadowed in part 5, but didn't use
until just now: __Alloc__.
__Alloc__ creates placeholder nodes on the stack. These nodes
are indirections, the same kind that we use for lazy evaluation
and sharing elsewhere. We create an indirection node for every
definition that we then build; when an expression needs access
to a definition, we give it the indirection node. After
building the partial application graph for an expression,
we use __Update__, making the corresponding indirection
point to this new graph. This way, the 'handle' to a
definition is always accessible, and once the definition's expression
is built, the handle correctly points to it. Here's the implementation:
{{< codelines "C++" "compiler/12/ast.cpp" 319 332 >}}
First, we create the __Alloc__ instruction. Then, we update
our environment to map each definition name to a location
within the newly allocated batch of nodes. Since we iterate
the definitions in order, 'pushing' them into our environment,
we end up with the convention of having the later definitions
closer to the top of the G-machine stack. Thus, when we
iterate the definitions again, this time to compile their
bodies, we have to do so starting with the highest offset,
and working our way down to __Update__-ing the top of the stack.
Once the definitions have been compiled, we proceed to compiling
the `in` part of the expression as normal, using our updated
environment. Finally, we use __Slide__ to get rid of the definition
graphs, cleaning up the stack.
Compiling the `ast_lambda` is far more straightforward. We just
compile the resulting partial application as we normally would have:
{{< codelines "C++" "compiler/12/ast.cpp" 394 396 >}}
One more thing. Let's adopt the convention of storing __mangled__
names into the compilation environment. This way, rather than looking up
mangled names only for global functions, which would be a 'gotcha'
for anyone working on the compiler, we will always use the mangled
names during compilation. To make this change, we make sure that
`ast_case` also uses `mangled_name`:
{{< codelines "C++" "compiler/12/ast.cpp" 242 242 >}}
We also update the logic for `ast_lid::compile` to use the mangled
name information:
{{< codelines "C++" "compiler/12/ast.cpp" 52 58 >}}
#### Fixing Type Generalization
This is a rather serious bug that made its way into the codebase
since part 10. Recall that we can only generalize type variables
that are free in the environment. Thus far, we haven't done that,
and we really should: I ran into incorrectly inferred types
in my first test of the `let/in` language feature.
We need to make our code capable of finding free variables in the
type environment. This requires the `type_mgr`, which associates
with type variables the real types they represent, if any. We
thus create methods with signatures as follows:
```C++
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const;
void type_env::find_free_except(const type_mgr& mgr, const std::string& avoid,
std::set<std::string>& into) const;
```
Why `find_free_except`? When generalizing a variable whose type was already
stored in the environment, all the type variables we could generalize would
not be 'free'. If they only occur in the type we're generalizing, though,
we shouldn't let that stop us! More generally, if we see type variables that
are only found in the same mutually recursive group as the binding we're
generalizing, we are free to generalize them too. Thus, we pass in
a reference to a `group`, and check if a variable is a member of that group
before searching it for free type variables. The implementations of the two
methods are straightforward:
{{< codelines "C++" "compiler/12/type_env.cpp" 4 18 >}}
Note that `find_free_except` calls `find_free` in its recursive call. This
is not a bug: we _do_ want to include free type variables from bindings
that have the same name as the variable we're generalizing, but aren't found
in the same scope. As far as we're concerned, they're different variables!
The two methods use another `find_free` method which we add to `type_mgr`:
{{< codelines "C++" "compiler/12/type.cpp" 206 219 >}}
This one is a bit of a hack. Typically, while running `find_free`, a
`type_mgr` will resolve any type variables. However, variables from the
`forall` quantifier of a type scheme should not be resolved, since they
are explicitly generic. To prevent the type manager from erroneously resolving
such type variables, we create a new type manager that does not have
these variables bound to anything, and thus marks them as free. We then
filter these variables out of the final list of free variables.
Finally, `generalize` makes sure not to use variables that it finds free:
{{< codelines "C++" "compiler/12/type_env.cpp" 68 81 >}}
#### Putting It All Together
All that's left is to tie the parts we've created into one coherent whole
in `main.cpp`. First of all, since we moved all of the LLVM-related
code into `global_scope`, we can safely replace that functionality
in `main.cpp` with a method call:
{{< codelines "C++" "compiler/12/main.cpp" 121 132 >}}
On the other hand, we need top-level logic to handle `definition_group`s.
This is pretty straightforward, and the main trick is to remember to
update the function's mangled name. Right now, depending on the choice
of mangling algorithm, it's possible even for top-level functions to
have their names changed, and we must account for that. The whole code is:
{{< codelines "C++" "compiler/12/main.cpp" 52 62 >}}
Finally, we call `global_scope`'s methods in `main()`:
{{< codelines "C++" "compiler/12/main.cpp" 148 151 >}}
That's it! Please note that I've mentioned or hinted at minor changes to the
codebase. Detailing every single change this late into the project is
needlessly time consuming and verbose; Gitea reports that I've made 677
insertions into and 215 deletions from the code. As always, I provide
the [source code for the compiler](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/12), and you can also take a look at the
[Gitea-generated diff](https://dev.danilafe.com/Web-Projects/blog-static/compare/1905601aaa96d11c771eae9c56bb9fc105050cda...21851e3a9c552383ee8c4bc878ea06e7d28c333e)
at the time of writing. If you want to follow along, feel free to check
them out!
### Running Our Programs
It's important to test all the language features that we just added. This
includes recursive definitions, nested function dependency cycles, and
uses of lambda functions. Some of the following examples will be rather
silly, but they should do a good job of checking that everything works
as we expect. Let's start with a simple use of a recursive definition
inside a `let/in`. A classic definition in that form is of `fix`
(the fixpoint combinator):
```Haskell
fix f = let x = f x in x
```
This defines `x` to be `f x`, which by substitution becomes `f (f x)`, and then
`f (f (f x))` and so on. The fixpoint combinator allows one to write a
recursive function that doesn't use its own name in the body. Rather,
we write a function expecting to receive 'itself' as a value:
```Haskell
fix :: (a -> a) -> a
factRec :: (Int -> Int) -> Int -> Int
factRec f x = if x == 0 then 1 else x * f x
fact :: Int -> Int
fact = fix factRec
```
Notice that `factRec` doesn't reference itself, but rather takes
as argument a function it expects to be 'factorial' called `f`,
and uses that in its recursive case. We can write something similar
in our language, perhaps to create an infinite list of ones:
{{< codeblock "text" "compiler/12/examples/fixpoint.txt" >}}
We want `sumTwo` to take the first two elements from the list,
and return their sum. For an infinite list of ones, we expect
this sum to be equal to 2, and it is:
```
Result: 2
```
Next, let's try to define a function which has a mutually recursive pair
of definitions inside of a `let/in`. Let's also make these expressions
reference a function from the global scope, so that we know our
dependency tracking works as expected:
{{< codeblock "text" "compiler/12/examples/letin.txt" >}}
Here, we have a function `mergeUntil` which, given two lists
and a predicate, combines the two lists as long as
the predicate returns `True`. It does so using a convoluted
pair of mutually recursive functions, one of which
unpacks the left list, and the other the right. Each of the
functions calls the global function `if`. We also use two
definitions inside of `main` to create the two lists we're
going to merge. The compiler outputs the following (correct)
types:
```
const: forall bb bc . bc -> bb -> bc
if: Bool* -> List* Int* -> List* Int* -> List* Int*
main: Int*
mergeUntil: List* Int* -> List* Int* -> (Int* -> Bool*) -> List* Int*
sum: List* Int* -> Int*
```
And the result is 21, as would be expected from the sum of the numbers 1-6:
```
Result: 21
```
Let's try lambda functions now. We can try using them for a higher-order function
like `map`:
{{< codeblock "text" "compiler/12/examples/lambda.txt" >}}
In this example, we first double every element in the list, then square it,
and finally take the sum. This should give us 4+16+36 = 56, and so it does:
```
Result: 56
```
Finally, let's do some magic with a locally-declared data type. We'll make a
"packer" that creates a wrapped instance of a type, `Packed a`. Since the
constructor of this data type is not globally visible, it's not possible
to get the value back out, except by using an 'unpacking' function that
we provide:
{{< codeblock "text" "compiler/12/examples/packed.txt" >}}
Here, the `packer` definition returns a pair of the 'packing'
and 'unpacking' functions. The 'packing' function simply applies
the constructor of `Packed` to its argument, while the 'unpacking'
function performs pattern matching (which is possible since the
data type is still in scope there). We expect `unpack (pack 3)` to
return 3, and it does:
```
Result: 3
```
Trying to pattern match, though, doesn't work, just like we would want!
This is enough to convince me that our changes do, indeed, work! Of
the 'major' components that I wanted to cover, only __Input/Output__
remains! Additionally, a [lobste.rs](https://lobste.rs) user suggested
that we also cover namespacing, and perhaps we will look into that as well.
Before either of those things, though, I think that I want to go through
the compiler and perform another round of improvements, similarly to
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
of refactoring while covering new content, since major changes need to
be explained and presented for the post to make sense. I hope to see
you in these future posts!

View File

@@ -0,0 +1,304 @@
---
title: Rendering Mathematics On The Back End
date: 2020-07-21T14:54:26-07:00
tags: ["Website", "Nix", "Ruby", "KaTeX"]
---
Due to something of a streak of bad luck when it came to computers, I spent a
significant amount of time using a Linux-based Chromebook, and then a
Pinebook Pro. It was, in some way, enlightening. The things that I used to take
for granted with a 'powerful' machine now became a rare luxury: StackOverflow,
and other relatively static websites, took upwards of ten seconds to finish
loading. On Slack, each of my keypresses could take longer than 500ms to
appear on the screen, and sometimes, it would take several seconds. Some
websites would present me with a white screen, and remain that way for much
longer than I had time to wait. It was awful.
At one point, I installed uMatrix, and made it the default policy to block
all JavaScript. For the most part, this worked well. Of course, I had to
enable JavaScript for applications that needed to be interactive, like
Slack, and Discord. But for the most part, I was able to browse the majority
of the websites I normally browse. This went on until I started working
on the [compiler series]({{< relref "00_compiler_intro.md" >}}) again,
and discovered that the LaTeX math on my page, which was required
for displaying things like inference rules, didn't work without
JavaScript. I was left with two options:
* Allow JavaScript, and continue using MathJax to render my math.
* Make it so that the mathematics are rendered on the back end.
I've [previously written about math rendering]({{< relref "math_rendering_is_wrong.md" >}}),
and made the observation that MathJax's output for LaTeX is __identical__
on every computer. From the MathJax 2.6 change log:
> _Improved CommonHTML output_. The CommonHTML output now provides the same layout quality and MathML support as the HTML-CSS and SVG output. It is on average 40% faster than the other outputs and the markup it produces are identical on all browsers and thus can also be pre-generated on the server via MathJax-node.
It seems absurd, then, to offload this kind of work onto the users, to
be done over and over again. As should be clear from the title of
this post, this made me settle for the second option: it was
__obviously within reach__, especially for a statically-generated website
like mine, to render math on the backend.
I settled on the following architecture:
* As before, I would generate my pages using Hugo.
* I would use the KaTeX NPM package to render math.
* To build the website no matter what system I was on, I would use Nix.
It so happens that Nix isn't really required for using my approach in general.
I will give my setup here, but feel free to skip ahead.
### Setting Up A Nix Build
My `default.nix` file looks like this:
```Nix {linenos=table}
{ stdenv, hugo, fetchgit, pkgs, nodejs, ruby }:
let
url = "https://dev.danilafe.com/Web-Projects/blog-static.git";
rev = "<commit>";
sha256 = "<hash>";
requiredPackages = import ./required-packages.nix {
inherit pkgs nodejs;
};
in
stdenv.mkDerivation {
name = "blog-static";
version = rev;
src = fetchgit {
inherit url rev sha256;
};
builder = ./builder.sh;
converter = ./convert.rb;
buildInputs = [
hugo
requiredPackages.katex
(ruby.withPackages (ps: [ ps.nokogiri ]))
];
}
```
I'm using `node2nix` to generate the `required-packages.nix` file, which allows me,
even from a sandboxed Nix build, to download and install `npm` packages. This is needed
so that I have access to the `katex` binary at build time. I fed the following JSON file
to `node2nix`:
```JSON {linenos=table}
[
"katex"
]
```
The Ruby script I wrote for this (more on that soon) required the `nokogiri` gem, which
I used for traversing the HTML generated for my site. Hugo was obviously required to
generate the HTML.
### Converting LaTeX To HTML
After my first post complaining about the state of mathematics on the web, I received
the following email (which the author allowed me to share):
> Sorry for having a random stranger email you, but in your blog post
[(link)](https://danilafe.com/blog/math_rendering_is_wrong) you seem to focus on MathJax's
difficulty in rendering things server-side, while quietly ignoring that KaTeX's front
page advertises server-side rendering. Their documentation [(link)](https://katex.org/docs/options.html)
even shows (at least as of the time this email was sent) that it renders both HTML
(to be arranged nicely with their CSS) for visuals and MathML for accessibility.
The author of the email then kindly provided a link to a page they generated using KaTeX and
some Bash scripts. The math on this page was rendered at the time it was generated.
This is a great point, and KaTeX is indeed usable for server-side rendering. But I've
seen few people who do actually use it. Unfortunately, as I pointed out in my previous post on the subject,
few tools actually take your HTML page and replace LaTeX with rendered math.
Here's what I wrote about this last time:
> [In MathJax,] The bigger issue, though, was that the `page2html`
program, which rendered all the mathematics in a single HTML page,
was gone. I found `tex2html` and `text2htmlcss`, which could only
render equations without the surrounding HTML. I also found `mjpage`,
which replaced mathematical expressions in a page with their SVG forms.
This is still the case, in both MathJax and KaTeX. The ability
to render math in one step is the main selling point of front-end LaTeX renderers:
all you have to do is drop in a file from a CDN, and voila, you have your
math. There are no such easy answers for back-end rendering. In fact,
as we will soon see, it's not possible to just search-and-replace occurrences
of mathematics on your page, either. To actually get KaTeX working
on the backend, you need access to tools that handle the potential variety
of edge cases associated with HTML. Such tools, to my knowledge, do not
currently exist.
I decided to write my own Ruby script to get the job done. From this script, I
would call the `katex` command-line program, which would perform
the heavy lifting of rendering the mathematics.
There are two types of math on my website: inline math and display math.
On the command line ([here are the docs](https://katex.org/docs/cli.html)),
the distinction is made using the `--display-mode` argument. So, the general algorithm
is to replace the code inside the `$$...$$` with their display-rendered version,
and the code inside the `\(...\)` with the inline-rendered version. I came up with
the following Ruby function:
```Ruby {linenos=table}
def render_cached(cache, command, string, render_comment = nil)
cache.fetch(string) do |new|
puts " Rendering #{render_comment || new}"
cache[string] = Open3.popen3(command) do |i, o, e, t|
i.write new
i.close
o.read.force_encoding(Encoding::UTF_8).strip
end
end
end
```
Here, the `cache` argument is used to prevent re-running the `katex` command
on an equation that was already rendered before (the output is the same, after all).
The `command` is the specific shell command that we want to invoke; this would
be either `katex` or `katex -d`. The `string` is the math equation to render,
and the `render_comment` is the string to print to the console instead of the equation
(so that long, display math equations are not printed out to standard out).
Then, given a substring of the HTML file, we use regular expressions
to find the `\(...\)` and `$$...$$`s, and use the `render_cached` method
on the LaTeX code inside.
```Ruby {linenos=table}
def perform_katex_sub(inline_cache, display_cache, content)
rendered = content.gsub /\\\(((?:[^\\]|\\[^\)])*)\\\)/ do |match|
render_cached(inline_cache, "katex", $~[1])
end
rendered = rendered.gsub /\$\$((?:[^\$]|$[^\$])*)\$\$/ do |match|
render_cached(display_cache, "katex -d", $~[1], "display")
end
return rendered
end
```
There's a bit of a trick to the final layer of this script. We want to be
really careful about where we replace LaTeX, and where we don't. In
particular, we _don't_ want to go into the `code` tags. Otherwise,
it wouldn't be possible to talk about LaTeX code! I also suspect that
some captions, alt texts, and similar elements should also be left alone.
However, I don't have those on my website (yet), and I won't worry about
them now. Either way, because of the code tags,
we can't just search-and-replace over the entire page; we need to be context
aware. This is where `nokogiri` comes in. We parse the HTML, and iterate
over all of the 'text' nodes, calling `perform_katex_sub` on all
of those that _aren't_ inside code tags.
Fortunately, this kind of iteration is pretty easy to specify thanks to something called XPath.
This was my first time encountering it, but it seems extremely useful: it's
a sort of language for selecting XML nodes. First, you provide an 'axis',
which is used to specify the positions of the nodes you want to look at
relative to the root node. The axis `/` looks at the immediate children
(this would be the `html` tag in a properly formatted document, I would imagine).
The axis `//` looks at all the transitive children. That is, it will look at the
children of the root, then its children, and so on. There's also the `self` axis,
which looks at the node itself.
After you provide an axis, you need to specify the type of node that you want to
select. We can write `code`, for instance, to pick only the `<code>....</code>` tags
from the axis we've chosen. We can also use `*` to select any node, and we can
use `text()` to select text nodes, such as the `Hello` inside of `<b>Hello</b>`.
We can also apply some more conditions to the nodes we pick using `[]`.
For us, the relevant feature here is `not(...)`, which allows us to
select nodes that do __not__ match a particular condition. This is all
we need to know.
We write:
* `//`, starting to search for nodes everywhere, not just the root of the document.
* `*`, to match _any_ node. We want to replace math inside of `div`s, `span`s, `nav`s,
all of the `h`s, and so on.
* `[not(self::code)]`, cutting out all the `code` tags.
* `/`, now selecting the nodes that are immediate descendants of the nodes we've selected.
* `text()`, giving us the text contents of all the nodes we've selected.
All in all:
```
//*[not(self::code)]/text()
```
Finally, we use this XPath from `nokogiri`:
```Ruby {linenos=table}
files = ARGV[0..-1]
inline_cache, display_cache = {}, {}
files.each do |file|
puts "Rendering file: #{file}"
document = Nokogiri::HTML.parse(File.open(file))
document.search('//*[not(self::code)]/text()').each do |t|
t.replace(perform_katex_sub(inline_cache, display_cache, t.content))
end
File.write(file, document.to_html)
end
```
I named this script `convert.rb`; it's used from inside of the Nix expression
and its builder, which we will cover below.
### Tying it All Together
Finally, I wanted an end-to-end script to generate HTML pages and render the LaTeX in them.
I used Nix for this, but the below script will largely be compatible with a non-Nix system.
I came up with the following, commenting on Nix-specific commands:
```Bash {linenos=table}
# Nix-specific; set up paths.
source $stdenv/setup
# Build site with Hugo
# The cp is Nix-specific; it copies the blog source into the current directory.
cp -r $src/* .
hugo --baseUrl="https://danilafe.com"
# Render math in HTML and XML files.
# $converter is Nix-specific; you can just use convert.rb.
find public/ -regex "public/.*\.html" | xargs ruby $converter
# Output result
# $out is Nix-specific; you can replace it with your destination folder.
mkdir $out
cp -r public/* $out/
```
This is it! Using the two scripts, `convert.rb` and `builder.sh`, I
was able to generate my blog with the math rendered on the back-end.
Please note, though, that I had to add the KaTeX CSS to my website's
`<head>`.
### Caveats
The main caveat of my approach is performance. For every piece of
mathematics that I render, I invoke the `katex` command. This incurs
the penalty of Node's startup time, every time, and makes my approach
take a few dozen seconds to run on my relatively small site. The
better approach would be to use a NodeJS script, rather than a Ruby one,
to perform the conversion. KaTeX also provides an API, so such a NodeJS
script can find the files, parse the HTML, and perform the substitutions.
I did quite like using `nokogiri` here, though, and I hope that an equivalently
pleasant solution exists in JavaScript.
Re-rendering the whole website is also pretty wasteful. I rarely change the
mathematics on more than one page at a time, but every time I do so, I have
to re-run the script, and therefore re-render every page. This makes sense
for me, since I use Nix, and my builds are pretty much always performed
from scratch. On the other hand, for others, this may not be the best solution.
### Alternatives
The same person who sent me the original email above also pointed out
[this `pandoc` filter for KaTeX](https://github.com/Zaharid/pandoc_static_katex).
I do not use Pandoc, but from what I can see, this filter relies on
Pandoc's `Math` AST nodes, and applies KaTeX to each of those. This
should work, but wasn't applicable in my case, since Hugo's shortcodes
don't mix well with Pandoc. However, it certainly seems like a workable
solution.
### Conclusion
With the removal of MathJax from my site, it is now completely JavaScript free,
and contains virtually the same HTML that it did beforehand. This, I hope,
makes it work better on devices where computational power is more limited.
I also hope that it illustrates a general principle - it's very possible,
and plausible, to render LaTeX on the back-end for a static site.

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 476 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 204 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

View File

@@ -0,0 +1,381 @@
---
title: DELL Is A Horrible Company And You Should Avoid Them At All Costs
date: 2020-07-23T13:40:05-07:00
tags: ["Electronics"]
---
I really do not want this to be a consumer electronics blog. Such things
aren't interesting to me, and nor do I have much knowledge
about them. However, sometimes, ripples from these areas make their way
into my life, and this is one such instance. Let me tell you
{{< sidenote "right" "source-note" "a story" >}}
I originally wrote about this in
<a href="https://www.dell.com/community/XPS/Ridiculously-Bad-Support-Experience/td-p/7554383">a thread on DELL's support website</a>. Some of this post is
going to be adapted from the support website, but some things have happened
since. You will probably notice the change between the terse language I used
in the original post and the fresh text that I'm writing now.
{{< /sidenote >}} of
my experience with DELL and their XPS 2-in-1 laptop, which has gone on since
around January of 2020, and is still going at the time of writing, in July
2020, half a year later.
I was, until recently, an undergraduate student in Computer Science. I will
soon be starting my Masters in Computer Science, too. I say this to make one
thing clear: I need a computer. Not only is it a necessity for my major,
but the majority of my hobbies -- including this blog -- are digital, too.
Since my university is a couple of hours from my home, I travel back and forth
a lot. I also have a cozy little spot in the
{{< sidenote "right" "offices-note" "graduate student offices" >}}
They're a bunch of cubicles in a keycard-protected room, really. Nothing fancy.
{{< /sidenote >}}at my university, but travel by bus, so I find myself spending
roughly equal portions of my work time at home and 'elsewhere'. A laptop
as my primary machine, I thought, made sense. But it had to be a decent one.
Persuaded by one of my instructors, who stressed the importance of vision and
a decent screen, I settled on a DELL XPS, which at the time came with a 4k
display.
As is commonplace, things went great at first. The screen _was_ really nice,
all of my code compiled swiftly, and even the games I occasionally played ran
at a solid 60fps. I was happy with my purchase.
There was one hiccup before things went really downhill, a sort of
foreshadowing of things to come. My trackpad didn't work at peculiar times.
### Prologue: Trackpad Hiccups
While working, booted into Linux, I noticed that my trackpad was having some
trouble. It was stuttering, and occasionally wouldn't work at all for seconds
at a time. I assumed that this was a problem with the trackpad drivers on
Linux, or perhaps the whole system was freezing up. I rebooted, and the
problem went away.
Until it came back.
A few days later, my trackpad was freezing virtually every minute.
It was strange, but fortunately, I'm used to a keyboard-based workflow, and
the malfunctions did not affect me too much. It was just a little troubling.
What soon made it more troubling, was that I noticed this exact same issue
occurring on Windows. To me, this meant one dreadful thing: it was a hardware
issue.
I poked and prodded for a little bit, and finally discovered the cause:
whenever I put my hand on the left palmrest, the trackpad would reliably stop
working. Knowing what the issue was, I called DELL. I spoke to a guy on the
other end, who had me run through diagnostics, driver updates, and BIOS
settings (I imagined this was procedure, so I didn't mind doing the extra
work to make the other guy's job easier). Finally, he scheduled a repair
appointment. A technician came into my house, took off the laptop cover,
and said something along the lines of:
> Now look. They gave me a whole new motherboard and case to replace yours,
but in my personal opinion, this is a bad idea. Things are bound to break
when you do this. See how the replacement case has an insulating piece
of fabric under the left palmrest, and yours doesn't? Why don't we rip
the fabric off the replacement case, and tape it in place on your machine,
without any reassembly?
This man was wiser than any of the other DELL technicians, I now understand.
The repair went without a hitch. He grilled me for going to college instead of
just picking up a trade, which was cheaper and offered more job security.
In the end, I felt a little weird about having a piece of fabric duct taped
inside my computer, but the trackpad had no more issues ever since. All was
well.
### Service Request 1: Broken D Key
All was well, that is, until the middle of winter term. I was typing up an
assignment for a university class. I was working as usual, when I suddenly
noticed that the "d" key stopped working - it had to be pressed rather weird
to register on the computer. I looked down, and discovered that the key had
snapped in half. The top part of the key fell off shortly thereafter.
{{< figure src="brokenkey.jpg" caption="The broken D key shortly after the above events." >}}
At that point, I was more surprised than anything. I hadn't heard of something
like this ever happening, especially under circumstances as normal as typing.
Regardless, I contacted support, and set up a repair appointment. Things only
went downhill from there.
Again, the appointment was scheduled, and only a few days later, another
technician arrived at my house. The only way to repair the key, he said,
was to replace the whole keyboard. The keyboard happens to be located
underneath all the other hardware, and so, the entire laptop had to be
disassembled and reassembled from scratch. He worked for about an hour, and
eventually, he put the machine together. The words of the previous
technician, who wanted to avoid doing exactly what had just been done, echoed
in my head:
> Things are bound to break when you do this.
I asked him to test it, just to make sure everything works. Sure enough,
not everything did work: the machine no longer had sound!
### Service Request 2: No sound
During diagnostics, the laptop did not emit the "beep" it usually does. This
was the first sign. Booting into Windows, the sound icon was crossed out in
red, and no sound was present. Booting into Linux led to similar results.
The microphone on the machine did not seem to work either. The service
technician said that he didn't have the parts to repair it, told me he'd call
it in, and left. Soon after, I got an email asking for times I'm available to
call: I said "any time except for 1-4 pacific time". DELL support proceeded
to call me at 3pm pacific time, when I had no service. Unable to reach me,
they promptly notified me that they are archiving my service request.
This all occurred near finals week at my university, so I had to put the issue
on hold. I had to maintain my grades, and I had to grade heaps of assignments
from other students. Though the lack of sound was annoying, it wasn't as
pressing as preparing for exams, so it was during spring break that I finally
called again, and scheduled the service appointment. By then,
{{< sidenote "right" "pandemic-note" "the pandemic was in full swing," >}}
Just for posterity, in 2020, there had been an outbreak of COVID-19,
a Coronavirus. Many states in the U.S., including my own, issued
the orders for lockdown and social distancing, which meant the closing
of schools, restaurants, and, apparently, the cessation of in-person
repairs.
{{< /sidenote >}}and DELL told me they'd mail me a box to put my laptop in, and
I'd have to mail it off to their service center. Sure, I thought, that's
fine. If it's at the service center, they won't ever "not have the required
parts". I told the tech support person my address, he read it back to me, and
so it was settled.
Until, that is, the box arrived at the wrong address.
I had received the machine as a gift from my family, who purchased the
computer to arrive at their address. The box arrived at that address too,
despite my explicit instructions to have it delivered to my current residence.
Since my family and I live 2 hours apart, it took 4 total hours to get the box
to me (a drive that couldn't be made right away!), and by the time I had it,
DELL was already threatening me again with closing the service request.
Eventually, I was able to mail the machine off, and about 5 business days
later (business days during which I did not have a working machine, which is
very necessary for my school and job) I received it back. I was excited to
have the machine back, but that didn't last very long. As I was using the
computer with Wolfram Mathematica (a rather heavy piece of software running
under Linux), I noticed that it was discharging even while plugged in. I
booted into Windows, and was greeted with a warning, something along the
lines of: "you are using a slow charger. Please use the official adapter".
But I was using the official adapter! I also tried to plug my mouse into the
relevant USB-C port, only to discover that it did not work. I had to make
another service request.
### Service Request 3: Broken Charging Port
This time, I made sure to tell the person on the other end of the support
call to please send it to my address. I asked if there was anything I can do,
or anyone I can contact, and was told "no, just mail the computer in again."
I obliged. The box arrived at the right address this time, so I was able to
ship it off.
In the "describe your issue" field on the provided form, I begged the
technicians to send me a working machine. "Please", I wrote "Last time I got
a machine back from support, it was still broken. I really need it for school
and work!". 5 business days later, I received the machine back. I plugged it
in to make sure it worked, only to find out . . . that the very same charging
port that I requested be repaired, is still broken! It would've been funny,
if it wasn't infuriating. How is it possible for me to receive a machine from
repairs, without the thing I asked to repair being as much as improved?!
Worse, a day after I received the machine back (I was able to keep using it
thanks to it having two USB-C ports capable of charging), the LCD suddenly
flashed, and started flickering. Thinking it was a software glitch, I
restarted the machine, only to discover the same flickering during the boot
animation and menu. Not only was the charging port not repaired, but now my
LCD was broken! (in the below picture, the screen is meant to be blue, but
the bottom part of the display is purple and flickering).
{{< figure src="brokenlcd.jpg" caption="The broken LCD." >}}
### Service Request 4: Broken LCD
I called in to support again, and they once again told me to ship the machine
off. What's worse, they accused me of breaking the port myself, and told me
this was no longer covered under basic warranty. I had to explain all over
again that the port worked fine before the fateful day the D-key snapped. They
told me they'd "look into it". Eventually, I received a box in the mail. I
wasn't told I would be receiving a box, but that wasn't a big deal. I mailed
off the machine.
The UPS shipping was always the most streamlined part of the process. A day
later, I was told my machine was received intact. Another day, and I was
informed that the technicians are starting to work on it. And then,
a few hours later:
> __Current Status:__
> The part(s) needed to repair your system are not currently in stock.
> __What's Next:__
> In most cases the parts are available is less than five days.
A few days is no big deal, and it made sense that DELL wouldn't just
have screens lying around. So I waited. And waited. And waited. Two weeks
later, I got a little tired of waiting, and called the repair center.
An automated message told me:
> We're currently experiencing heavy call volumes. Please try again later. Goodbye.
And the call was dropped. This happened every time I tried to call, no matter
the hour. The original status update -- the one that notified me about the
part shortage -- came on May 8th, but the machine finally arrived to me
(without prior warning) on June 2nd, almost a month later.
The charging port worked. Sound
worked. The screen wasn't flickering. I was happy for the brief moments that
my computer was loading. As soon as I started vim, though, I noticed something
was off: the fonts looked more pixelated. The DPI settings I'd painstakingly
tweaked were wrong. Now that I thought about it, even the GRUB menu was
larger. My suspicion growing, I booted into Windows, and looked at the display
settings. Noticeably fewer resolutions were listed in the drop-down menu;
worse, the highest resolution was 1080p. After almost a month of waiting,
DELL replaced my 4k laptop display with a 1080p one.
### System Replacement: Worse LCD Screen
I admit, I was angry. At the same time, the absurdity of it all was also
unbearable. Was this constant loop of hardware damage, the endless number of
support calls filled with hoarse jazz music, part of some kind of Kafkaesque
dream? I didn't know. I was at the end of my wits as to what to do. As a last
resort, I made [a tweet](https://twitter.com/DanilaTheWalrus/status/1268056637383692289)
from my almost-abandoned account. DELL Support's Twitter
account [quickly responded](https://twitter.com/DellCares/status/1268064691416334344), eager as always to destroy any semblance of
transparency by switching to private messages. I let them know my thoughts on the matter. I wanted a new machine.
{{< figure src="dm_1.png" caption="The first real exchange between me and DELL support." >}}
Of course we can proceed further. I wanted to know what kind of machine I was getting,
though. As long as it was the same model that I originally bought,
{{< sidenote "right" "replacement-note" "it would be better than what I have." >}}
At least in principle, it would be. Perhaps the wear and tear on the replacement
parts would be greater, but at least I would have, presumably, a machine
in good condition that had the 4k screen that made me buy it in the first place.
{{< /sidenote >}}
Despite this, I knew that the machine I was getting was likely refurbished.
This _had_ to mean that some of the parts would come from other, used, machines.
This irked me, because, well, I paid for a new machine.
{{< figure src="dm_2.png" caption="Ah, the classic use of canned responses." >}}
Their use of the canned response, and their unwillingness to answer this simple
question, was transparent. Indeed, the machine would be made of used
parts. I still wanted to proceed. DELL requested that I send an image of
my machine which included its service tag, together with a piece of
paper which included my name and email address. I obliged, and quickly got a response:
{{< figure src="dm_3.png" caption="If it was me who was silent for 4 days, my request would've long been cancelled. " >}}
Thanks, Kalpana. You will never hear this name again, not in this post.
Only one or two messages from DELL support are ever from the same person.
About a week later, I get the following beauty:
{{< figure src="dm_4.png" caption="Excuse me? What's going on?" >}}
My initial request was cancelled? Why wasn't I told? What was the reason?
What the heck was going on at DELL Support? Should I be worried?
My question of "Why" was answered with the apt response of "Yes",
and a message meant to pacify me. While this was going on, I ordered
a
{{< sidenote "right" "pinebook-note" "Pinebook Pro." >}}
The Pinebook, a $200 machine, has, thus far, worked more reliably than any DELL product
I've had the misfortune of owning.
{{< /sidenote >}} It was not a replacement for the DELL machine, but rather
the first step towards migrating my setup to a stationary computer,
and a small, lightweight SSH device. At this point,
there was no more faith in DELL left in my mind.
Soon, DELL required my attention, only to tell me that they put in
a request to see the status of my request. How bureaucratic. Two
more names -- Kareem and JKC -- flickered through the chats,
also never to be seen again.
{{< figure src="dm_5.png" caption="Not much of a conversation, really." >}}
Finally, on July 9th (a month and six days after my first real message to DELL
support), I was notified by my roommates that FedEx tried to deliver a package
to our house, but gave up when no one came to sign for it. On one hand, this
is great: FedEx didn't just leave my laptop on the porch. On the other hand,
though, this was the first time I heard about receiving the machine. I got
to the house the next day, unpacked the new computer, and tested all the things
that had, at one point, failed. Everything seemed to work. I transferred all my
files, wiped the old computer clean, and mailed it off. I also spent some
time dealing with the fallout of DELL PremierColor starting on its own,
and permanently altering the color profile of my display. I don't have the
special, physical calibration device, and therefore still suspect that my
screen is somewhat green.
Today, I discovered that the microphone of the replacement machine didn't work.
### Am I The Problem?
When the mysterious FedEx package arrived at my door on July 9th, I did some
digging to verify my suspicion that it was from DELL. I discovered their
HQ in Lebanon, TN. This gave me an opportunity to
{{< sidenote "right" "reviews-note" "see" >}}
See, of course, modulo whatever bias arises when only those who feel strongly leave reviews.
{{< /sidenote >}} whether or not I was alone in this situation. I was genuinely
worried that I was suffering from the technical variant of
[Munchausen Syndrome](https://www.webmd.com/mental-health/munchausen-syndrome#1),
and that I was compulsively breaking my electronics. These worries were
dispelled by the reviews on Google:
{{< figure src="reviews_1.png" caption="Most of the reviews are pretty terse, but the ratings convey the general idea." >}}
There were even some that were shockingly similar in terms of the apparent
incompetence of the DELL technicians:
{{< figure src="reviews_2.png" caption="Now, now, Maggie, I wouldn't go as far as recommending Apple." >}}
So, this is not uncommon. This is how DELL deals with customers now. It's
awfully tiring, really; I've been in and out of repairs continuously for
almost half a year, now. That's 2.5% of my life at the time of writing,
all non-stop since the D-key. And these people probably have spent considerable
amounts of time, too.
### It's About the Principle
The microphone on my machine is rather inconsequential to me. I can, and regularly do,
teleconference from my phone (a habit that I developed thanks to DELL, since
my computer was so often unavailable). I don't need to dictate anything. Most
of my communication is via chat.
Really, compared to the other issues (keyboard, sound, charging, USB ports, the broken or low-resolution screen)
the microphone is a benign problem. As I have now learned, things could be worse.
But why should the thought, _"It could be worse"_, even cross my mind
when dealing with such a matter? Virtually every issue that has
occurred with my computer thus far could -- should! -- have been diagnosed
at the repair center. The 'slow charger' warning shows up in BIOS,
so just turning the computer on while plugged in should make it obvious something
is wrong; doubly so when the very reason that the laptop was in repairs
in the first place was because of the faulty charger. I refuse to believe
that screens with different resolutions have the same part identifier,
either. How have the standards of service from DELL fallen so low?
How come this absurd scenario plays out not just for me, but
for others as well? It would be comforting, in a way, to think
that I was just the 'exceptional case'. But apparently, I'm not.
This is standard practice.
### TL;DR
Here are the problems I've had with DELL:
* The machine shipped, apparently, with a missing piece of insulation.
* The "D" key on the keyboard snapped after only a few months of use.
* While repairing the "D" key, the DELL technician broke the computer's sound and microphone.
* While repairing the sound and microphone, the DELL technicians broke a charging port.
* The DELL technicians failed to repair the charging port, mailing me back a machine
exhibiting the same issues, in addition to a broken LCD screen.
* The repair of the LCD screen took almost a month, and concluded
with me receiving a worse quality screen than I originally had.
* The system replacement that followed the botched LCD repair took
over a month to go through.
* The replaced system was made partially of used parts, which
DELL refused to admit.
* The microphone on the replacement system was broken.
### Closing Thoughts
I will not be sending my system in again. It doesn't make sense to do so -
after mailing my system in for repairs three times, I've measured empirically that
the chance of failure is 100%. Every service request is a lottery, dutifully
giving out a random prize of another broken part. I no longer wish to play;
as any person who gambles should, I will quit while I'm ahead, and cut my losses.
However, I hope for this story, which may be unusual in its level of detail,
but not its content, to be seen by someone. I hope to prevent
someone out there from feeling the frustration, and anger, and peculiar amusement
that I felt during this process. I hope for someone else to purchase a computer
with money, and not with their sanity. A guy can hope.
If you're reading this, please take this as a warning. __DELL is a horrible
company. They have the lowest standards of customer support of any
U.S. company that I've encountered. Their technicians are largely incompetent.
Their quality assurance is non-existent. Stay away from them.__

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 227 KiB

View File

@@ -1,95 +0,0 @@
---
title: "Clairvoyance for Good: Using Lazy Evaluation in Haskell"
date: 2020-05-03T20:05:29-07:00
tags: ["Haskell"]
draft: true
---
While tackling a project for work, I ran across a rather unpleasant problem.
I don't think it's valuable to go into the specifics here (it's rather
large and convoluted); however, the outcome of this experience led me to
discover a very interesting technique for lazy functional languages,
and I want to share what I learned.
### Time Traveling
Some time ago, I read [this post](https://kcsongor.github.io/time-travel-in-haskell-for-dummies/) by Csongor Kiss about time traveling in Haskell. It's
really cool, and makes a lot of sense if you have wrapped your head around
lazy evaluation. I'm going to present my take on it here, but please check out
Csongor's original post if you are interested.
Say that you have a list of integers, like `[3,2,6]`. Next, suppose that
you want to find the maximum value in the list. You can implement such
behavior quite simply with pattern matching:
```Haskell
myMax :: [Int] -> Int
myMax [] = error "Being total sucks"
myMax (x:xs) = max x $ myMax xs
```
You could even get fancy with a `fold`:
```Haskell
myMax :: [Int] -> Int
myMax = foldr1 max
```
All is well, and this is rather elementary Haskell. But now let's look at
something that Csongor calls the `repMax` problem:
> Imagine you had a list, and you wanted to replace all the elements of the
> list with the largest element, by only passing the list once.
How can we possibly do this in one pass? First, we need to find the maximum
element, and only then can we have something to replace the other numbers
with! It turns out, though, that we can just expect to have the future
value, and all will be well. Csongor provides the following example:
```Haskell {linenos=table}
repMax :: [Int] -> Int -> (Int, [Int])
repMax [] rep = (rep, [])
repMax [x] rep = (x, [rep])
repMax (l : ls) rep = (m', rep : ls')
where (m, ls') = repMax ls rep
m' = max m l
doRepMax :: [Int] -> [Int]
doRepMax xs = xs'
where (largest, xs') = repMax xs largest
```
In the above snippet, `repMax` expects to receive the maximum value of
its input list. At the same time, it also computes that maximum value,
returning it and the newly created list. `doRepMax` is where the magic happens:
the `where` clause receives the maximum number from `repMax`, while at the
same time using that maximum number to call `repMax`!
This works because Haskell's evaluation model is, effectively,
[lazy graph reduction](https://en.wikipedia.org/wiki/Graph_reduction). That is,
you can think of Haskell as manipulating your code as
{{< sidenote "right" "tree-note" "a syntax tree," >}}
Why is it called graph reduction, you may be wondering, if the runtime is
manipulating syntax trees? To save on work, if a program refers to the
same value twice, Haskell has both of those references point to the
exact same graph. This violates the tree's property of having only one path
from the root to any node, and makes our program a graph. Graphs that
refer to themselves also violate the properties of a tree.
{{< /sidenote >}} performing
substitutions and simplifications as necessary until it reaches a final answer.
What the lazy part means is that parts of the syntax tree that are not yet
needed to compute the final answer can exist, unsimplified, in the tree. This is
what allows us to write the code above: the graph of `repMax xs largest`
effectively refers to itself. While traversing the list, it places references
to itself in place of each of the elements, and thanks to laziness, these
references are not evaluated.
Let's try a more complicated example. How about instead of creating a new list,
we return a `Map` containing the number of times each number occurred, but only
when those numbers were a factor of the maximum number. Our expected output
will be:
```
>>> countMaxFactors [1,3,3,9]
fromList [(1, 1), (3, 2), (9, 1)]
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

View File

@@ -0,0 +1,564 @@
---
title: "Time Traveling In Haskell: How It Works And How To Use It"
date: 2020-07-30T00:58:10-07:00
tags: ["Haskell"]
---
I recently got to use a very curious Haskell technique
{{< sidenote "right" "production-note" "in production:" >}}
As production as research code gets, anyway!
{{< /sidenote >}} time traveling. I say this with
the utmost seriousness. This technique worked like
magic for the problem I was trying to solve, and so
I thought I'd share what I learned. In addition
to the technique and its workings, I will also explain how
time traveling can be misused, yielding computations that
never terminate.
### Time Traveling
Some time ago, I read [this post](https://kcsongor.github.io/time-travel-in-haskell-for-dummies/) by Csongor Kiss about time traveling in Haskell. It's
really cool, and makes a lot of sense if you have wrapped your head around
lazy evaluation. I'm going to present my take on it here, but please check out
Csongor's original post if you are interested.
Say that you have a list of integers, like `[3,2,6]`. Next, suppose that
you want to find the maximum value in the list. You can implement such
behavior quite simply with pattern matching:
```Haskell
myMax :: [Int] -> Int
myMax [] = error "Being total sucks"
myMax (x:xs) = max x $ myMax xs
```
You could even get fancy with a `fold`:
```Haskell
myMax :: [Int] -> Int
myMax = foldr1 max
```
All is well, and this is rather elementary Haskell. But now let's look at
something that Csongor calls the `repMax` problem:
> Imagine you had a list, and you wanted to replace all the elements of the
> list with the largest element, by only passing the list once.
How can we possibly do this in one pass? First, we need to find the maximum
element, and only then can we have something to replace the other numbers
with! It turns out, though, that we can just expect to have the future
value, and all will be well. Csongor provides the following example:
```Haskell
repMax :: [Int] -> Int -> (Int, [Int])
repMax [] rep = (rep, [])
repMax [x] rep = (x, [rep])
repMax (l : ls) rep = (m', rep : ls')
where (m, ls') = repMax ls rep
m' = max m l
```
In this example, `repMax` takes the list of integers,
each of which it must replace with their maximum element.
It also takes __as an argument__ the maximum element,
as if it had already been computed. It does, however,
still compute the intermediate maximum element,
in the form of `m'`. Otherwise, where would the future
value even come from?
Thus far, nothing too magical has happened. It's a little
strange to expect the result of the computation to be
given to us; it just looks like wishful
thinking. The real magic happens in Csongor's `doRepMax`
function:
```Haskell
doRepMax :: [Int] -> [Int]
doRepMax xs = xs'
where (largest, xs') = repMax xs largest
```
Look, in particular, on the line with the `where` clause.
We see that `repMax` returns the maximum element of the
list, `largest`, and the resulting list `xs'` consisting
only of `largest` repeated as many times as `xs` had elements.
But what's curious is the call to `repMax` itself. It takes
as input `xs`, the list we're supposed to process... and
`largest`, the value that _it itself returns_.
This works because Haskell's evaluation model is, effectively,
[lazy graph reduction](https://en.wikipedia.org/wiki/Graph_reduction). That is,
you can think of Haskell as manipulating your code as
{{< sidenote "right" "tree-note" "a syntax tree," >}}
Why is it called graph reduction, you may be wondering, if the runtime is
manipulating syntax trees? To save on work, if a program refers to the
same value twice, Haskell has both of those references point to the
exact same graph. This violates the tree's property of having only one path
from the root to any node, and makes our program a DAG (at least). Graph nodes that
refer to themselves (which are also possible in the model) also violate the properties of
a DAG, and thus, in general, we are working with graphs.
{{< /sidenote >}} performing
substitutions and simplifications as necessary until it reaches a final answer.
What the lazy part means is that parts of the syntax tree that are not yet
needed to compute the final answer can exist, unsimplified, in the tree.
Why don't we draw a few graphs to get familiar with the idea?
### Visualizing Graphs and Their Reduction
Let's start with something that doesn't have anything fancy. We can
take a look at the graph of the expression:
```Haskell
length [1]
```
Stripping away Haskell's syntax sugar for lists, we can write this expression as:
```Haskell
length (1:[])
```
Then, recalling that `(:)`, or 'cons', is just a binary function, we rewrite:
```Haskell
length ((:) 1 [])
```
We're now ready to draw the graph; in this case, it's pretty much identical
to the syntax tree of the last form of our expression:
{{< figure src="length_1.png" caption="The initial graph of `length [1]`." class="small" >}}
In this image, the `@` nodes represent function application. The
root node is an application of the function `length` to the graph that
represents the list `[1]`. The list itself is represented using two
application nodes: `(:)` takes two arguments, the head and tail of the
list, and function applications in Haskell are
[curried](https://en.wikipedia.org/wiki/Currying). Eventually,
in the process of evaluation, the body of `length` will be reached,
and leave us with the following graph:
{{< figure src="length_2.png" caption="The graph of `length [1]` after the body of `length` is expanded." class="small" >}}
Conceptually, we only took one reduction step, and thus, we haven't yet gotten
to evaluating the recursive call to `length`. Since `(+)`
is also a binary function, `1+length xs` is represented in this
new graph as two applications of `(+)`, first to `1`, and then
to `length []`.
But what is that box at the root? This box _used to be_ the root of the
first graph, which was an application node. However, it is now
an _indirection_. Conceptually, reducing
this indirection amounts to reducing the graph
it points to. But why have we {{< sidenote "right" "altered-note" "altered the graph" >}}
This is a key aspect of implementing functional languages.
The language itself may be pure, while the runtime
can be, and usually is, impure and stateful. After all,
computers are impure and stateful, too!
{{< /sidenote >}} in this manner? Because Haskell is a pure language,
of course! If we know that a particular graph reduces to some value,
there's no reason to reduce it again. However, as we will
soon see, it may be _used_ again, so we want to preserve its value.
Thus, when we're done reducing a graph, we replace its root node with
an indirection that points to its result.
When can a graph be used more than once? Well, how about this:
```Haskell
let x = square 5 in x + x
```
Here, the initial graph looks as follows:
{{< figure src="square_1.png" caption="The initial graph of `let x = square 5 in x + x`." class="small" >}}
As you can see, this _is_ a graph, but not a tree! Since both
variables `x` refer to the same expression, `square 5`, they
are represented by the same subgraph. Then, when we evaluate `square 5`
for the first time, and replace its root node with an indirection,
we end up with the following:
{{< figure src="square_2.png" caption="The graph of `let x = square 5 in x + x` after `square 5` is reduced." class="small" >}}
There are two `25`s in the graph, and no more `square`s! We only
had to evaluate `square 5` exactly once, even though `(+)`
will use it twice (once for the left argument, and once for the right).
Our graphs can also include cycles.
A simple, perhaps _the most_ simple example of this in practice is Haskell's
`fix` function. It computes a function's fixed point,
{{< sidenote "right" "fixpoint-note" "and can be used to write recursive functions." >}}
In fact, in the lambda calculus, <code>fix</code> is pretty much <em>the only</em>
way to write recursive functions. In the untyped lambda calculus, it can
be written as: $$\lambda f . (\lambda x . f (x \ x)) \ (\lambda x . f (x \ x))$$
In the simply typed lambda calculus, it cannot be written in any way, and
needs to be added as an extension, typically written as \(\textbf{fix}\).
{{< /sidenote >}}
It's implemented as follows:
```Haskell
fix f = let x = f x in x
```
See how the definition of `x` refers to itself? This is what
it looks like in graph form:
{{< figure src="fixpoint_1.png" caption="The initial graph of `let x = f x in x`." class="tiny" >}}
I think it's useful to take a look at how this graph is processed. Let's
pick `f = (1:)`. That is, `f` is a function that takes a list,
and prepends `1` to it. Then, after constructing the graph of `f x`,
we end up with the following:
{{< figure src="fixpoint_2.png" caption="The graph of `fix (1:)` after it's been reduced." class="small" >}}
We see the body of `f`, which is the application of `(:)` first to the
constant `1`, and then to `f`'s argument (`x`, in this case). As
before, once we evaluated `f x`, we replaced the application with
an indirection; in the image, this indirection is the top box. But the
argument, `x`, is itself an indirection which points to the root of `f x`,
thereby creating a cycle in our graph. Traversing this graph looks like
traversing an infinite list of `1`s.
Almost there! A node can refer to itself, and, when evaluated, it
is replaced with its own value. Thus, a node can effectively reference
its own value! The last piece of the puzzle is how a node can access
_parts_ of its own value: recall that `doRepMax` calls `repMax`
with only `largest`, while `repMax` returns `(largest, xs')`.
I have to admit, I don't know the internals of GHC, but I suspect
this is done by translating the code into something like:
```Haskell
doRepMax :: [Int] -> [Int]
doRepMax xs = snd t
where t = repMax xs (fst t)
```
#### Detailed Example: Reducing `doRepMax`
If the above examples haven't elucidated how `doRepMax` works,
stick around in this section and we will go through it step-by-step.
This is a rather long and detailed example, so feel free to skip
this section to read more about actually using time traveling.
If you're sticking around, why don't we watch how the graph of `doRepMax [1, 2]` unfolds.
This example will be more complex than the ones we've seen
so far; to avoid overwhelming ourselves with notation,
let's adopt a different convention of writing functions. Instead
of using application nodes `@`, let's draw an application of a
function `f` to arguments `x1` through `xn` as a subgraph with root `f`
and children `x`s. The below figure demonstrates what I mean:
{{< figure src="notation.png" caption="The new visual notation used in this section." class="small" >}}
Now, let's write the initial graph for `doRepMax [1,2]`:
{{< figure src="repmax_1.png" caption="The initial graph of `doRepMax [1,2]`." class="small" >}}
Other than our new notation, there's nothing too surprising here.
The first step of our hypothetical reduction would replace the application of `doRepMax` with its
body, and create our graph's first cycle. At a high level, all we want is the second element of the tuple
returned by `repMax`, which contains the output list. To get
the tuple, we apply `repMax` to the list `[1,2]` and the first element
of its result. The list `[1,2]` itself
consists of two uses of the `(:)` function.
{{< figure src="repmax_2.png" caption="The first step of reducing `doRepMax [1,2]`." class="small" >}}
Next, we would also expand the body of `repMax`. In
the following diagram, to avoid drawing a noisy amount of
crossing lines, I marked the application of `fst` with
a star, and replaced the two edges to `fst` with
edges to similar looking stars. This is merely
a visual trick; an edge leading to a little star is
actually an edge leading to `fst`. Take a look:
{{< figure src="repmax_3.png" caption="The second step of reducing `doRepMax [1,2]`." class="medium" >}}
Since `(,)` is a constructor, let's say that it doesn't
need to be evaluated, and that its
{{< sidenote "right" "normal-note" "graph cannot be reduced further" >}}
A graph that can't be reduced further is said to be in <em>normal form</em>,
by the way.
{{< /sidenote >}} (in practice, other things like
packing may occur here, but they are irrelevant to us).
If `(,)` can't be reduced, we can move on to evaluating `snd`. Given a pair, `snd`
simply returns the second element, which in our
case is the subgraph starting at `(:)`. We
thus replace the application of `snd` with an
indirection to this subgraph. This leaves us
with the following:
{{< figure src="repmax_4.png" caption="The third step of reducing `doRepMax [1,2]`." class="medium" >}}
Since it's becoming hard to keep track of what part of the graph
is actually being evaluated, I marked the former root of `doRepMax [1,2]` with
a blue star. If our original expression occurred at the top level,
the graph reduction would probably stop here. After all,
we're evaluating our graphs using call-by-need, and there
doesn't seem to be a need for knowing what the arguments of `(:)` are.
However, stopping at `(:)` wouldn't be very interesting,
and we wouldn't learn much from doing so. So instead, let's assume
that _something_ is trying to read the elements of our list;
perhaps we are trying to print this list to the screen in GHCi.
In this case, our mysterious external force starts unpacking and
inspecting the arguments to `(:)`. The first argument to `(:)` is
the list's head, which is the subgraph starting with the starred application
of `fst`. We evaluate it in a similar manner to `snd`. That is,
we replace this `fst` with an indirection to the first element
of the argument tuple, which happens to be the subgraph starting with `max`:
{{< figure src="repmax_5.png" caption="The fourth step of reducing `doRepMax [1,2]`." class="medium" >}}
Phew! Next, we need to evaluate the body of `max`. Let's make one more
simplification here: rather than substituting `max` for its body
here, let's just reason about what evaluating `max` would entail.
We would need to evaluate its two arguments, compare them,
and return the larger one. The argument `1` can't be reduced
any more (it's just a number!), but the second argument,
a call to `fst`, needs to be processed. To do so, we need to
evaluate the call to `repMax`. We thus replace `repMax`
with its body:
{{< figure src="repmax_6.png" caption="The fifth step of reducing `doRepMax [1,2]`." class="medium" >}}
We've reached one of the base cases here, and there
are no more calls to `max` or `repMax`. The whole reason
we're here is to evaluate the call to `fst` that's one
of the arguments to `max`. Given this graph, doing so is easy.
We can clearly see that `2` is the first element of the tuple
returned by `repMax [2]`. We thus replace `fst` with
an indirection to this node:
{{< figure src="repmax_7.png" caption="The sixth step of reducing `doRepMax [1,2]`." class="medium" >}}
This concludes our task of evaluating the arguments to `max`.
Comparing them, we see that `2` is greater than `1`, and thus,
we replace `max` with an indirection to `2`:
{{< figure src="repmax_8.png" caption="The seventh step of reducing `doRepMax [1,2]`." class="medium" >}}
The node that we starred in our graph is now an indirection (the
one that used to be the call to `fst`) which points to
another indirection (formerly the call to `max`), which
points to `2`. Thus, any edge pointing to a star now
points to the value 2.
By finding the value of the starred node, we have found the first
argument of `(:)`, and returned it to our mysterious external force.
If we were printing to GHCi, the number `2` would appear on the screen
right about now. The force then moves on to the second argument of `(:)`,
which is the call to `snd`. This `snd` is applied to an instance of `(,)`, which
can't be reduced any further. Thus, all we have to do is take the second
element of the tuple, and replace `snd` with an indirection to it:
{{< figure src="repmax_9.png" caption="The eighth step of reducing `doRepMax [1,2]`." class="medium" >}}
The second element of the tuple was a call to `(:)`, and that's what the mysterious
force is processing now. Just like it did before, it starts by looking at the
first argument of this list, which is the list's head. This argument is a reference to
the starred node, which, as we've established, eventually points to `2`.
Another `2` pops up on the console.
Finally, the mysterious force reaches the second argument of the `(:)`,
which is the empty list. The empty list also cannot be evaluated any
further, so that's what the mysterious force receives. Just like that,
there's nothing left to print to the console. The mysterious force ceases.
After removing the unused nodes, we are left with the following graph:
{{< figure src="repmax_10.png" caption="The result of reducing `doRepMax [1,2]`." class="small" >}}
As we would have expected, two `2`s were printed to the console, and our
final graph represents the list `[2,2]`.
### Using Time Traveling
Is time traveling even useful? I would argue yes, especially
in cases where Haskell's purity can make certain things
difficult.
As a first example, Csongor provides an assembler that works
in a single pass. The challenge in this case is to resolve
jumps to code segments occurring _after_ the jump itself;
in essence, the address of the target code segment needs to be
known before the segment itself is processed. Csongor's
code uses the [Tardis monad](https://hackage.haskell.org/package/tardis-0.4.1.0/docs/Control-Monad-Tardis.html),
which combines regular state, to which you can write and then
later read from, and future state, from which you can
read values before you write them. Check out
[his complete example](https://kcsongor.github.io/time-travel-in-haskell-for-dummies/#a-single-pass-assembler-an-example) here.
Alternatively, here's an example from my research, which my
coworker and coauthor Kai helped me formulate. I'll be fairly
vague, since all of this is still in progress. The gist is that
we have some kind of data structure (say, a list or a tree),
and we want to associate with each element in this data
structure a 'score' of how useful it is. There are many possible
heuristics of picking 'scores'; a very simple one is
to make it inversely proportional to the number of times
an element occurs. To be more concrete, suppose
we have some element type `Element`:
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 5 6 >}}
Suppose also that our data structure is a binary tree:
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 14 16 >}}
We then want to transform an input `ElementTree`, such as:
```Haskell
Node A (Node A Empty Empty) Empty
```
Into a scored tree, like:
```Haskell
Node (A,0.5) (Node (A,0.5) Empty Empty) Empty
```
Since `A` occurred twice, its score is `1/2 = 0.5`.
Let's define some utility functions before we get to the
meat of the implementation:
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 8 12 >}}
The `addElement` function simply increments the counter for a particular
element in the map, adding the number `1` if it doesn't exist. The `getScore`
function computes the score of a particular element, defaulting to `1.0` if
it's not found in the map.
Just as before -- noticing that passing around the future values is getting awfully
bothersome -- we write our scoring function as though we have
a 'future value'.
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 18 24 >}}
The actual `doAssignScores` function is pretty much identical to
`doRepMax`:
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 26 28 >}}
There's quite a bit of repetition here, especially in the handling
of future values - all of our functions now accept an extra
future argument, and return a work-in-progress future value.
This is what the `Tardis` monad, and its corresponding
`TardisT` monad transformer, aim to address. Just like the
`State` monad helps us avoid writing plumbing code for
forward-traveling values, `Tardis` helps us do the same
for backward-traveling ones.
#### Cycles in Monadic Bind
We've seen that we're able to write code like the following:
```Haskell
(a, b) = f a c
```
That is, we were able to write function calls that referenced
their own return values. What if we try doing this inside
a `do` block? Say, for example, we want to sprinkle some time
traveling into our program, but don't want to add a whole new
transformer into our monad stack. We could write code as follows:
```Haskell
do
(a, b) <- f a c
return b
```
Unfortunately, this doesn't work. However, it's entirely
possible to enable this using the `RecursiveDo` language
extension:
```Haskell
{-# LANGUAGE RecursiveDo #-}
```
Then, we can write the above as follows:
```Haskell
do
rec (a, b) <- f a c
return b
```
This power, however, comes at a price. It's not as straightforward
to build graphs from recursive monadic computations; in fact,
it's not possible in general. The translation of the above
code uses `MonadFix`. A monad that satisfies `MonadFix` has
an operation `mfix`, which is the monadic version of the `fix`
function we saw earlier:
```Haskell
mfix :: Monad m => (a -> m a) -> m a
-- Regular fix, for comparison
fix :: (a -> a) -> a
```
To really understand how the translation works, check out the
[paper on recursive do notation](http://leventerkok.github.io/papers/recdo.pdf).
### Beware The Strictness
Though Csongor points out other problems with the
time traveling approach, I think he doesn't mention
an important idea: you have to be _very_ careful about introducing
strictness into your programs when running time-traveling code.
For example, suppose we wanted to write a function,
`takeUntilMax`, which would return the input list,
cut off after the first occurrence of the maximum number.
Following the same strategy, we come up with:
{{< codelines "Haskell" "time-traveling/TakeMax.hs" 1 12 >}}
In short, if we encounter our maximum number, we just return
a list of that maximum number, since we do not want to recurse
further. On the other hand, if we encounter a number that's
_not_ the maximum, we continue our recursion.
Unfortunately, this doesn't work; our program never terminates.
You may be thinking:
> Well, obviously this doesn't work! We didn't actually
compute the maximum number properly, since we stopped
recursing too early. We need to traverse the whole list,
and not just the part before the maximum number.
To address this, we can reformulate our `takeUntilMax`
function as follows:
{{< codelines "Haskell" "time-traveling/TakeMax.hs" 14 21 >}}
Now we definitely compute the maximum correctly! Alas,
this doesn't work either. The issue lies on lines 5 and 18,
more specifically in the comparison `x == m`. Here, we
are trying to base the decision of what branch to take
on a future value. This is simply impossible; to compute
the value, we need to know the value!
This is no 'silly mistake', either! In complicated programs
that use time traveling, strictness lurks behind every corner.
In my research work, I was at one point inserting a data structure into
a set; however, deep in the structure was a data type containing
a 'future' value, and using the default `Eq` instance!
Adding the data structure to a set ended up invoking `(==)` (or perhaps
some function from the `Ord` typeclass),
which, in turn, tried to compare the lazily evaluated values.
My code therefore didn't terminate, much like `takeUntilMax`.
Debugging time traveling code is, in general,
a pain. This is especially true since future values don't look any different
from regular values. You can see it in the type signatures
of `repMax` and `takeUntilMax`: the maximum number is just an `Int`!
And yet, trying to see what its value is will kill the entire program.
As always, remember Brian W. Kernighan's wise words:
> Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are,
by definition, not smart enough to debug it.
### Conclusion
This is about it! In a way, time traveling can make code performing
certain operations more expressive. Furthermore, even if it's not groundbreaking,
thinking about time traveling is a good exercise to get familiar
with lazy evaluation in general. I hope you found this useful!

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 132 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

View File

@@ -12,7 +12,7 @@ __py-starbound__, nicely enough, actually has a file named `FORMATS.md`. This fi
> This section will contain information on how to retrieve a value from a BTreeDB5 database.
Not very helpful. Before I go into what I managed to determine from the code, we may first take a look at one thing that we already know about the world format - it is a [B-Tree](https://en.wikipedia.org/wiki/B-tree).
## Binary Search Trees
### Binary Search Trees
The B-Tree is a generalization of a Binary Search Tree, or BST for short. Binary Search trees (and B-Trees in general) operate on data that can be ordered consistently, the simplest example being numbers. For instance, as an example, I'll be using a BST that holds integers. A BST is made up of nodes, objects that actually hold the pieces of data that the tree itself organizes.
In a BST, the nodes are organized in a simple way. Each node can have up to two _children_ (sub-nodes), and each of those can have up to two children, etc. The children are generally classified as _right_ and _left_. Conventionally, left children always have a value that is below (or comes before) the value of the node whose child they are (their _parent_), and right children have a bigger value.
@@ -45,7 +45,7 @@ __Although the average efficiency of a Binary Search Tree is \\(O(\log n)\\), me
This isn't good enough, and many clever algorithms have been invented to speed up the lookup of the tree by making sure that it remains _balanced_ - that is, it _isn't_ arranged like a simple list. Some of these algorithms include [Red-Black Trees](https://en.wikipedia.org/wiki/Red%E2%80%93black_tree), [AVL Trees](https://en.wikipedia.org/wiki/AVL_tree), and, of course, B-Trees.
## B-Trees
### B-Trees
B-Trees are a generalization of Binary Search Trees. That means that every Binary Search Tree is a B-Tree, but not all B-Trees are BSTs. The key difference lies in the fact that B-Trees' nodes aren't limited to having only two child nodes, and can also have more than one value.
Each B-Tree node is a sorted array of values. That is, instead of a single number like the BST that we've looked at, it has multiple, and these numbers _must_ be sorted. Below are some examples of B-Tree nodes:
@@ -64,7 +64,7 @@ This is solved using another property of B-Trees - the number of children of a n
If we were looking for the number 15, we'd look between the 10 and the 20, examining the 2nd node, and if we were looking for 45 we'd look past the 30, at the 4th node.
## Starbound B-Trees and BTreeDB5
### Starbound B-Trees and BTreeDB5
The BTreeDB5 data structure uses something other than integers for its keys - it uses sequences of bytes. These bytes are compared in a very similar fashion to integers. The game first looks at the first number in the sequence of bytes (like the largest digit in an integer), and if that's the same, moves on to the next one. Also, Starbound B-Trees not only have the values, or _keys_, that they use to find data, but the data itself.
The "nodes" in the BTreeDB are called "blocks" and are one of three types - "index", "leaf", and "free" nodes. "Index" nodes are like the `(10, 20, 30)` node in the above example - they point to other nodes, but actually store no data themselves. The "leaf" nodes actually contain the data, and, if that data is longer than the maximum block size, "leaf" nodes contain the index of the next leaf node where the user might continue to read the data. The "free" nodes are simply free data, empty and ready for Starbound to fill them with something useful.

View File

@@ -0,0 +1,375 @@
---
title: Meaningfully Typechecking a Language in Idris, Revisited
date: 2020-07-22T14:37:35-07:00
tags: ["Idris"]
---
Some time ago, I wrote a post titled [Meaningfully Typechecking a Language in Idris]({{< relref "typesafe_interpreter.md" >}}). The gist of the post was as follows:
* _Programming Language Fundamentals_ students were surprised that, despite
having run their expression through (object language) typechecking, they still had to
have a `Maybe` type in their evaluation functions. This was due to
the fact that the (meta language) type system was not certain that
(object language) typechecking worked.
* A potential solution was to write separate expression types such
as `ArithExpr` and `BoolExpr`, which are known to produce booleans
or integers. However, this required the re-implementation of most
of the logic for `IfElse`, for which the branches could have integers,
booleans, or strings.
* An alternative solution was to use dependent types, and index
the `Expr` type with the type it evaluates to. We defined a data type
`data ExprType = IntType | StringType | BoolType`, and then were able
to write types like `SafeExpr IntType` that we _knew_ would evaluate
to an integer, or `SafeExpr BoolType`, which we also _knew_ would
evaluate to a boolean. We then made our `typecheck` function
return a pair of `(type, SafeExpr of that type)`.
Unfortunately, I think that post is rather incomplete. I noted
at the end of it that I was not certain on how to implement
if-expressions, which were my primary motivation for not just
sticking with `ArithExpr` and `BoolExpr`. It didn't seem too severe
then, but now I just feel like a charlatan. Today, I decided to try
again, and managed to figure it out with the excellent help from
people in the `#idris` channel on Freenode. It required a more
advanced use of dependent types: in particular, I ended up using
Idris' theorem proving facilities to get my code to pass typechecking.
In this post, I will continue from where we left off in the previous
post, adding support for if-expressions.
Let's start with the new `Expr` and `SafeExpr` types. Here they are:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 37 49 >}}
For `Expr`, the `IfElse` constructor is very straightforward. It takes
three expressions: the condition, the 'then' branch, and the 'else' branch.
With `SafeExpr` and `IfThenElse`, things are more rigid. The condition
of the expression has to be of a boolean type, so we make the first argument
`SafeExpr BoolType`. Also, the two branches of the if-expression have to
be of the same type. We encode this by making both of the input expressions
be of type `SafeExpr t`. Since the result of the if-expression will be
the output of one of the branches, the whole if-expression is also
of type `SafeExpr t`.
### What Stumped Me: Equality
Typechecking if-expressions is where things get interesting. First,
we want to require that the condition of the expression evaluates
to a boolean. For this, we can write a function `requireBool`,
that takes a dependent pair produced by `typecheck`. This
function does one of two things:
* If the dependent pair contains a `BoolType`, and therefore also an expression
of type `SafeExpr BoolType`, `requireBool` succeeds, and returns the expression.
* If the dependent pair contains any type other than `BoolType`, `requireBool`
fails with an error message. Since we're using `Either` for error handling,
this amounts to using the `Left` constructor.
Such a function is quite easy to write:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 58 60 >}}
We can then write all of the recursive calls to `typecheck` as follows:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 71 75 >}}
Alright, so we have the types of the `t` and `e` branches. All we have to
do now is use `(==)`. We could implement `(==)` as follows:
```Idris
implementation Eq ExprType where
IntType == IntType = True
BoolType == BoolType = True
StringType == StringType = True
_ == _ = False
```
Now we're golden, right? We can just write the following:
```Idris {linenos=table, linenostart=76}
if tt == et
then pure (_ ** IfThenElse ce te ee)
else Left "Incompatible branch types."
```
No, this is not quite right. Idris complains:
```
Type mismatch between et and tt
```
Huh? But we just saw that `et == tt`! What's the problem?
The problem is, in fact, that `(==)` is meaningless as far
as the Idris typechecker is concerned. We could have just
as well written,
```Idris
implementation Eq ExprType where
_ == _ = True
```
This would tell us that `IntType == BoolType`. But of course,
`SafeExpr IntType` is not the same as `SafeExpr BoolType`; I
would be very worried if the typechecker allowed me to assert
otherwise. There is, however, a kind of equality that we can
use to convince the Idris typechecker that two types are the
same. This equality, too, is a type.
### Curry-Howard Correspondence
Spend enough time learning about Programming Language Theory, and
you will hear the term _Curry-Howard Correspondence_. If you're
the paper kind of person, I suggest reading Philip Wadler's
_Propositions as Types_ paper. Alternatively, you can take a look
at _Logical Foundations_' [Proof Objects](https://softwarefoundations.cis.upenn.edu/lf-current/ProofObjects.html)
chapter. I will give a very brief
explanation here, too, for the sake of completeness. The general
gist is as follows: __propositions (the logical kind) correspond
to program types__, and proofs of the propositions correspond
to values of the types.
To get settled into this idea, let's look at a few 'well-known' examples:
* `(A,B)`, the tuple of two types `A` and `B` is equivalent to the
proposition \\(A \land B\\), which means \\(A\\) and \\(B\\). Intuitively,
to provide a proof of \\(A \land B\\), we have to provide the proofs of
\\(A\\) and \\(B\\).
* `Either A B`, which contains one of `A` or `B`, is equivalent
to the proposition \\(A \lor B\\), which means \\(A\\) or \\(B\\).
Intuitively, to provide a proof that either \\(A\\) or \\(B\\)
is true, we need to provide one of them.
* `A -> B`, the type of a function from `A` to `B`, is equivalent
to the proposition \\(A \rightarrow B\\), which reads \\(A\\)
implies \\(B\\). We can think of a function `A -> B` as creating
a proof of `B` given a proof of `A`.
Now, consider Idris' unit type `()`:
```Idris
data () = ()
```
This type takes no arguments, and there's only one way to construct
it. We can create a value of type `()` at any time, by just writing `()`.
This type is equivalent to \\(\\text{true}\\): only one proof of it exists,
and it requires no premises. It just is.
Consider also the type `Void`, which too is present in Idris:
```Idris
-- Note: this is probably not valid code.
data Void = -- Nothing
```
The type `Void` has no constructors: it's impossible
to create a value of this type, and therefore, it's
impossible to create a proof of `Void`. Thus, as you may have guessed, `Void`
is equivalent to \\(\\text{false}\\).
Finally, we get to a more complicated example:
```Idris
data (=) : a -> b -> Type where
Refl : x = x
```
This defines `a = b` as a type, equivalent to the proposition
that `a` is equal to `b`. The only way to construct such a type
is to give it a single value `x`, creating the proof that `x = x`.
This makes sense: equality is reflexive.
This definition isn't some loosey-goosey boolean-based equality! If we can construct a value of
type `a = b`, we can prove to Idris' typechecker that `a` and `b` are equivalent. In
fact, Idris' standard library gives us the following function:
```Idris
replace : {a:_} -> {x:_} -> {y:_} -> {P : a -> Type} -> x = y -> P x -> P y
```
This reads, given a type `a`, and values `x` and `y` of type `a`, if we know
that `x = y`, then we can rewrite any proposition in terms of `x` into
another, also valid proposition in terms of `y`. Let's make this concrete.
Suppose `a` is `Int`, and `P` (the type of which is now `Int -> Type`),
is `Even`, a proposition that claims that its argument is even.
{{< sidenote "right" "specialize-note" "Then, we have:" >}}
I'm only writing type signatures for <code>replace'</code>
to avoid overloading. There's no need to define a new function;
<code>replace'</code> is just a specialization of <code>replace</code>,
so we can use the former anywhere we can use the latter.
{{< /sidenote >}}
```Idris
replace' : {x : Int} -> {y : Int} -> x = y -> Even x -> Even y
```
That is, if we know that `x` is equal to `y`, and we know that `x` is even,
it follows that `y` is even too. After all, they're one and the same!
We can take this further. Recall:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 44 44 >}}
We can therefore write:
```Idris
replace'' : {x : ExprType} -> {y : ExprType} -> x = y -> SafeExpr x -> SafeExpr y
```
This is exactly what we want! Given a proof that one `ExprType`, `x`, is equal to
another `ExprType`, `y`, we can safely convert `SafeExpr x` to `SafeExpr y`.
We will use this to convince the Idris typechecker to accept our program.
### First Attempt: `Eq` implies Equality
It's pretty trivial to see that we _did_ define `(==)` correctly (`IntType` is equal
to `IntType`, `StringType` is equal to `StringType`, and so on). Thus,
if we know that `x == y` is `True`, it should follow that `x = y`. We can thus
define the following proposition:
```Idris
eqCorrect : {a : ExprType} -> {b : ExprType} -> (a == b = True) -> a = b
```
We will see shortly why this is _not_ the best solution, and thus, I won't bother
creating a proof / implementation for this proposition / function.
It reads:
> If we have a proof that `(==)` returned true for some `ExprType`s `a` and `b`,
it must be that `a` is the same as `b`.
We can then define a function to cast
a `SafeExpr a` to `SafeExpr b`, given that `(==)` returned `True` for some `a` and `b`:
```Idris
safeCast : {a : ExprType} -> {b : ExprType} -> (a == b = True) -> SafeExpr a -> SafeExpr b
safeCast h e = replace (eqCorrect h) e
```
Awesome! All that's left now is to call `safeCast` from our `typecheck` function:
```Idris {linenos=table, linenostart=76}
if tt == et
then pure (_ ** IfThenElse ce te (safeCast ?uhOh ee))
else Left "Incompatible branch types."
```
No, this doesn't work after all. What do we put for `?uhOh`? We need to have
a value of type `tt == et = True`, but we don't have one. Idris' own if-then-else
expressions do not provide us with such proofs about their conditions. The awesome
people at `#idris` pointed out that the `with` clause can provide such a proof.
We could therefore write:
```Idris
createIfThenElse ce (tt ** te) (et ** ee) with (et == tt) proof p
| True = pure (tt ** IfThenElse ce te (safeCast p ee))
| False = Left "Incompatible branch types."
```
Here, the `with` clause effectively adds another argument equal to `(et == tt)` to `createIfThenElse`,
and tries to pattern match on its value. When we combine this with the `proof` keyword,
Idris will give us a handle to a proof, named `p`, that asserts the new argument
evaluates to the value in the pattern match. In our case, this is exactly
the proof we need to give to `safeCast`.
However, this is ugly. Idris' `with` clause only works at the top level of a function,
so we have to define a function just to use it. It also shows that we're losing
information when we call `(==)`, and we have to reconstruct or recapture it using
some other means.
### Second Attempt: Decidable Propositions
More awesome folks over at `#idris` pointed out that the whole deal with `(==)`
is inelegant; they suggested I use __decidable propositions__, using the `Dec` type.
The type is defined as follows:
```Idris
data Dec : Type -> Type where
Yes : (prf : prop) -> Dec prop
No : (contra : prop -> Void) -> Dec prop
```
There are two ways to construct a value of type `Dec prop`:
* We use the `Yes` constructor, which means that the proposition `prop`
is true. To use this constructor, we have to give it a proof of `prop`,
called `prf` in the constructor.
* We use the `No` constructor, which means that the proposition `prop`
is false. We need a proof of type `prop -> Void` to represent this:
if we have a proof of `prop`, we arrive at a contradiction.
This combines the nice `True` and `False` of `Bool`, with the
'real' proofs of the truthfulness or falsity. At the moment
that we would have been creating a boolean, we also create
a proof of that boolean's value. Thus, we don't lose information.
Here's how we can go about this:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 20 29 >}}
We pattern match on the input expression types. If the types are the same, we return
`Yes`, and couple it with `Refl` (since we've pattern matched on the types
in the left-hand side of the function definition, the typechecker has enough
information to create that `Refl`). On the other hand, if the expression types
do not match, we have to provide a proof that their equality would be absurd.
For this we use helper functions / theorems like `intBoolImpossible`:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 11 12 >}}
I'm not sure if there's a better way of doing this than using `impossible`.
This does the job, though: Idris understands that there's no way we can get
an input of type `IntType = BoolType`, and allows us to skip writing a right-hand side.
We can finally use this new `decEq` function in our type checker:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 76 78 >}}
Idris is happy with this! We should also add `IfThenElse` to our `eval` function.
This part is very easy:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 80 85 >}}
Since the `c` part of the `IfThenElse` is indexed with `BoolType`, we know
that evaluating it will give us a boolean. Thus, we can use that
directly in the Idris if-then-else expression. Let's try this with a few
expressions:
```Idris
BinOp Add (IfElse (BoolLit True) (IntLit 6) (IntLit 7)) (BinOp Multiply (IntLit 160) (IntLit 2))
```
This evaluates to `326`, as it should. What if we make the condition non-boolean?
```Idris
BinOp Add (IfElse (IntLit 1) (IntLit 6) (IntLit 7)) (BinOp Multiply (IntLit 160) (IntLit 2))
```
Our typechecker catches this, and we end up with the following output:
```
Type error: Not a boolean.
```
Alright, let's make one of the branches of the if-expression be a boolean, while the
other remains an integer.
```Idris
BinOp Add (IfElse (BoolLit True) (BoolLit True) (IntLit 7)) (BinOp Multiply (IntLit 160) (IntLit 2))
```
Our typechecker catches this, too:
```
Type error: Incompatible branch types.
```
### Conclusion
I think this is a good approach. Should we want to add more types to our language, such as tuples,
lists, and so on, we will be able to extend our `decEq` approach to construct more complex equality
proofs, and keep the `typecheck` method the same. Had we not used this approach,
and instead decided to pattern match on types inside of `typecheck`, we would've quickly
found that this only works for types with finitely many values. When we add polymorphic tuples
and lists, we start being able to construct an arbitrary number of types: `[a]`, `[[a]]`, and
so on. Then, we cease to be able to enumerate all possible pairs of types, and require a recursive
solution. I think that this leads us back to `decEq`.
I also hope that I've now redeemed myself as far as logical arguments go. We used dependent types
and made our typechecking function save us from error-checking during evaluation. We did this
without having to manually create different types of expressions like `ArithExpr` and `BoolExpr`,
and without having to duplicate any code.
That's all I have for today, thank you for reading! As always, you can check out the
[full source code for the typechecker and interpreter](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/typesafe-interpreter/TypesafeIntrV2.idr) on my Git server.

View File

@@ -0,0 +1,217 @@
---
title: Meaningfully Typechecking a Language in Idris, With Tuples
date: 2020-08-11T19:57:26-07:00
tags: ["Idris"]
draft: true
---
Some time ago, I wrote a post titled
[Meaningfully Typechecking a Language in Idris]({{< relref "typesafe_interpreter.md" >}}).
I then followed it up with
[Meaningfully Typechecking a Language in Idris, Revisited]({{< relref "typesafe_interpreter_revisited.md" >}}).
In these posts, I described a hypothetical
way of 'typechecking' an expression data type `Expr` into a typesafe form `SafeExpr`.
A `SafeExpr` can be evaluated without any code to handle type errors,
since it's by definition impossible to construct ill-typed expressions using
it. In the first post, we implemented the method only for simple arithmetic
expressions; in my latter post, we extended this to support `if`-expressions.
Near the end of the post, I made the following comment:
> When we add polymorphic tuples and lists, we start being able to construct an
arbitrary number of types: `[a]`, `[[a]]`, and so on. Then, we cease to be able to
enumerate all possible pairs of types, and require a recursive solution. I think
that this leads us back to [our method].
Recently, I thought about this some more, and decided that it's rather simple
to add tuples into our little language. The addition of tuples mean that our
language will have an infinite number of possible types. We would have
`Int`, `(Int, Int)`, `((Int, Int), Int)`, and so on. This would make it
impossible to manually test every possible case in our typechecker,
but our approach of returning `Dec (a = b)` will work just fine.
### Extending The Syntax
First, let's extend our existing language with expressions for
tuples. For simplicity, let's use pairs `(a,b)` instead of general
`n`-element tuples. This would make typechecking less cumbersome while still
having the interesting effect of making the number of types in our language
infinite. We can always represent the 3-element tuple `(a,b,c)` as `((a,b), c)`,
after all. To be able to extract values from our pairs, we'll add the `fst` and
`snd` functions into our language, which accept a tuple and return its
first or second element, respectively.
Our `Expr` data type, which allows ill-typed expressions, ends up as follows:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 31 39 "hl_lines=7 8 9" >}}
I've highlighted the new lines. The additions consist of the `Pair` constructor, which
represents the tuple expression `(a, b)`, and the `Fst` and `Snd` constructors,
which represent the `fst e` and `snd e` expressions, respectively. In
a similar manner, we extend our `SafeExpr` GADT:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 41 49 "hl_lines=7 8 9" >}}
Finally, to provide the `PairType` constructor, we extend the `ExprType` and `repr` functions:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 1 11 "hl_lines=5 11" >}}
### Implementing Equality
An important part of this change is the extension of the `decEq` function,
which compares two types for equality. The kind folks over at `#idris` previously
recommended the use of the `Dec` data type for this purpose. A value of
type `Dec P`
{{< sidenote "right" "decideable-note" "is either a proof that \(P\) is true, or a proof that \(P\) is false." >}}
It's possible that a proposition \(P\) is not provable, and neither is \(\lnot P\).
It is therefore not always possible to construct a value of type <code>Dec P</code> for
an arbitrary proposition <code>P</code>. Having a value of type <code>Dec P</code>, then,
provides us nontrivial information.
{{< /sidenote >}} Our `decEq` function, given two types `a` and `b`, returns
`Dec (a = b)`. Thus, it will return either a proof that `a = b` (which we can
then use to convince the Idris type system that two `SafeExpr` values are,
in fact, of the same type), or a proof of `a = b -> Void` (which tells
us that `a` and `b` are definitely not equal). If you're not sure what the deal with `(=)`
and `Void` is, check out
[this section]({{< relref "typesafe_interpreter_revisited.md" >}}#curry-howard-correspondence)
of the previous article.
A lot of the work in implementing `decEq` went into constructing proofs of falsity.
That is, we needed to explicitly list every case like `decEq IntType BoolType`, and create
a proof that `IntType` cannot equal `BoolType`. However, here's how we use `decEq` in
the typechecking function:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV2.idr" 76 78 >}}
We always throw away the proof of inequality! So, rather than spending the time
constructing useless proofs like this, we can just switch `decEq` to return
a `Maybe (a = b)`. The `Just` case will tell us that the two types are equal
(and, as before, provide a proof); the `Nothing` case will tell us that
the two types are _not_ equal, and provide no further information. Let's
see the implementation of `decEq` now:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 13 23 >}}
Lines 14 through 16 are pretty simple; in this case, we can tell at a glance
that the two types are equal, and Idris can infer an equality proof in
the form of `Refl`. We return this proof by writing it in a `Just`.
Line 23 is the catch-all case for any combination of types we didn't handle.
Any combination of types we don't handle is invalid, and thus, this case
returns `Nothing`.
What about lines 17 through 22? This is the case for handling the equality
of two pair types, `(lt1, lt2)` and `(rt1, rt2)`. The equality of the two
types depends on the equality of their constituents. That is, if we
know that `lt1 = rt1` and `lt2 = rt2`, we know that the two pair types
are also equal. If one of the two equalities doesn't hold, the two
pairs obviously aren't equal, and thus, we should return `Nothing`.
This should remind us of `Maybe`'s monadic nature: we can first compute
`decEq lt1 rt1`, and then, if it succeeds, compute `decEq lt2 rt2`.
If both succeed, we will have in hand the two proofs, `lt1 = rt1`
and `lt2 = rt2`. We achieve this effect using `do`-notation,
storing the sub-proofs into `subEq1` and `subEq2`.
What now? Once again, we have to use `replace`. Recall its type:
```Idris
replace : {a:_} -> {x:_} -> {y:_} -> {P : a -> Type} -> x = y -> P x -> P y
```
Given some proposition in terms of `a`, and knowing that `a = b`, `replace`
returns the original proposition, but now in terms of `b`. We know for sure
that:
```Idris
PairType lt1 lt2 = PairType lt1 lt2
```
We can start from there. Let's handle one thing at a time, and try
to replace the second `lt1` with `rt1`. Then, we can replace the second
`lt2` with `rt2`, and we'll have our equality!
Easier said than done, though. How do we tell Idris which `lt1`
we want to substitute? After all, all of the following are possible:
```Idris
PairType rt1 lt2 = PairType lt1 lt2 -- First lt1 replaced
PairType lt1 lt2 = PairType rt1 lt2 -- Second lt1 replaced
PairType rt1 lt2 = PairType rt1 lt2 -- Both replaced
```
The key is in the signature, specifically the expressions `P x` and `P y`.
We can think of `P` as a function, and of `replace` as creating a value
of `P` applied to another argument. Thus, the substitution will occur
exactly where the argument of `P` is used. Then, to achieve each
of the above substitutions, we can write `P` as follows:
```Idris {linenos=table, hl_lines=[2]}
t1 => PairType t1 lt2 = PairType lt1 lt2
t1 => PairType lt1 lt2 = PairType t1 lt2
t1 => PairType t1 lt2 = PairType t1 lt2
```
The second function (highlighted) is the one we'll need to use to attain
the desired result. Since `P` is an implicit argument to `replace`,
we can explicitly provide it with `{P=...}`, leading to the following
line:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 20 20>}}
We now have a proof of the following proposition:
```Idris
PairType lt1 lt2 = PairType rt1 lt2
```
We want to replace the second `lt2` with `rt2`, which means that we
write our `P` as follows:
```Idris
t2 => PairType lt1 lt2 = PairType rt1 t2
```
Finally, we perform the second replacement, and return the result:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 21 22 >}}
Great! We have finished implementing `decEq`.
### Adjusting The Typechecker
It's time to make our typechecker work with tuples.
First, we need to fix the `IfElse` case to accept `Maybe (a=b)` instead
of `Dec (a=b)`:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 71 78 "hl_lines=7 8" >}}
Note that the only change is from `Dec` to `Maybe`; we didn't need to add new cases
or even to know what sort of types are available in the language.
Next, we can write the cases for the new expressions in our language. We can
start with `Pair`, which, given expressions of types `a` and `b`, creates
an expression of type `(a,b)`. As long as the arguments to `Pair` are well-typed,
so is the `Pair` expression itself; thus, there are no errors to handle.
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 79 83 >}}
The case for `Fst` is more complicated. If the argument to `Fst` is a tuple
of type `(a, b)`, then `Fst` constructs from it an expression
of type `a`. Otherwise, the expression is ill-typed, and we return an error.
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 84 89 >}}
The case for `Snd` is very similar:
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 90 96 >}}
### Evaluation Function and Conclusion
We conclude with our final `eval` and `resultStr` functions,
which now look as follows.
{{< codelines "Idris" "typesafe-interpreter/TypesafeIntrV3.idr" 97 111 "hl_lines=7-9 13-15" >}}
As you can see, we require no error handling in `eval`; the expressions returned by
`typecheck` are guaranteed to evaluate to valid Idris values. We have achieved our goal,
with very few changes to `typecheck` other than the addition of new language
constructs. In my opinion, this is a win!
As always, you can see the code on my Git server. Here's
[the latest Idris file,](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/typesafe-interpreter/TypesafeIntrV3.idr)
if you want to check it out (and maybe verify that it compiles). I hope you found
this interesting!

Binary file not shown.

View File

@@ -0,0 +1,78 @@
@import "variables.scss";
$code-color-lineno: grey;
$code-color-keyword: black;
$code-color-type: black;
$code-color-comment: grey;
code {
font-family: $font-code;
background-color: $code-color;
border: $code-border;
padding: 0 0.25rem 0 0.25rem;
}
pre code {
display: block;
box-sizing: border-box;
padding: 0.5rem;
overflow: auto;
}
.chroma {
.lntable {
border-spacing: 0;
padding: 0.5rem 0 0.5rem 0;
background-color: $code-color;
border-radius: 0;
border: $code-border;
display: block;
overflow: auto;
td {
padding: 0;
}
code {
border: none;
padding: 0;
}
pre {
margin: 0;
}
.lntd:last-child {
width: 100%;
}
}
.lntr {
display: table-row;
}
.lnt {
display: block;
padding: 0 1rem 0 1rem;
color: $code-color-lineno;
}
.hl {
display: block;
background-color: #fffd99;
}
}
.kr, .k {
font-weight: bold;
color: $code-color-keyword;
}
.kt {
font-weight: bold;
color: $code-color-type;
}
.c, .c1 {
color: $code-color-comment;
}

View File

@@ -0,0 +1,47 @@
@import "variables.scss";
@import "mixins.scss";
$margin-width: 30rem;
$margin-inner-offset: 0.5rem;
$margin-outer-offset: 1rem;
@mixin below-two-margins {
@media screen and
(max-width: $container-width-threshold +
2 * ($margin-width + $margin-inner-offset + $margin-outer-offset)) {
@content;
}
}
@mixin below-one-margin {
@media screen and
(max-width: $container-width-threshold +
($margin-width + $margin-inner-offset + $margin-outer-offset)) {
@content;
}
}
@mixin margin-content {
display: block;
position: absolute;
width: $margin-width;
box-sizing: border-box;
}
@mixin margin-content-left {
left: 0;
margin-left: -($margin-width + $container-min-padding + $margin-inner-offset);
@include below-two-margins {
display: none;
}
}
@mixin margin-content-right {
right: 0;
margin-right: -($margin-width + $container-min-padding + $margin-inner-offset);
@include below-one-margin {
display: none;
}
}

View File

@@ -6,7 +6,7 @@
}
@mixin below-container-width {
@media screen and (max-width: $container-width){
@media screen and (max-width: $container-width-threshold){
@content;
}
}

View File

@@ -1,28 +1,10 @@
@import "variables.scss";
@import "mixins.scss";
@import "margin.scss";
$sidenote-accommodate-shrink: 10rem;
$sidenote-width: 30rem;
$sidenote-offset: 1.5rem;
$sidenote-padding: 1rem;
$sidenote-highlight-border-width: .2rem;
@mixin below-two-sidenotes {
@media screen and
(max-width: $container-width +
2 * ($sidenote-width + 2 * $sidenote-offset)) {
@content;
}
}
@mixin below-one-sidenote {
@media screen and
(max-width: $container-width +
($sidenote-width + 3 * $sidenote-offset)) {
@content;
}
}
.sidenote {
&:hover {
.sidenote-label {
@@ -48,25 +30,19 @@ $sidenote-highlight-border-width: .2rem;
}
.sidenote-content {
display: block;
position: absolute;
width: $sidenote-width;
@include margin-content;
@include bordered-block;
margin-top: -1.5rem;
padding: $sidenote-padding;
text-align: left;
&.sidenote-right {
right: 0;
margin-right: -($sidenote-width + $sidenote-offset);
@include margin-content-right;
}
&.sidenote-left {
left: 0;
margin-left: -($sidenote-width + $sidenote-offset);
@include margin-content-left;
}
@include bordered-block;
padding: $sidenote-padding;
box-sizing: border-box;
text-align: left;
}
.sidenote-delimiter {
@@ -78,36 +54,22 @@ $sidenote-highlight-border-width: .2rem;
margin-top: 1rem;
margin-bottom: 1rem;
width: 100%;
display: none;
.sidenote-checkbox:checked ~ & {
display: block;
}
}
@include below-two-sidenotes {
@include below-two-margins {
.sidenote-content.sidenote-left {
@include hidden-sidenote;
margin-left: 0rem;
}
.container {
left: -$sidenote-width/2
}
}
@include below-one-sidenote {
.post-content {
max-width: 100%;
}
@include below-one-margin {
.sidenote-content.sidenote-right {
@include hidden-sidenote;
margin-right: 0rem;
}
.container {
position: initial;
}
}

View File

@@ -1,5 +1,7 @@
@import "variables.scss";
@import "mixins.scss";
@import "margin.scss";
@import "toc.scss";
body {
font-family: $font-body;
@@ -29,22 +31,6 @@ h1, h2, h3, h4, h5, h6 {
}
}
code {
font-family: $font-code;
background-color: $code-color;
}
pre code {
display: block;
padding: 0.5rem;
overflow-x: auto;
background-color: $code-color;
}
div.highlight table pre {
margin: 0;
}
.container {
position: relative;
margin: auto;
@@ -53,7 +39,17 @@ div.highlight table pre {
box-sizing: border-box;
@include below-container-width {
padding: 0rem 1rem 0rem 1rem;
padding: 0 $container-min-padding 0 $container-min-padding;
margin: 0;
max-width: $container-width + 2 * $container-min-padding;
}
@include below-two-margins {
left: -($margin-width + $margin-inner-offset + $margin-outer-offset)/2;
}
@include below-one-margin {
left: 0;
}
}
@@ -62,8 +58,7 @@ div.highlight table pre {
background-color: $primary-color;
border: none;
color: white;
transition: color 0.25s;
transition: background-color 0.25s;
transition: color 0.25s, background-color 0.25s;
text-align: left;
&:focus {
@@ -221,4 +216,20 @@ figure {
figcaption {
text-align: center;
}
&.tiny img {
max-height: 15rem;
}
&.small img {
max-height: 20rem;
}
&.medium img {
max-height: 30rem;
}
}
.twitter-tweet {
margin: auto;
}

View File

@@ -0,0 +1,49 @@
@import "variables.scss";
@import "mixins.scss";
$toc-color: $code-color;
$toc-border-color: $code-border-color;
.table-of-contents {
@include margin-content;
@include margin-content-left;
display: flex;
flex-direction: column;
align-items: end;
margin-bottom: 1rem;
em {
font-style: normal;
font-weight: bold;
font-size: 1.2em;
display: block;
margin-bottom: 0.5rem;
}
#TableOfContents > ul {
padding-left: 0;
}
nav {
margin: 0px;
}
ul {
list-style: none;
padding-left: 2rem;
margin: 0px;
}
a {
padding: 0;
}
div.wrapper {
@include bordered-block;
padding: 1rem;
background-color: $toc-color;
border-color: $toc-border-color;
box-sizing: border-box;
max-width: 100%;
}
}

View File

@@ -1,14 +1,16 @@
$container-width: 45rem;
$container-min-padding: 1rem;
$container-width-threshold: $container-width + 2 * $container-min-padding;
$standard-border-width: .075rem;
$primary-color: #36e281;
$primary-color-dark: darken($primary-color, 10%);
$code-color: #f0f0f0;
$code-color-dark: darken($code-color, 10%);
$border-color: #bfbfbf;
$code-color: #f0f0f0;
$code-border-color: darken($code-color, 10%);
$font-heading: "Lora", serif;
$font-body: "Raleway", serif;
$font-code: "Inconsolata", monospace;
$standard-border: $standard-border-width solid $border-color;
$code-border: $standard-border-width solid $code-border-color;

View File

@@ -10,6 +10,14 @@
</div>
<div class="post-content">
{{ if not (eq .TableOfContents "<nav id=\"TableOfContents\"></nav>") }}
<div class="table-of-contents">
<div class="wrapper">
<em>Table of Contents</em>
{{ .TableOfContents }}
</div>
</div>
{{ end }}
{{ .Content }}
</div>
{{ end }}

View File

@@ -6,14 +6,16 @@
<meta name="description" content="{{ .Description }}">
{{ end }}
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inconsolata&family=Raleway&family=Lora&display=block" media="screen">
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inconsolata:wght@400;700&family=Raleway&family=Lora&display=block" media="screen">
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css" media="screen">
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
{{ $sidenotes := resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
{{ $code := resources.Get "scss/code.scss" | resources.ToCSS | resources.Minify }}
{{ $icon := resources.Get "img/favicon.png" }}
{{- partial "sidenotes.html" . -}}
<link rel="stylesheet" href="{{ $style.Permalink }}" media="screen">
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}" media="screen">
<link rel="stylesheet" href="{{ $code.Permalink }}" media="screen">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" integrity="sha384-zB1R0rpPzHqg7Kpt0Aljp8JPLqbXI3bhnPWROx27a9N0Ll6ZP/+DiW/UqRcLbRjq" crossorigin="anonymous" media="screen">
<link rel="icon" type="image/png" href="{{ $icon.Permalink }}">

View File

@@ -4,8 +4,9 @@
<nav>
<div class="container">
<a href="/">Home</a>
<a href="https://github.com/DanilaFe">GitHub</a>
<a href="/about">About</a>
<a href="https://github.com/DanilaFe">GitHub</a>
<a href="/Resume-Danila-Fedorin.pdf">Resume</a>
<a href="/tags">Tags</a>
<a href="/blog">All Posts</a>
</div>

View File

@@ -6,4 +6,9 @@
{{ .Scratch.Set "u" $t }}
{{ end }}
{{ $v := first (add (sub (int (.Get 3)) (int (.Get 2))) 1) (.Scratch.Get "u") }}
{{ highlight (delimit $v "\n") (.Get 0) "" }}
{{ if (.Get 4) }}
{{ .Scratch.Set "opts" (printf ",%s" (.Get 4)) }}
{{ else }}
{{ .Scratch.Set "opts" "" }}
{{ end }}
{{ highlight (delimit $v "\n") (.Get 0) (printf "linenos=table,linenostart=%d%s" (.Get 2) (.Scratch.Get "opts")) }}