Compare commits
84 Commits
margin-rew
...
8bf67c7dc3
| Author | SHA1 | Date | |
|---|---|---|---|
| 8bf67c7dc3 | |||
| 13214cee96 | |||
| 579c7bad92 | |||
| f00a6a7783 | |||
| 2a81fdd9fb | |||
| 17c59e595c | |||
| ad2576eae2 | |||
| 72d8179cc5 | |||
| dbabec0db6 | |||
| 76675fbc9b | |||
| ca395b5c09 | |||
| 1a05d5ff7a | |||
| 56f0dbd02f | |||
| 9fc0ff961d | |||
| 73441dc93b | |||
| df5f5eba1c | |||
| d950b8dc90 | |||
| 85394b185d | |||
| 86b49f9cc3 | |||
| 9769b3e396 | |||
| e337992410 | |||
| d5c3a44041 | |||
| eade42be49 | |||
| d0fac50cfd | |||
| dd4aa6fb9d | |||
| aa867b2e5f | |||
| 2fa2be4b9e | |||
| d5536467f6 | |||
| 67cb61c93f | |||
| 578d580683 | |||
| 789f277780 | |||
| 308ec615b9 | |||
| 0e40c9e216 | |||
| 5dbf75b5e4 | |||
| b921ddfc8d | |||
| bf3c81fe24 | |||
| 06cbd93f05 | |||
| 6c3780d9ea | |||
| 6f0667bb28 | |||
| 8368283a3e | |||
| 18ee3a1526 | |||
| b0e501f086 | |||
| 385ae59133 | |||
| 49469bdf12 | |||
| 020417e971 | |||
| eff0de5330 | |||
| b219f6855e | |||
| 068d0218b0 | |||
| 65215ccdd6 | |||
| 3e9f6a14f2 | |||
| 7623787b1c | |||
| e15daa8f6d | |||
| 298cf6599c | |||
| 841930a8ef | |||
| 9b37e496cb | |||
| 58e6ad9e79 | |||
| 3aa2a6783e | |||
| d64a0d1fcd | |||
| ba141031dd | |||
| ebdc63f5a0 | |||
| 5af0a09714 | |||
| 8a2bc2660c | |||
| e59b8cf403 | |||
| b078ef9a22 | |||
| fdaec6d5a9 | |||
| b631346379 | |||
| e9f2378b47 | |||
| 7d2f78d25c | |||
| 1f734a613c | |||
| a3c299b057 | |||
| 12aedfce92 | |||
| 65645346a2 | |||
| cb65e89e53 | |||
| 6a2fec8ef4 | |||
| aa59c90810 | |||
| 2b317930a0 | |||
| e7d56dd4bd | |||
| a4fedb276d | |||
| 277c0a2ce6 | |||
| ef3c61e9e6 | |||
| 1908126607 | |||
| 2d77f8489f | |||
| 0371651fdd | |||
| 01734d24f7 |
50
code/compiler/13/CMakeLists.txt
Normal file
@@ -0,0 +1,50 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
project(compiler)
|
||||
|
||||
# We want C++17 for std::optional
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Find all the required packages
|
||||
find_package(BISON)
|
||||
find_package(FLEX)
|
||||
find_package(LLVM REQUIRED CONFIG)
|
||||
|
||||
# Set up the flex and bison targets
|
||||
bison_target(parser
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
|
||||
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
|
||||
COMPILE_FLAGS "-d")
|
||||
flex_target(scanner
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
|
||||
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
|
||||
add_flex_bison_dependency(scanner parser)
|
||||
|
||||
# Find all the relevant LLVM components
|
||||
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||
|
||||
# Create compiler executable
|
||||
add_executable(compiler
|
||||
definition.cpp definition.hpp
|
||||
parsed_type.cpp parsed_type.hpp
|
||||
ast.cpp ast.hpp
|
||||
llvm_context.cpp llvm_context.hpp
|
||||
type_env.cpp type_env.hpp
|
||||
env.cpp env.hpp
|
||||
type.cpp type.hpp
|
||||
error.cpp error.hpp
|
||||
binop.cpp binop.hpp
|
||||
instruction.cpp instruction.hpp
|
||||
graph.cpp graph.hpp
|
||||
global_scope.cpp global_scope.hpp
|
||||
${BISON_parser_OUTPUTS}
|
||||
${FLEX_scanner_OUTPUTS}
|
||||
main.cpp
|
||||
)
|
||||
|
||||
# Configure compiler executable
|
||||
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
|
||||
target_link_libraries(compiler ${LLVM_LIBS})
|
||||
602
code/compiler/13/ast.cpp
Normal file
@@ -0,0 +1,602 @@
|
||||
#include "ast.hpp"
|
||||
#include <ostream>
|
||||
#include <type_traits>
|
||||
#include "binop.hpp"
|
||||
#include "error.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "env.hpp"
|
||||
|
||||
static void print_indent(int n, std::ostream& to) {
|
||||
while(n--) to << " ";
|
||||
}
|
||||
|
||||
void ast_int::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "INT: " << value << std::endl;
|
||||
}
|
||||
|
||||
void ast_int::find_free(std::set<std::string>& into) {
|
||||
|
||||
}
|
||||
|
||||
type_ptr ast_int::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
return type_ptr(new type_app(env->lookup_type("Int")));
|
||||
}
|
||||
|
||||
void ast_int::translate(global_scope& scope) {
|
||||
|
||||
}
|
||||
|
||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||
}
|
||||
|
||||
void ast_lid::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "LID: " << id << std::endl;
|
||||
}
|
||||
|
||||
void ast_lid::find_free(std::set<std::string>& into) {
|
||||
into.insert(id);
|
||||
}
|
||||
|
||||
type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_scheme_ptr lid_type = env->lookup(id);
|
||||
if(!lid_type)
|
||||
throw type_error(std::string("unknown identifier ") + id, loc);
|
||||
return lid_type->instantiate(mgr);
|
||||
}
|
||||
|
||||
void ast_lid::translate(global_scope& scope) {
|
||||
|
||||
}
|
||||
|
||||
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
auto mangled_name = this->env->get_mangled_name(id);
|
||||
|
||||
// Local names shouldn't need mangling.
|
||||
assert(!(mangled_name != id && !this->env->is_global(id)));
|
||||
|
||||
into.push_back(instruction_ptr(
|
||||
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
|
||||
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||
(instruction*) new instruction_pushglobal(mangled_name)));
|
||||
}
|
||||
|
||||
void ast_uid::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "UID: " << id << std::endl;
|
||||
}
|
||||
|
||||
void ast_uid::find_free(std::set<std::string>& into) {
|
||||
|
||||
}
|
||||
|
||||
type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_scheme_ptr uid_type = env->lookup(id);
|
||||
if(!uid_type)
|
||||
throw type_error(std::string("unknown constructor ") + id, loc);
|
||||
return uid_type->instantiate(mgr);
|
||||
}
|
||||
|
||||
void ast_uid::translate(global_scope& scope) {
|
||||
|
||||
}
|
||||
|
||||
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(
|
||||
new instruction_pushglobal(this->env->get_mangled_name(id))));
|
||||
}
|
||||
|
||||
void ast_binop::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "BINOP: " << op_name(op) << std::endl;
|
||||
left->print(indent + 1, to);
|
||||
right->print(indent + 1, to);
|
||||
}
|
||||
|
||||
void ast_binop::find_free(std::set<std::string>& into) {
|
||||
left->find_free(into);
|
||||
right->find_free(into);
|
||||
}
|
||||
|
||||
type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
||||
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op), loc);
|
||||
|
||||
// For better type errors, we first require binary function,
|
||||
// and only then unify each argument. This way, we can
|
||||
// precisely point out which argument is "wrong".
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr second_type = mgr.new_type();
|
||||
type_ptr first_type = mgr.new_type();
|
||||
type_ptr arrow_one = type_ptr(new type_arr(second_type, return_type));
|
||||
type_ptr arrow_two = type_ptr(new type_arr(first_type, arrow_one));
|
||||
|
||||
mgr.unify(ftype, arrow_two, loc);
|
||||
mgr.unify(first_type, ltype, left->loc);
|
||||
mgr.unify(second_type, rtype, right->loc);
|
||||
return return_type;
|
||||
}
|
||||
|
||||
void ast_binop::translate(global_scope& scope) {
|
||||
left->translate(scope);
|
||||
right->translate(scope);
|
||||
}
|
||||
|
||||
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
|
||||
auto mangled_name = this->env->get_mangled_name(op_name(op));
|
||||
into.push_back(instruction_ptr(new instruction_pushglobal(mangled_name)));
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
}
|
||||
|
||||
void ast_app::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "APP:" << std::endl;
|
||||
left->print(indent + 1, to);
|
||||
right->print(indent + 1, to);
|
||||
}
|
||||
|
||||
void ast_app::find_free(std::set<std::string>& into) {
|
||||
left->find_free(into);
|
||||
right->find_free(into);
|
||||
}
|
||||
|
||||
type_ptr ast_app::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_ptr ltype = left->typecheck(mgr, env);
|
||||
type_ptr rtype = right->typecheck(mgr, env);
|
||||
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||
mgr.unify(arrow, ltype, left->loc);
|
||||
return return_type;
|
||||
}
|
||||
|
||||
void ast_app::translate(global_scope& scope) {
|
||||
left->translate(scope);
|
||||
right->translate(scope);
|
||||
}
|
||||
|
||||
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
right->compile(env, into);
|
||||
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||
}
|
||||
|
||||
void ast_case::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "CASE: " << std::endl;
|
||||
for(auto& branch : branches) {
|
||||
print_indent(indent + 1, to);
|
||||
branch->pat->print(to);
|
||||
to << std::endl;
|
||||
branch->expr->print(indent + 2, to);
|
||||
}
|
||||
}
|
||||
|
||||
void ast_case::find_free(std::set<std::string>& into) {
|
||||
of->find_free(into);
|
||||
for(auto& branch : branches) {
|
||||
std::set<std::string> free_in_branch;
|
||||
std::set<std::string> pattern_variables;
|
||||
branch->pat->find_variables(pattern_variables);
|
||||
branch->expr->find_free(free_in_branch);
|
||||
for(auto& free : free_in_branch) {
|
||||
if(pattern_variables.find(free) == pattern_variables.end())
|
||||
into.insert(free);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
type_var* var;
|
||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||
type_ptr branch_type = mgr.new_type();
|
||||
|
||||
for(auto& branch : branches) {
|
||||
type_env_ptr new_env = type_scope(env);
|
||||
branch->pat->typecheck(case_type, mgr, new_env);
|
||||
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
|
||||
mgr.unify(curr_branch_type, branch_type, branch->expr->loc);
|
||||
}
|
||||
|
||||
input_type = mgr.resolve(case_type, var);
|
||||
type_app* app_type;
|
||||
|
||||
return branch_type;
|
||||
}
|
||||
|
||||
void ast_case::translate(global_scope& scope) {
|
||||
of->translate(scope);
|
||||
for(auto& branch : branches) {
|
||||
branch->expr->translate(scope);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct case_mappings {
|
||||
using tag_type = typename T::tag_type;
|
||||
std::map<tag_type, std::vector<instruction_ptr>> defined_cases;
|
||||
std::optional<std::vector<instruction_ptr>> default_case;
|
||||
|
||||
std::vector<instruction_ptr>& make_case_for(tag_type tag) {
|
||||
if(default_case)
|
||||
throw type_error("attempted pattern match after catch-all");
|
||||
return defined_cases[tag];
|
||||
}
|
||||
|
||||
std::vector<instruction_ptr>& make_default_case() {
|
||||
if(default_case)
|
||||
throw type_error("attempted repeated use of catch-all");
|
||||
default_case.emplace(std::vector<instruction_ptr>());
|
||||
return *default_case;
|
||||
}
|
||||
|
||||
std::vector<instruction_ptr>& get_specific_case_for(tag_type tag) {
|
||||
auto existing_case = defined_cases.find(tag);
|
||||
assert(existing_case != defined_cases.end());
|
||||
return existing_case->second;
|
||||
}
|
||||
|
||||
std::vector<instruction_ptr>& get_default_case() {
|
||||
assert(default_case);
|
||||
return *default_case;
|
||||
}
|
||||
|
||||
std::vector<instruction_ptr>& get_case_for(tag_type tag) {
|
||||
if(case_defined_for(tag)) return get_specific_case_for(tag);
|
||||
return get_default_case();
|
||||
}
|
||||
|
||||
bool case_defined_for(tag_type tag) {
|
||||
return defined_cases.find(tag) != defined_cases.end();
|
||||
}
|
||||
|
||||
bool default_case_defined() { return default_case.has_value(); }
|
||||
|
||||
size_t defined_cases_count() { return defined_cases.size(); }
|
||||
};
|
||||
|
||||
|
||||
struct case_strategy_bool {
|
||||
using tag_type = bool;
|
||||
using repr_type = bool;
|
||||
|
||||
tag_type tag_from_repr(repr_type b) { return b; }
|
||||
|
||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
||||
pattern_constr* cpat;
|
||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())) ||
|
||||
(cpat->constr != "True" && cpat->constr != "False") ||
|
||||
cpat->params.size() != 0)
|
||||
throw type_error(
|
||||
"pattern cannot be converted to a boolean",
|
||||
pt->loc);
|
||||
return cpat->constr == "True";
|
||||
}
|
||||
|
||||
void compile_branch(
|
||||
const branch_ptr& branch,
|
||||
const env_ptr& env,
|
||||
repr_type repr,
|
||||
std::vector<instruction_ptr>& into) {
|
||||
branch->expr->compile(env_ptr(new env_offset(1, env)), into);
|
||||
into.push_back(instruction_ptr(new instruction_slide(1)));
|
||||
}
|
||||
|
||||
size_t case_count(const type* type) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
void into_instructions(
|
||||
const type* type,
|
||||
case_mappings<case_strategy_bool>& ms,
|
||||
std::vector<instruction_ptr>& into) {
|
||||
if(ms.defined_cases_count() == 0) {
|
||||
for(auto& instruction : ms.get_default_case())
|
||||
into.push_back(std::move(instruction));
|
||||
return;
|
||||
}
|
||||
|
||||
into.push_back(instruction_ptr(new instruction_if(
|
||||
std::move(ms.get_case_for(true)),
|
||||
std::move(ms.get_case_for(false)))));
|
||||
}
|
||||
};
|
||||
|
||||
struct case_strategy_data {
|
||||
using tag_type = int;
|
||||
using repr_type = std::pair<const type_data::constructor*, const std::vector<std::string>*>;
|
||||
|
||||
tag_type tag_from_repr(const repr_type& repr) { return repr.first->tag; }
|
||||
|
||||
repr_type from_typed_pattern(const pattern_ptr& pt, const type* type) {
|
||||
pattern_constr* cpat;
|
||||
if(!(cpat = dynamic_cast<pattern_constr*>(pt.get())))
|
||||
throw type_error(
|
||||
"pattern cannot be interpreted as constructor.",
|
||||
pt->loc);
|
||||
return std::make_pair(
|
||||
&static_cast<const type_data*>(type)->constructors.find(cpat->constr)->second,
|
||||
&cpat->params);
|
||||
}
|
||||
|
||||
void compile_branch(
|
||||
const branch_ptr& branch,
|
||||
const env_ptr& env,
|
||||
const repr_type& repr,
|
||||
std::vector<instruction_ptr>& into) {
|
||||
env_ptr new_env = env;
|
||||
for(auto it = repr.second->rbegin(); it != repr.second->rend(); it++) {
|
||||
new_env = env_ptr(new env_var(*it, new_env));
|
||||
}
|
||||
|
||||
into.push_back(instruction_ptr(new instruction_split(repr.second->size())));
|
||||
branch->expr->compile(new_env, into);
|
||||
into.push_back(instruction_ptr(new instruction_slide(repr.second->size())));
|
||||
}
|
||||
|
||||
size_t case_count(const type* type) {
|
||||
return static_cast<const type_data*>(type)->constructors.size();
|
||||
}
|
||||
|
||||
void into_instructions(
|
||||
const type* type,
|
||||
case_mappings<case_strategy_data>& ms,
|
||||
std::vector<instruction_ptr>& into) {
|
||||
instruction_jump* jump_instruction = new instruction_jump();
|
||||
instruction_ptr inst(jump_instruction);
|
||||
|
||||
auto data_type = static_cast<const type_data*>(type);
|
||||
for(auto& constr : data_type->constructors) {
|
||||
if(!ms.case_defined_for(constr.second.tag)) continue;
|
||||
jump_instruction->branches.push_back(
|
||||
std::move(ms.get_specific_case_for(constr.second.tag)));
|
||||
jump_instruction->tag_mappings[constr.second.tag] =
|
||||
jump_instruction->branches.size() - 1;
|
||||
}
|
||||
|
||||
if(ms.default_case_defined()) {
|
||||
jump_instruction->branches.push_back(
|
||||
std::move(ms.get_default_case()));
|
||||
for(auto& constr : data_type->constructors) {
|
||||
if(ms.case_defined_for(constr.second.tag)) continue;
|
||||
jump_instruction->tag_mappings[constr.second.tag] =
|
||||
jump_instruction->branches.size();
|
||||
}
|
||||
}
|
||||
|
||||
into.push_back(std::move(inst));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void compile_case(const ast_case& node, const env_ptr& env, const type* type, std::vector<instruction_ptr>& into) {
|
||||
T strategy;
|
||||
case_mappings<T> cases;
|
||||
for(auto& branch : node.branches) {
|
||||
pattern_var* vpat;
|
||||
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||
if(cases.defined_cases_count() == strategy.case_count(type))
|
||||
throw type_error("redundant catch-all pattern", branch->pat->loc);
|
||||
auto& branch_into = cases.make_default_case();
|
||||
env_ptr new_env(new env_var(vpat->var, env));
|
||||
branch->expr->compile(new_env, branch_into);
|
||||
branch_into.push_back(instruction_ptr(new instruction_slide(1)));
|
||||
} else {
|
||||
auto repr = strategy.from_typed_pattern(branch->pat, type);
|
||||
auto& branch_into = cases.make_case_for(strategy.tag_from_repr(repr));
|
||||
strategy.compile_branch(branch, env, repr, branch_into);
|
||||
}
|
||||
}
|
||||
|
||||
if(!(cases.defined_cases_count() == strategy.case_count(type) ||
|
||||
cases.default_case_defined()))
|
||||
throw type_error("incomplete patterns", node.loc);
|
||||
|
||||
strategy.into_instructions(type, cases, into);
|
||||
}
|
||||
|
||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||
type_data* data;
|
||||
type_internal* internal;
|
||||
|
||||
of->compile(env, into);
|
||||
into.push_back(instruction_ptr(new instruction_eval()));
|
||||
|
||||
if(app_type && (data = dynamic_cast<type_data*>(app_type->constructor.get()))) {
|
||||
compile_case<case_strategy_data>(*this, env, data, into);
|
||||
return;
|
||||
} else if(app_type && (internal = dynamic_cast<type_internal*>(app_type->constructor.get()))) {
|
||||
if(internal->name == "Bool") {
|
||||
compile_case<case_strategy_bool>(*this, env, data, into);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw type_error("attempting unsupported case analysis", of->loc);
|
||||
}
|
||||
|
||||
void ast_let::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "LET: " << std::endl;
|
||||
in->print(indent + 1, to);
|
||||
}
|
||||
|
||||
void ast_let::find_free(std::set<std::string>& into) {
|
||||
definitions.find_free(into);
|
||||
std::set<std::string> all_free;
|
||||
in->find_free(all_free);
|
||||
for(auto& free_var : all_free) {
|
||||
if(definitions.defs_defn.find(free_var) == definitions.defs_defn.end())
|
||||
into.insert(free_var);
|
||||
}
|
||||
}
|
||||
|
||||
type_ptr ast_let::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
definitions.typecheck(mgr, env);
|
||||
return in->typecheck(mgr, definitions.env);
|
||||
}
|
||||
|
||||
void ast_let::translate(global_scope& scope) {
|
||||
for(auto& def : definitions.defs_data) {
|
||||
def.second->into_globals(scope);
|
||||
}
|
||||
for(auto& def : definitions.defs_defn) {
|
||||
size_t original_params = def.second->params.size();
|
||||
std::string original_name = def.second->name;
|
||||
auto& global_definition = def.second->into_global(scope);
|
||||
size_t captured = global_definition.params.size() - original_params;
|
||||
|
||||
type_env_ptr mangled_env = type_scope(env);
|
||||
mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
|
||||
mangled_env->set_mangled_name(def.first, global_definition.name);
|
||||
|
||||
ast_ptr global_app(new ast_lid(original_name));
|
||||
global_app->env = mangled_env;
|
||||
for(auto& param : global_definition.params) {
|
||||
if(!(captured--)) break;
|
||||
ast_ptr new_arg(new ast_lid(param));
|
||||
new_arg->env = env;
|
||||
global_app = ast_ptr(new ast_app(std::move(global_app), std::move(new_arg)));
|
||||
global_app->env = env;
|
||||
}
|
||||
translated_definitions.push_back({ def.first, std::move(global_app) });
|
||||
}
|
||||
in->translate(scope);
|
||||
}
|
||||
|
||||
void ast_let::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
into.push_back(instruction_ptr(new instruction_alloc(translated_definitions.size())));
|
||||
env_ptr new_env = env;
|
||||
for(auto& def : translated_definitions) {
|
||||
new_env = env_ptr(new env_var(def.first, std::move(new_env)));
|
||||
}
|
||||
int offset = translated_definitions.size() - 1;
|
||||
for(auto& def : translated_definitions) {
|
||||
def.second->compile(new_env, into);
|
||||
into.push_back(instruction_ptr(new instruction_update(offset--)));
|
||||
}
|
||||
in->compile(new_env, into);
|
||||
into.push_back(instruction_ptr(new instruction_slide(translated_definitions.size())));
|
||||
}
|
||||
|
||||
void ast_lambda::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "LAMBDA";
|
||||
for(auto& param : params) {
|
||||
to << " " << param;
|
||||
}
|
||||
to << std::endl;
|
||||
body->print(indent+1, to);
|
||||
}
|
||||
|
||||
void ast_lambda::find_free(std::set<std::string>& into) {
|
||||
body->find_free(free_variables);
|
||||
for(auto& param : params) {
|
||||
free_variables.erase(param);
|
||||
}
|
||||
into.insert(free_variables.begin(), free_variables.end());
|
||||
}
|
||||
|
||||
type_ptr ast_lambda::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = env;
|
||||
var_env = type_scope(env);
|
||||
type_ptr return_type = mgr.new_type();
|
||||
type_ptr full_type = return_type;
|
||||
|
||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||
type_ptr param_type = mgr.new_type();
|
||||
var_env->bind(*it, param_type);
|
||||
full_type = type_ptr(new type_arr(std::move(param_type), full_type));
|
||||
}
|
||||
|
||||
mgr.unify(return_type, body->typecheck(mgr, var_env), body->loc);
|
||||
return full_type;
|
||||
}
|
||||
|
||||
void ast_lambda::translate(global_scope& scope) {
|
||||
std::vector<std::string> function_params;
|
||||
for(auto& free_variable : free_variables) {
|
||||
if(env->is_global(free_variable)) continue;
|
||||
function_params.push_back(free_variable);
|
||||
}
|
||||
size_t captured_count = function_params.size();
|
||||
function_params.insert(function_params.end(), params.begin(), params.end());
|
||||
|
||||
auto& new_function = scope.add_function("lambda", std::move(function_params), std::move(body));
|
||||
type_env_ptr mangled_env = type_scope(env);
|
||||
mangled_env->bind("lambda", type_scheme_ptr(nullptr), visibility::global);
|
||||
mangled_env->set_mangled_name("lambda", new_function.name);
|
||||
ast_ptr new_application = ast_ptr(new ast_lid("lambda"));
|
||||
new_application->env = mangled_env;
|
||||
|
||||
for(auto& param : new_function.params) {
|
||||
if(!(captured_count--)) break;
|
||||
ast_ptr new_arg = ast_ptr(new ast_lid(param));
|
||||
new_arg->env = env;
|
||||
new_application = ast_ptr(new ast_app(std::move(new_application), std::move(new_arg)));
|
||||
new_application->env = env;
|
||||
}
|
||||
translated = std::move(new_application);
|
||||
}
|
||||
|
||||
void ast_lambda::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||
translated->compile(env, into);
|
||||
}
|
||||
|
||||
void pattern_var::print(std::ostream& to) const {
|
||||
to << var;
|
||||
}
|
||||
|
||||
void pattern_var::find_variables(std::set<std::string>& into) const {
|
||||
into.insert(var);
|
||||
}
|
||||
|
||||
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||
env->bind(var, t);
|
||||
}
|
||||
|
||||
void pattern_constr::print(std::ostream& to) const {
|
||||
to << constr;
|
||||
for(auto& param : params) {
|
||||
to << " " << param;
|
||||
}
|
||||
}
|
||||
|
||||
void pattern_constr::find_variables(std::set<std::string>& into) const {
|
||||
into.insert(params.begin(), params.end());
|
||||
}
|
||||
|
||||
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
|
||||
if(!constructor_type_scheme) {
|
||||
throw type_error(std::string("pattern using unknown constructor ") + constr, loc);
|
||||
}
|
||||
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
|
||||
|
||||
for(auto& param : params) {
|
||||
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||
if(!arr) throw type_error("too many parameters in constructor pattern", loc);
|
||||
|
||||
env->bind(param, arr->left);
|
||||
constructor_type = arr->right;
|
||||
}
|
||||
|
||||
mgr.unify(t, constructor_type, loc);
|
||||
}
|
||||
195
code/compiler/13/ast.hpp
Normal file
@@ -0,0 +1,195 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "binop.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "env.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "location.hh"
|
||||
#include "global_scope.hpp"
|
||||
|
||||
struct ast {
|
||||
type_env_ptr env;
|
||||
yy::location loc;
|
||||
|
||||
ast(yy::location l) : env(nullptr), loc(std::move(l)) {}
|
||||
virtual ~ast() = default;
|
||||
|
||||
virtual void print(int indent, std::ostream& to) const = 0;
|
||||
virtual void find_free(std::set<std::string>& into) = 0;
|
||||
virtual type_ptr typecheck(type_mgr& mgr, type_env_ptr& env) = 0;
|
||||
virtual void translate(global_scope& scope) = 0;
|
||||
virtual void compile(const env_ptr& env,
|
||||
std::vector<instruction_ptr>& into) const = 0;
|
||||
};
|
||||
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct pattern {
|
||||
yy::location loc;
|
||||
|
||||
pattern(yy::location l) : loc(std::move(l)) {}
|
||||
virtual ~pattern() = default;
|
||||
|
||||
virtual void print(std::ostream& to) const = 0;
|
||||
virtual void find_variables(std::set<std::string>& into) const = 0;
|
||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
|
||||
};
|
||||
|
||||
using pattern_ptr = std::unique_ptr<pattern>;
|
||||
|
||||
struct branch {
|
||||
pattern_ptr pat;
|
||||
ast_ptr expr;
|
||||
|
||||
branch(pattern_ptr p, ast_ptr a)
|
||||
: pat(std::move(p)), expr(std::move(a)) {}
|
||||
};
|
||||
|
||||
using branch_ptr = std::unique_ptr<branch>;
|
||||
|
||||
struct ast_int : public ast {
|
||||
int value;
|
||||
|
||||
explicit ast_int(int v, yy::location l = yy::location())
|
||||
: ast(std::move(l)), value(v) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_lid : public ast {
|
||||
std::string id;
|
||||
|
||||
explicit ast_lid(std::string i, yy::location l = yy::location())
|
||||
: ast(std::move(l)), id(std::move(i)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_uid : public ast {
|
||||
std::string id;
|
||||
|
||||
explicit ast_uid(std::string i, yy::location l = yy::location())
|
||||
: ast(std::move(l)), id(std::move(i)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_binop : public ast {
|
||||
binop op;
|
||||
ast_ptr left;
|
||||
ast_ptr right;
|
||||
|
||||
ast_binop(binop o, ast_ptr l, ast_ptr r, yy::location lc = yy::location())
|
||||
: ast(std::move(lc)), op(o), left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_app : public ast {
|
||||
ast_ptr left;
|
||||
ast_ptr right;
|
||||
|
||||
ast_app(ast_ptr l, ast_ptr r, yy::location lc = yy::location())
|
||||
: ast(std::move(lc)), left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_case : public ast {
|
||||
ast_ptr of;
|
||||
type_ptr input_type;
|
||||
std::vector<branch_ptr> branches;
|
||||
|
||||
ast_case(ast_ptr o, std::vector<branch_ptr> b, yy::location l = yy::location())
|
||||
: ast(std::move(l)), of(std::move(o)), branches(std::move(b)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_let : public ast {
|
||||
using basic_definition = std::pair<std::string, ast_ptr>;
|
||||
|
||||
definition_group definitions;
|
||||
ast_ptr in;
|
||||
|
||||
std::vector<basic_definition> translated_definitions;
|
||||
|
||||
ast_let(definition_group g, ast_ptr i, yy::location l = yy::location())
|
||||
: ast(std::move(l)), definitions(std::move(g)), in(std::move(i)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct ast_lambda : public ast {
|
||||
std::vector<std::string> params;
|
||||
ast_ptr body;
|
||||
|
||||
type_env_ptr var_env;
|
||||
|
||||
std::set<std::string> free_variables;
|
||||
ast_ptr translated;
|
||||
|
||||
ast_lambda(std::vector<std::string> ps, ast_ptr b, yy::location l = yy::location())
|
||||
: ast(std::move(l)), params(std::move(ps)), body(std::move(b)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void find_free(std::set<std::string>& into);
|
||||
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
void translate(global_scope& scope);
|
||||
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||
};
|
||||
|
||||
struct pattern_var : public pattern {
|
||||
std::string var;
|
||||
|
||||
pattern_var(std::string v, yy::location l = yy::location())
|
||||
: pattern(std::move(l)), var(std::move(v)) {}
|
||||
|
||||
void print(std::ostream &to) const;
|
||||
void find_variables(std::set<std::string>& into) const;
|
||||
void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
||||
};
|
||||
|
||||
struct pattern_constr : public pattern {
|
||||
std::string constr;
|
||||
std::vector<std::string> params;
|
||||
|
||||
pattern_constr(std::string c, std::vector<std::string> p, yy::location l = yy::location())
|
||||
: pattern(std::move(l)), constr(std::move(c)), params(std::move(p)) {}
|
||||
|
||||
void print(std::ostream &to) const;
|
||||
void find_variables(std::set<std::string>& into) const;
|
||||
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
|
||||
};
|
||||
27
code/compiler/13/binop.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
#include "binop.hpp"
|
||||
|
||||
std::string op_name(binop op) {
|
||||
switch(op) {
|
||||
case PLUS: return "+";
|
||||
case MINUS: return "-";
|
||||
case TIMES: return "*";
|
||||
case DIVIDE: return "/";
|
||||
case MODULO: return "%";
|
||||
case EQUALS: return "==";
|
||||
case LESS_EQUALS: return "<=";
|
||||
}
|
||||
return "??";
|
||||
}
|
||||
|
||||
std::string op_action(binop op) {
|
||||
switch(op) {
|
||||
case PLUS: return "plus";
|
||||
case MINUS: return "minus";
|
||||
case TIMES: return "times";
|
||||
case DIVIDE: return "divide";
|
||||
case MODULO: return "modulo";
|
||||
case EQUALS: return "equals";
|
||||
case LESS_EQUALS: return "less_equals";
|
||||
}
|
||||
return "??";
|
||||
}
|
||||
15
code/compiler/13/binop.hpp
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
|
||||
enum binop {
|
||||
PLUS,
|
||||
MINUS,
|
||||
TIMES,
|
||||
DIVIDE,
|
||||
MODULO,
|
||||
EQUALS,
|
||||
LESS_EQUALS,
|
||||
};
|
||||
|
||||
std::string op_name(binop op);
|
||||
std::string op_action(binop op);
|
||||
148
code/compiler/13/definition.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
#include "definition.hpp"
|
||||
#include <cassert>
|
||||
#include "error.hpp"
|
||||
#include "ast.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "graph.hpp"
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Type.h>
|
||||
|
||||
void definition_defn::find_free() {
|
||||
body->find_free(free_variables);
|
||||
for(auto& param : params) {
|
||||
free_variables.erase(param);
|
||||
}
|
||||
}
|
||||
|
||||
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v) {
|
||||
this->env = env;
|
||||
var_env = type_scope(env);
|
||||
return_type = mgr.new_type();
|
||||
full_type = return_type;
|
||||
|
||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||
type_ptr param_type = mgr.new_type();
|
||||
full_type = type_ptr(new type_arr(param_type, full_type));
|
||||
var_env->bind(*it, param_type);
|
||||
}
|
||||
env->bind(name, full_type, v);
|
||||
}
|
||||
|
||||
void definition_defn::typecheck(type_mgr& mgr) {
|
||||
type_ptr body_type = body->typecheck(mgr, var_env);
|
||||
mgr.unify(return_type, body_type);
|
||||
}
|
||||
|
||||
|
||||
global_function& definition_defn::into_global(global_scope& scope) {
|
||||
std::vector<std::string> all_params;
|
||||
for(auto& free : free_variables) {
|
||||
if(env->is_global(free)) continue;
|
||||
all_params.push_back(free);
|
||||
}
|
||||
all_params.insert(all_params.end(), params.begin(), params.end());
|
||||
body->translate(scope);
|
||||
return scope.add_function(name, std::move(all_params), std::move(body));
|
||||
}
|
||||
|
||||
void definition_data::insert_types(type_env_ptr& env) {
|
||||
this->env = env;
|
||||
env->bind_type(name, type_ptr(new type_data(name, vars.size())));
|
||||
}
|
||||
|
||||
void definition_data::insert_constructors() const {
|
||||
type_ptr this_type_ptr = env->lookup_type(name);
|
||||
type_data* this_type = static_cast<type_data*>(this_type_ptr.get());
|
||||
int next_tag = 0;
|
||||
|
||||
std::set<std::string> var_set;
|
||||
type_app* return_app = new type_app(std::move(this_type_ptr));
|
||||
type_ptr return_type(return_app);
|
||||
for(auto& var : vars) {
|
||||
if(var_set.find(var) != var_set.end())
|
||||
throw std::runtime_error(
|
||||
std::string("type variable ") +
|
||||
var + std::string(" used twice in data type definition."));
|
||||
var_set.insert(var);
|
||||
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||
}
|
||||
|
||||
for(auto& constructor : constructors) {
|
||||
constructor->tag = next_tag;
|
||||
this_type->constructors[constructor->name] = { next_tag++ };
|
||||
|
||||
type_ptr full_type = return_type;
|
||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||
type_ptr type = (*it)->to_type(var_set, env);
|
||||
full_type = type_ptr(new type_arr(type, full_type));
|
||||
}
|
||||
|
||||
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
||||
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
||||
env->bind(constructor->name, full_scheme);
|
||||
}
|
||||
}
|
||||
|
||||
void definition_data::into_globals(global_scope& scope) {
|
||||
for(auto& constructor : constructors) {
|
||||
global_constructor& c = scope.add_constructor(
|
||||
constructor->name, constructor->tag, constructor->types.size());
|
||||
env->set_mangled_name(constructor->name, c.name);
|
||||
}
|
||||
}
|
||||
|
||||
void definition_group::find_free(std::set<std::string>& into) {
|
||||
for(auto& def_pair : defs_defn) {
|
||||
def_pair.second->find_free();
|
||||
for(auto& free_var : def_pair.second->free_variables) {
|
||||
if(defs_defn.find(free_var) == defs_defn.end()) {
|
||||
into.insert(free_var);
|
||||
} else {
|
||||
def_pair.second->nearby_variables.insert(free_var);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||
this->env = type_scope(env);
|
||||
|
||||
for(auto& def_data : defs_data) {
|
||||
def_data.second->insert_types(this->env);
|
||||
}
|
||||
for(auto& def_data : defs_data) {
|
||||
def_data.second->insert_constructors();
|
||||
}
|
||||
|
||||
function_graph dependency_graph;
|
||||
|
||||
for(auto& def_defn : defs_defn) {
|
||||
def_defn.second->find_free();
|
||||
dependency_graph.add_function(def_defn.second->name);
|
||||
|
||||
for(auto& dependency : def_defn.second->nearby_variables) {
|
||||
assert(defs_defn.find(dependency) != defs_defn.end());
|
||||
dependency_graph.add_edge(def_defn.second->name, dependency);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<group_ptr> groups = dependency_graph.compute_order();
|
||||
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
|
||||
auto& group = *it;
|
||||
for(auto& def_defnn_name : group->members) {
|
||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||
def_defn->insert_types(mgr, this->env, vis);
|
||||
}
|
||||
for(auto& def_defnn_name : group->members) {
|
||||
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||
def_defn->typecheck(mgr);
|
||||
}
|
||||
for(auto& def_defnn_name : group->members) {
|
||||
this->env->generalize(def_defnn_name, *group, mgr);
|
||||
}
|
||||
}
|
||||
}
|
||||
91
code/compiler/13/definition.hpp
Normal file
@@ -0,0 +1,91 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "parsed_type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "location.hh"
|
||||
#include "global_scope.hpp"
|
||||
|
||||
struct ast;
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct constructor {
|
||||
std::string name;
|
||||
std::vector<parsed_type_ptr> types;
|
||||
int8_t tag;
|
||||
|
||||
constructor(std::string n, std::vector<parsed_type_ptr> ts)
|
||||
: name(std::move(n)), types(std::move(ts)) {}
|
||||
};
|
||||
|
||||
using constructor_ptr = std::unique_ptr<constructor>;
|
||||
|
||||
struct definition_defn {
|
||||
std::string name;
|
||||
std::vector<std::string> params;
|
||||
ast_ptr body;
|
||||
yy::location loc;
|
||||
|
||||
type_env_ptr env;
|
||||
type_env_ptr var_env;
|
||||
std::set<std::string> free_variables;
|
||||
std::set<std::string> nearby_variables;
|
||||
type_ptr full_type;
|
||||
type_ptr return_type;
|
||||
|
||||
definition_defn(
|
||||
std::string n,
|
||||
std::vector<std::string> p,
|
||||
ast_ptr b,
|
||||
yy::location l = yy::location())
|
||||
: name(std::move(n)), params(std::move(p)), body(std::move(b)), loc(std::move(l)) {
|
||||
|
||||
}
|
||||
|
||||
void find_free();
|
||||
void insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
|
||||
void typecheck(type_mgr& mgr);
|
||||
|
||||
global_function& into_global(global_scope& scope);
|
||||
};
|
||||
|
||||
using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
||||
|
||||
struct definition_data {
|
||||
std::string name;
|
||||
std::vector<std::string> vars;
|
||||
std::vector<constructor_ptr> constructors;
|
||||
yy::location loc;
|
||||
|
||||
type_env_ptr env;
|
||||
|
||||
definition_data(
|
||||
std::string n,
|
||||
std::vector<std::string> vs,
|
||||
std::vector<constructor_ptr> cs,
|
||||
yy::location l = yy::location())
|
||||
: name(std::move(n)), vars(std::move(vs)), constructors(std::move(cs)), loc(std::move(l)) {}
|
||||
|
||||
void insert_types(type_env_ptr& env);
|
||||
void insert_constructors() const;
|
||||
|
||||
void into_globals(global_scope& scope);
|
||||
};
|
||||
|
||||
using definition_data_ptr = std::unique_ptr<definition_data>;
|
||||
|
||||
struct definition_group {
|
||||
std::map<std::string, definition_data_ptr> defs_data;
|
||||
std::map<std::string, definition_defn_ptr> defs_defn;
|
||||
visibility vis;
|
||||
type_env_ptr env;
|
||||
|
||||
definition_group(visibility v = visibility::local) : vis(v) {}
|
||||
|
||||
void find_free(std::set<std::string>& into);
|
||||
void typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||
};
|
||||
24
code/compiler/13/env.cpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#include "env.hpp"
|
||||
#include <cassert>
|
||||
|
||||
int env_var::get_offset(const std::string& name) const {
|
||||
if(name == this->name) return 0;
|
||||
assert(parent);
|
||||
return parent->get_offset(name) + 1;
|
||||
}
|
||||
|
||||
bool env_var::has_variable(const std::string& name) const {
|
||||
if(name == this->name) return true;
|
||||
if(parent) return parent->has_variable(name);
|
||||
return false;
|
||||
}
|
||||
|
||||
int env_offset::get_offset(const std::string& name) const {
|
||||
assert(parent);
|
||||
return parent->get_offset(name) + offset;
|
||||
}
|
||||
|
||||
bool env_offset::has_variable(const std::string& name) const {
|
||||
if(parent) return parent->has_variable(name);
|
||||
return false;
|
||||
}
|
||||
34
code/compiler/13/env.hpp
Normal file
@@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
struct env {
|
||||
virtual ~env() = default;
|
||||
|
||||
virtual int get_offset(const std::string& name) const = 0;
|
||||
virtual bool has_variable(const std::string& name) const = 0;
|
||||
};
|
||||
|
||||
using env_ptr = std::shared_ptr<env>;
|
||||
|
||||
struct env_var : public env {
|
||||
std::string name;
|
||||
env_ptr parent;
|
||||
|
||||
env_var(std::string n, env_ptr p)
|
||||
: name(std::move(n)), parent(std::move(p)) {}
|
||||
|
||||
int get_offset(const std::string& name) const;
|
||||
bool has_variable(const std::string& name) const;
|
||||
};
|
||||
|
||||
struct env_offset : public env {
|
||||
int offset;
|
||||
env_ptr parent;
|
||||
|
||||
env_offset(int o, env_ptr p)
|
||||
: offset(o), parent(std::move(p)) {}
|
||||
|
||||
int get_offset(const std::string& name) const;
|
||||
bool has_variable(const std::string& name) const;
|
||||
};
|
||||
28
code/compiler/13/error.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include "error.hpp"
|
||||
|
||||
const char* type_error::what() const noexcept {
|
||||
return "an error occured while checking the types of the program";
|
||||
}
|
||||
|
||||
void type_error::pretty_print(std::ostream& to, parse_driver& drv) {
|
||||
to << "encountered error while typechecking program: ";
|
||||
to << description << std::endl;
|
||||
|
||||
if(loc) {
|
||||
to << "occuring on line " << loc->begin.line << ":" << std::endl;
|
||||
to << std::endl << "```" << std::endl;
|
||||
drv.print_highlighted_location(to, *loc);
|
||||
to << "```" << std::endl << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void unification_error::pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr) {
|
||||
type_error::pretty_print(to, drv);
|
||||
to << "the expected type was:" << std::endl;
|
||||
to << " \033[34m";
|
||||
left->print(mgr, to);
|
||||
to << std::endl << "\033[0mwhile the actual type was:" << std::endl;
|
||||
to << " \033[32m";
|
||||
right->print(mgr, to);
|
||||
to << "\033[0m" << std::endl;
|
||||
}
|
||||
30
code/compiler/13/error.hpp
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
#include <exception>
|
||||
#include <optional>
|
||||
#include "type.hpp"
|
||||
#include "location.hh"
|
||||
#include "parse_driver.hpp"
|
||||
|
||||
using maybe_location = std::optional<yy::location>;
|
||||
|
||||
struct type_error : std::exception {
|
||||
std::string description;
|
||||
std::optional<yy::location> loc;
|
||||
|
||||
type_error(std::string d, maybe_location l = std::nullopt)
|
||||
: description(std::move(d)), loc(std::move(l)) {}
|
||||
|
||||
const char* what() const noexcept override;
|
||||
void pretty_print(std::ostream& to, parse_driver& drv);
|
||||
};
|
||||
|
||||
struct unification_error : public type_error {
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
|
||||
unification_error(type_ptr l, type_ptr r, maybe_location loc = std::nullopt)
|
||||
: left(std::move(l)), right(std::move(r)),
|
||||
type_error("failed to unify types", std::move(loc)) {}
|
||||
|
||||
void pretty_print(std::ostream& to, parse_driver& drv, type_mgr& mgr);
|
||||
};
|
||||
2
code/compiler/13/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
data Bool = { True, False }
|
||||
defn main = { 3 + True }
|
||||
1
code/compiler/13/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
||||
defn main = { 1 2 3 4 5 }
|
||||
8
code/compiler/13/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x y z -> { x }
|
||||
}
|
||||
}
|
||||
6
code/compiler/13/examples/errors/double_catchall.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
n -> { 2 }
|
||||
n -> { 1 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
data Pair a a = { MkPair a a }
|
||||
7
code/compiler/13/examples/errors/exhausted_patterns.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
True -> { 1 }
|
||||
False -> { 0 }
|
||||
n -> { 2 }
|
||||
}
|
||||
}
|
||||
5
code/compiler/13/examples/errors/incomplete_patterns.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
True -> { 1 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
defn add x y = { x + y }
|
||||
|
||||
defn main = {
|
||||
case add of {
|
||||
n -> { 1 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
n -> { 2 }
|
||||
True -> { 1 }
|
||||
False -> { 0 }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x -> { x }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x y z -> { x }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
defn main = {
|
||||
case True of {
|
||||
NotBool -> { 1 }
|
||||
True -> { 2 }
|
||||
}
|
||||
}
|
||||
1
code/compiler/13/examples/errors/type_redefinition.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Bool = { True, False }
|
||||
3
code/compiler/13/examples/errors/unknown_lid.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
defn main = {
|
||||
weird 1
|
||||
}
|
||||
1
code/compiler/13/examples/errors/unknown_type.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Wrapper = { Wrap Weird }
|
||||
1
code/compiler/13/examples/errors/unknown_type_param.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Wrapper = { Wrap a }
|
||||
3
code/compiler/13/examples/errors/unknown_uid.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
defn main = {
|
||||
Weird 1
|
||||
}
|
||||
1
code/compiler/13/examples/errors/wrong_type_kind.txt
Normal file
@@ -0,0 +1 @@
|
||||
data Wrapper = { Wrap (Int Bool) }
|
||||
17
code/compiler/13/examples/fixpoint.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn fix f = { let { defn x = { f x } } in { x } }
|
||||
defn fixpointOnes fo = { Cons 1 fo }
|
||||
defn sumTwo l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> {
|
||||
x + case xs of {
|
||||
Nil -> { 0 }
|
||||
Cons y ys -> { y }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn main = { sumTwo (fix fixpointOnes) }
|
||||
8
code/compiler/13/examples/if.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
data Bool = { True, False }
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
defn main = { if (if True False True) 11 3 }
|
||||
19
code/compiler/13/examples/lambda.txt
Normal file
@@ -0,0 +1,19 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn sum l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> { x + sum xs}
|
||||
}
|
||||
}
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
sum (map \x -> { x * x } (map (\x -> { x + x }) (Cons 1 (Cons 2 (Cons 3 Nil)))))
|
||||
}
|
||||
47
code/compiler/13/examples/letin.txt
Normal file
@@ -0,0 +1,47 @@
|
||||
data Bool = { True, False }
|
||||
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn mergeUntil l r p = {
|
||||
let {
|
||||
defn mergeLeft nl nr = {
|
||||
case nl of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { if (p x) (Cons x (mergeRight xs nr)) Nil }
|
||||
}
|
||||
}
|
||||
defn mergeRight nl nr = {
|
||||
case nr of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { if (p x) (Cons x (mergeLeft nl xs)) Nil }
|
||||
}
|
||||
}
|
||||
} in {
|
||||
mergeLeft l r
|
||||
}
|
||||
}
|
||||
|
||||
defn const x y = { x }
|
||||
|
||||
defn sum l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> { x + sum xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
let {
|
||||
defn firstList = { Cons 1 (Cons 3 (Cons 5 Nil)) }
|
||||
defn secondList = { Cons 2 (Cons 4 (Cons 6 Nil)) }
|
||||
} in {
|
||||
sum (mergeUntil firstList secondList (const True))
|
||||
}
|
||||
}
|
||||
32
code/compiler/13/examples/list.txt
Normal file
@@ -0,0 +1,32 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn foldl f b l = {
|
||||
case l of {
|
||||
Nil -> { b }
|
||||
Cons x xs -> { foldl f (f b x) xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn foldr f b l = {
|
||||
case l of {
|
||||
Nil -> { b }
|
||||
Cons x xs -> { f x (foldr f b xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn list = { Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))) }
|
||||
|
||||
defn add x y = { x + y }
|
||||
defn sum l = { foldr add 0 l }
|
||||
|
||||
defn skipAdd x y = { y + 1 }
|
||||
defn length l = { foldr skipAdd 0 l }
|
||||
|
||||
defn main = { sum list + length list }
|
||||
25
code/compiler/13/examples/mutual_recursion.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
data Bool = { True, False }
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn oddEven l e = {
|
||||
case l of {
|
||||
Nil -> { e }
|
||||
Cons x xs -> { evenOdd xs e }
|
||||
}
|
||||
}
|
||||
|
||||
defn evenOdd l e = {
|
||||
case l of {
|
||||
Nil -> { e }
|
||||
Cons x xs -> { oddEven xs e }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = { if (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) True) (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) 1) 3 }
|
||||
23
code/compiler/13/examples/packed.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
data Pair a b = { Pair a b }
|
||||
|
||||
defn packer = {
|
||||
let {
|
||||
data Packed a = { Packed a }
|
||||
defn pack a = { Packed a }
|
||||
defn unpack p = {
|
||||
case p of {
|
||||
Packed a -> { a }
|
||||
}
|
||||
}
|
||||
} in {
|
||||
Pair pack unpack
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
case packer of {
|
||||
Pair pack unpack -> {
|
||||
unpack (pack 3)
|
||||
}
|
||||
}
|
||||
}
|
||||
17
code/compiler/13/examples/pair.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
data Pair a b = { MkPair a b }
|
||||
|
||||
defn fst p = {
|
||||
case p of {
|
||||
MkPair a b -> { a }
|
||||
}
|
||||
}
|
||||
|
||||
defn snd p = {
|
||||
case p of {
|
||||
MkPair a b -> { b }
|
||||
}
|
||||
}
|
||||
|
||||
defn pair = { MkPair 1 (MkPair 2 3) }
|
||||
|
||||
defn main = { fst pair + snd (snd pair) }
|
||||
122
code/compiler/13/examples/primes.txt
Normal file
@@ -0,0 +1,122 @@
|
||||
data List = { Nil, Cons Nat List }
|
||||
data Bool = { True, False }
|
||||
data Nat = { O, S Nat }
|
||||
|
||||
defn if c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn toInt n = {
|
||||
case n of {
|
||||
O -> { 0 }
|
||||
S np -> { 1 + toInt np }
|
||||
}
|
||||
}
|
||||
|
||||
defn lte n m = {
|
||||
case m of {
|
||||
O -> {
|
||||
case n of {
|
||||
O -> { True }
|
||||
S np -> { False }
|
||||
}
|
||||
}
|
||||
S mp -> {
|
||||
case n of {
|
||||
O -> { True }
|
||||
S np -> { lte np mp }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn minus n m = {
|
||||
case m of {
|
||||
O -> { n }
|
||||
S mp -> {
|
||||
case n of {
|
||||
O -> { O }
|
||||
S np -> {
|
||||
minus np mp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn mod n m = {
|
||||
if (lte m n) (mod (minus n m) m) n
|
||||
}
|
||||
|
||||
defn notDivisibleBy n m = {
|
||||
case (mod m n) of {
|
||||
O -> { False }
|
||||
S mp -> { True }
|
||||
}
|
||||
}
|
||||
|
||||
defn filter f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { if (f x) (Cons x (filter f xs)) (filter f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn nats = {
|
||||
Cons (S (S O)) (map S nats)
|
||||
}
|
||||
|
||||
defn primesRec l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
|
||||
}
|
||||
}
|
||||
|
||||
defn primes = {
|
||||
primesRec nats
|
||||
}
|
||||
|
||||
defn take n l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> {
|
||||
case n of {
|
||||
O -> { Nil }
|
||||
S np -> { Cons x (take np xs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { O }
|
||||
Cons x xs -> { x }
|
||||
}
|
||||
}
|
||||
|
||||
defn reverseAcc a l = {
|
||||
case l of {
|
||||
Nil -> { a }
|
||||
Cons x xs -> { reverseAcc (Cons x a) xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn reverse l = {
|
||||
reverseAcc Nil l
|
||||
}
|
||||
|
||||
defn main = {
|
||||
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
|
||||
}
|
||||
31
code/compiler/13/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
|
||||
#include "../runtime.h"
|
||||
|
||||
void f_add(struct stack* s) {
|
||||
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
|
||||
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
|
||||
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
|
||||
}
|
||||
|
||||
void f_main(struct stack* s) {
|
||||
// PushInt 320
|
||||
stack_push(s, (struct node_base*) alloc_num(320));
|
||||
|
||||
// PushInt 6
|
||||
stack_push(s, (struct node_base*) alloc_num(6));
|
||||
|
||||
// PushGlobal f_add (the function for +)
|
||||
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
|
||||
|
||||
struct node_base* left;
|
||||
struct node_base* right;
|
||||
|
||||
// MkApp
|
||||
left = stack_pop(s);
|
||||
right = stack_pop(s);
|
||||
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||
|
||||
// MkApp
|
||||
left = stack_pop(s);
|
||||
right = stack_pop(s);
|
||||
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||
}
|
||||
2
code/compiler/13/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
defn main = { sum 320 6 }
|
||||
defn sum x y = { x + y }
|
||||
3
code/compiler/13/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
defn add x y = { x + y }
|
||||
defn double x = { add x x }
|
||||
defn main = { double 163 }
|
||||
9
code/compiler/13/examples/works3.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
data List a = { Nil, Cons a (List a) }
|
||||
data Bool = { True, False }
|
||||
defn length l = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> { 1 + length xs }
|
||||
}
|
||||
}
|
||||
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) + length (Cons True (Cons False (Cons True Nil))) }
|
||||
16
code/compiler/13/examples/works4.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn add x y = { x + y }
|
||||
defn mul x y = { x * y }
|
||||
|
||||
defn foldr f b l = {
|
||||
case l of {
|
||||
Nil -> { b }
|
||||
Cons x xs -> { f x (foldr f b xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn main = {
|
||||
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
|
||||
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
|
||||
}
|
||||
17
code/compiler/13/examples/works5.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
data List = { Nil, Cons Int List }
|
||||
|
||||
defn sumZip l m = {
|
||||
case l of {
|
||||
Nil -> { 0 }
|
||||
Cons x xs -> {
|
||||
case m of {
|
||||
Nil -> { 0 }
|
||||
Cons y ys -> { x + y + sumZip xs ys }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn ones = { Cons 1 ones }
|
||||
|
||||
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||
83
code/compiler/13/global_scope.cpp
Normal file
@@ -0,0 +1,83 @@
|
||||
#include "global_scope.hpp"
|
||||
#include "ast.hpp"
|
||||
|
||||
void global_function::compile() {
|
||||
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||
new_env = env_ptr(new env_var(*it, new_env));
|
||||
}
|
||||
body->compile(new_env, instructions);
|
||||
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||
}
|
||||
|
||||
void global_function::declare_llvm(llvm_context& ctx) {
|
||||
generated_function = ctx.create_custom_function(name, params.size());
|
||||
}
|
||||
|
||||
void global_function::generate_llvm(llvm_context& ctx) {
|
||||
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, generated_function);
|
||||
}
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
void global_constructor::generate_llvm(llvm_context& ctx) {
|
||||
auto new_function =
|
||||
ctx.create_custom_function(name, arity);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||
for (auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
|
||||
global_function* new_function = new global_function(mangle_name(n), std::move(ps), std::move(b));
|
||||
functions.push_back(global_function_ptr(new_function));
|
||||
return *new_function;
|
||||
}
|
||||
|
||||
global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
|
||||
global_constructor* new_constructor = new global_constructor(mangle_name(n), t, a);
|
||||
constructors.push_back(global_constructor_ptr(new_constructor));
|
||||
return *new_constructor;
|
||||
}
|
||||
|
||||
void global_scope::compile() {
|
||||
for(auto& function : functions) {
|
||||
function->compile();
|
||||
}
|
||||
}
|
||||
|
||||
void global_scope::generate_llvm(llvm_context& ctx) {
|
||||
for(auto& constructor : constructors) {
|
||||
constructor->generate_llvm(ctx);
|
||||
}
|
||||
for(auto& function : functions) {
|
||||
function->declare_llvm(ctx);
|
||||
}
|
||||
for(auto& function : functions) {
|
||||
function->generate_llvm(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
std::string global_scope::mangle_name(const std::string& n) {
|
||||
auto occurence_it = occurence_count.find(n);
|
||||
int occurence = 0;
|
||||
if(occurence_it != occurence_count.end()) {
|
||||
occurence = occurence_it->second + 1;
|
||||
}
|
||||
occurence_count[n] = occurence;
|
||||
|
||||
std::string final_name = n;
|
||||
if (occurence != 0) {
|
||||
final_name += "_";
|
||||
final_name += std::to_string(occurence);
|
||||
}
|
||||
return final_name;
|
||||
}
|
||||
55
code/compiler/13/global_scope.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <llvm/IR/Function.h>
|
||||
#include "instruction.hpp"
|
||||
|
||||
struct ast;
|
||||
using ast_ptr = std::unique_ptr<ast>;
|
||||
|
||||
struct global_function {
|
||||
std::string name;
|
||||
std::vector<std::string> params;
|
||||
ast_ptr body;
|
||||
|
||||
std::vector<instruction_ptr> instructions;
|
||||
llvm::Function* generated_function;
|
||||
|
||||
global_function(std::string n, std::vector<std::string> ps, ast_ptr b)
|
||||
: name(std::move(n)), params(std::move(ps)), body(std::move(b)) {}
|
||||
|
||||
void compile();
|
||||
void declare_llvm(llvm_context& ctx);
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
};
|
||||
|
||||
using global_function_ptr = std::unique_ptr<global_function>;
|
||||
|
||||
struct global_constructor {
|
||||
std::string name;
|
||||
int8_t tag;
|
||||
size_t arity;
|
||||
|
||||
global_constructor(std::string n, int8_t t, size_t a)
|
||||
: name(std::move(n)), tag(t), arity(a) {}
|
||||
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
};
|
||||
|
||||
using global_constructor_ptr = std::unique_ptr<global_constructor>;
|
||||
|
||||
struct global_scope {
|
||||
std::map<std::string, int> occurence_count;
|
||||
std::vector<global_function_ptr> functions;
|
||||
std::vector<global_constructor_ptr> constructors;
|
||||
|
||||
global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
|
||||
global_constructor& add_constructor(std::string n, int8_t t, size_t a);
|
||||
|
||||
void compile();
|
||||
void generate_llvm(llvm_context& ctx);
|
||||
|
||||
private:
|
||||
std::string mangle_name(const std::string& n);
|
||||
};
|
||||
114
code/compiler/13/graph.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
#include "graph.hpp"
|
||||
|
||||
std::set<function_graph::edge> function_graph::compute_transitive_edges() {
|
||||
std::set<edge> transitive_edges;
|
||||
transitive_edges.insert(edges.begin(), edges.end());
|
||||
for(auto& connector : adjacency_lists) {
|
||||
for(auto& from : adjacency_lists) {
|
||||
edge to_connector { from.first, connector.first };
|
||||
for(auto& to : adjacency_lists) {
|
||||
edge full_jump { from.first, to.first };
|
||||
if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
|
||||
|
||||
edge from_connector { connector.first, to.first };
|
||||
if(transitive_edges.find(to_connector) != transitive_edges.end() &&
|
||||
transitive_edges.find(from_connector) != transitive_edges.end())
|
||||
transitive_edges.insert(std::move(full_jump));
|
||||
}
|
||||
}
|
||||
}
|
||||
return transitive_edges;
|
||||
}
|
||||
|
||||
void function_graph::create_groups(
|
||||
const std::set<edge>& transitive_edges,
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
group_id id_counter = 0;
|
||||
for(auto& vertex : adjacency_lists) {
|
||||
if(group_ids.find(vertex.first) != group_ids.end())
|
||||
continue;
|
||||
data_ptr new_group(new group_data);
|
||||
new_group->functions.insert(vertex.first);
|
||||
group_data_map[id_counter] = new_group;
|
||||
group_ids[vertex.first] = id_counter;
|
||||
for(auto& other_vertex : adjacency_lists) {
|
||||
if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
|
||||
transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
|
||||
group_ids[other_vertex.first] = id_counter;
|
||||
new_group->functions.insert(other_vertex.first);
|
||||
}
|
||||
}
|
||||
id_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
void function_graph::create_edges(
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
std::set<std::pair<group_id, group_id>> group_edges;
|
||||
for(auto& vertex : adjacency_lists) {
|
||||
auto vertex_id = group_ids[vertex.first];
|
||||
auto& vertex_data = group_data_map[vertex_id];
|
||||
for(auto& other_vertex : vertex.second) {
|
||||
auto other_id = group_ids[other_vertex];
|
||||
if(vertex_id == other_id) continue;
|
||||
if(group_edges.find({vertex_id, other_id}) != group_edges.end())
|
||||
continue;
|
||||
group_edges.insert({vertex_id, other_id});
|
||||
vertex_data->adjacency_list.insert(other_id);
|
||||
group_data_map[other_id]->indegree++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<group_ptr> function_graph::generate_order(
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
std::queue<group_id> id_queue;
|
||||
std::vector<group_ptr> output;
|
||||
for(auto& group : group_data_map) {
|
||||
if(group.second->indegree == 0) id_queue.push(group.first);
|
||||
}
|
||||
|
||||
while(!id_queue.empty()) {
|
||||
auto new_id = id_queue.front();
|
||||
auto& group_data = group_data_map[new_id];
|
||||
group_ptr output_group(new group);
|
||||
output_group->members = std::move(group_data->functions);
|
||||
id_queue.pop();
|
||||
|
||||
for(auto& adjacent_group : group_data->adjacency_list) {
|
||||
if(--group_data_map[adjacent_group]->indegree == 0)
|
||||
id_queue.push(adjacent_group);
|
||||
}
|
||||
|
||||
output.push_back(std::move(output_group));
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
std::set<function>& function_graph::add_function(const function& f) {
|
||||
auto adjacency_list_it = adjacency_lists.find(f);
|
||||
if(adjacency_list_it != adjacency_lists.end()) {
|
||||
return adjacency_list_it->second;
|
||||
} else {
|
||||
return adjacency_lists[f] = { };
|
||||
}
|
||||
}
|
||||
|
||||
void function_graph::add_edge(const function& from, const function& to) {
|
||||
add_function(from).insert(to);
|
||||
edges.insert({ from, to });
|
||||
}
|
||||
|
||||
std::vector<group_ptr> function_graph::compute_order() {
|
||||
std::set<edge> transitive_edges = compute_transitive_edges();
|
||||
std::map<function, group_id> group_ids;
|
||||
std::map<group_id, data_ptr> group_data_map;
|
||||
|
||||
create_groups(transitive_edges, group_ids, group_data_map);
|
||||
create_edges(group_ids, group_data_map);
|
||||
return generate_order(group_ids, group_data_map);
|
||||
}
|
||||
53
code/compiler/13/graph.hpp
Normal file
@@ -0,0 +1,53 @@
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
using function = std::string;
|
||||
|
||||
struct group {
|
||||
std::set<function> members;
|
||||
};
|
||||
|
||||
using group_ptr = std::unique_ptr<group>;
|
||||
|
||||
class function_graph {
|
||||
using group_id = size_t;
|
||||
|
||||
struct group_data {
|
||||
std::set<function> functions;
|
||||
std::set<group_id> adjacency_list;
|
||||
size_t indegree;
|
||||
|
||||
group_data() : indegree(0) {}
|
||||
};
|
||||
|
||||
using data_ptr = std::shared_ptr<group_data>;
|
||||
using edge = std::pair<function, function>;
|
||||
using group_edge = std::pair<group_id, group_id>;
|
||||
|
||||
std::map<function, std::set<function>> adjacency_lists;
|
||||
std::set<edge> edges;
|
||||
|
||||
std::set<edge> compute_transitive_edges();
|
||||
void create_groups(
|
||||
const std::set<edge>&,
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
void create_edges(
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
std::vector<group_ptr> generate_order(
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
|
||||
public:
|
||||
std::set<function>& add_function(const function& f);
|
||||
void add_edge(const function& from, const function& to);
|
||||
std::vector<group_ptr> compute_order();
|
||||
};
|
||||
219
code/compiler/13/instruction.cpp
Normal file
@@ -0,0 +1,219 @@
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include <llvm/IR/BasicBlock.h>
|
||||
#include <llvm/IR/Function.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static void print_indent(int n, std::ostream& to) {
|
||||
while(n--) to << " ";
|
||||
}
|
||||
|
||||
void instruction_pushint::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "PushInt(" << value << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value)));
|
||||
}
|
||||
|
||||
void instruction_pushglobal::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "PushGlobal(" << name << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto& global_f = ctx.custom_functions.at("f_" + name);
|
||||
auto arity = ctx.create_i32(global_f->arity);
|
||||
ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
|
||||
}
|
||||
|
||||
void instruction_push::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Push(" << offset << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset)));
|
||||
}
|
||||
|
||||
void instruction_pop::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Pop(" << count << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_popn(f, ctx.create_size(count));
|
||||
}
|
||||
|
||||
void instruction_mkapp::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "MkApp()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto left = ctx.create_pop(f);
|
||||
auto right = ctx.create_pop(f);
|
||||
ctx.create_push(f, ctx.create_app(f, left, right));
|
||||
}
|
||||
|
||||
void instruction_update::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Update(" << offset << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_update(f, ctx.create_size(offset));
|
||||
}
|
||||
|
||||
void instruction_pack::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Pack(" << tag << ", " << size << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag));
|
||||
}
|
||||
|
||||
void instruction_split::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Split()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_split(f, ctx.create_size(size));
|
||||
}
|
||||
|
||||
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Jump(" << std::endl;
|
||||
for(auto& instruction_set : branches) {
|
||||
for(auto& instruction : instruction_set) {
|
||||
instruction->print(indent + 2, to);
|
||||
}
|
||||
to << std::endl;
|
||||
}
|
||||
print_indent(indent, to);
|
||||
to << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
||||
auto tag = ctx.unwrap_data_tag(top_node);
|
||||
auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
|
||||
auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());
|
||||
std::vector<BasicBlock*> blocks;
|
||||
|
||||
for(auto& branch : branches) {
|
||||
auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
|
||||
ctx.builder.SetInsertPoint(branch_block);
|
||||
for(auto& instruction : branch) {
|
||||
instruction->gen_llvm(ctx, f);
|
||||
}
|
||||
ctx.builder.CreateBr(safety_block);
|
||||
blocks.push_back(branch_block);
|
||||
}
|
||||
|
||||
for(auto& mapping : tag_mappings) {
|
||||
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
|
||||
}
|
||||
|
||||
ctx.builder.SetInsertPoint(safety_block);
|
||||
}
|
||||
|
||||
void instruction_if::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "If(" << std::endl;
|
||||
for(auto& instruction : on_true) {
|
||||
instruction->print(indent + 2, to);
|
||||
}
|
||||
to << std::endl;
|
||||
for(auto& instruction : on_false) {
|
||||
instruction->print(indent + 2, to);
|
||||
}
|
||||
print_indent(indent, to);
|
||||
to << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_if::gen_llvm(llvm_context& ctx, llvm::Function* f) const {
|
||||
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
||||
auto num = ctx.unwrap_num(top_node);
|
||||
|
||||
auto nonzero_block = BasicBlock::Create(ctx.ctx, "nonzero", f);
|
||||
auto zero_block = BasicBlock::Create(ctx.ctx, "zero", f);
|
||||
auto resume_block = BasicBlock::Create(ctx.ctx, "resume", f);
|
||||
auto switch_op = ctx.builder.CreateSwitch(num, nonzero_block, 2);
|
||||
|
||||
switch_op->addCase(ctx.create_i32(0), zero_block);
|
||||
ctx.builder.SetInsertPoint(nonzero_block);
|
||||
for(auto& instruction : on_true) {
|
||||
instruction->gen_llvm(ctx, f);
|
||||
}
|
||||
ctx.builder.CreateBr(resume_block);
|
||||
|
||||
ctx.builder.SetInsertPoint(zero_block);
|
||||
for(auto& instruction : on_false) {
|
||||
instruction->gen_llvm(ctx, f);
|
||||
}
|
||||
ctx.builder.CreateBr(resume_block);
|
||||
|
||||
ctx.builder.SetInsertPoint(resume_block);
|
||||
}
|
||||
|
||||
void instruction_slide::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Slide(" << offset << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_slide(f, ctx.create_size(offset));
|
||||
}
|
||||
|
||||
void instruction_binop::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "BinOp(" << op_action(op) << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto left_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||
llvm::Value* result;
|
||||
switch(op) {
|
||||
case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
|
||||
case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
|
||||
case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
|
||||
case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
|
||||
case MODULO: result = ctx.builder.CreateSRem(left_int, right_int); break;
|
||||
case EQUALS: result = ctx.builder.CreateICmpEQ(left_int, right_int); break;
|
||||
case LESS_EQUALS: result = ctx.builder.CreateICmpSLE(left_int, right_int); break;
|
||||
}
|
||||
ctx.create_push(f, ctx.create_num(f, result));
|
||||
}
|
||||
|
||||
void instruction_eval::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Eval()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_unwind(f);
|
||||
}
|
||||
|
||||
void instruction_alloc::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Alloc(" << amount << ")" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_alloc(f, ctx.create_size(amount));
|
||||
}
|
||||
|
||||
void instruction_unwind::print(int indent, std::ostream& to) const {
|
||||
print_indent(indent, to);
|
||||
to << "Unwind()" << std::endl;
|
||||
}
|
||||
|
||||
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
// Nothing
|
||||
}
|
||||
155
code/compiler/13/instruction.hpp
Normal file
@@ -0,0 +1,155 @@
|
||||
#pragma once
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include "binop.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
|
||||
struct instruction {
|
||||
virtual ~instruction() = default;
|
||||
|
||||
virtual void print(int indent, std::ostream& to) const = 0;
|
||||
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
|
||||
};
|
||||
|
||||
using instruction_ptr = std::unique_ptr<instruction>;
|
||||
|
||||
struct instruction_pushint : public instruction {
|
||||
int value;
|
||||
|
||||
instruction_pushint(int v)
|
||||
: value(v) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_pushglobal : public instruction {
|
||||
std::string name;
|
||||
|
||||
instruction_pushglobal(std::string n)
|
||||
: name(std::move(n)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_push : public instruction {
|
||||
int offset;
|
||||
|
||||
instruction_push(int o)
|
||||
: offset(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_pop : public instruction {
|
||||
int count;
|
||||
|
||||
instruction_pop(int c)
|
||||
: count(c) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_mkapp : public instruction {
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_update : public instruction {
|
||||
int offset;
|
||||
|
||||
instruction_update(int o)
|
||||
: offset(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_pack : public instruction {
|
||||
int tag;
|
||||
int size;
|
||||
|
||||
instruction_pack(int t, int s)
|
||||
: tag(t), size(s) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_split : public instruction {
|
||||
int size;
|
||||
|
||||
instruction_split(int s)
|
||||
: size(s) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_jump : public instruction {
|
||||
std::vector<std::vector<instruction_ptr>> branches;
|
||||
std::map<int, int> tag_mappings;
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_if : public instruction {
|
||||
std::vector<instruction_ptr> on_true;
|
||||
std::vector<instruction_ptr> on_false;
|
||||
|
||||
instruction_if(
|
||||
std::vector<instruction_ptr> t,
|
||||
std::vector<instruction_ptr> f)
|
||||
: on_true(std::move(t)), on_false(std::move(f)) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_slide : public instruction {
|
||||
int offset;
|
||||
|
||||
instruction_slide(int o)
|
||||
: offset(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_binop : public instruction {
|
||||
binop op;
|
||||
|
||||
instruction_binop(binop o)
|
||||
: op(o) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_eval : public instruction {
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_alloc : public instruction {
|
||||
int amount;
|
||||
|
||||
instruction_alloc(int a)
|
||||
: amount(a) {}
|
||||
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
|
||||
struct instruction_unwind : public instruction {
|
||||
void print(int indent, std::ostream& to) const;
|
||||
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||
};
|
||||
273
code/compiler/13/llvm_context.cpp
Normal file
@@ -0,0 +1,273 @@
|
||||
#include "llvm_context.hpp"
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void llvm_context::create_types() {
|
||||
stack_type = StructType::create(ctx, "stack");
|
||||
gmachine_type = StructType::create(ctx, "gmachine");
|
||||
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
|
||||
tag_type = IntegerType::getInt8Ty(ctx);
|
||||
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
|
||||
|
||||
gmachine_type->setBody(
|
||||
stack_ptr_type,
|
||||
node_ptr_type,
|
||||
IntegerType::getInt64Ty(ctx),
|
||||
IntegerType::getInt64Ty(ctx)
|
||||
);
|
||||
struct_types.at("node_base")->setBody(
|
||||
IntegerType::getInt32Ty(ctx),
|
||||
IntegerType::getInt8Ty(ctx),
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_app")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
node_ptr_type,
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_num")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
IntegerType::getInt32Ty(ctx)
|
||||
);
|
||||
struct_types.at("node_global")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
|
||||
);
|
||||
struct_types.at("node_ind")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_data")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
IntegerType::getInt8Ty(ctx),
|
||||
PointerType::getUnqual(node_ptr_type)
|
||||
);
|
||||
}
|
||||
|
||||
void llvm_context::create_functions() {
|
||||
auto void_type = Type::getVoidTy(ctx);
|
||||
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
|
||||
functions["stack_init"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_init",
|
||||
&module
|
||||
);
|
||||
functions["stack_free"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_free",
|
||||
&module
|
||||
);
|
||||
functions["stack_push"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_pop"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_pop",
|
||||
&module
|
||||
);
|
||||
functions["stack_peek"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_peek",
|
||||
&module
|
||||
);
|
||||
functions["stack_popn"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_popn",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_slide"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_slide",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_update"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_update",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_alloc"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_alloc",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_pack"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_pack",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_split"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_split",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_track"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_track",
|
||||
&module
|
||||
);
|
||||
|
||||
auto int32_type = IntegerType::getInt32Ty(ctx);
|
||||
functions["alloc_app"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_app",
|
||||
&module
|
||||
);
|
||||
functions["alloc_num"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { int32_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_num",
|
||||
&module
|
||||
);
|
||||
functions["alloc_global"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_global",
|
||||
&module
|
||||
);
|
||||
functions["alloc_ind"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"alloc_ind",
|
||||
&module
|
||||
);
|
||||
|
||||
functions["unwind"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"unwind",
|
||||
&module
|
||||
);
|
||||
}
|
||||
|
||||
ConstantInt* llvm_context::create_i8(int8_t i) {
|
||||
return ConstantInt::get(ctx, APInt(8, i));
|
||||
}
|
||||
ConstantInt* llvm_context::create_i32(int32_t i) {
|
||||
return ConstantInt::get(ctx, APInt(32, i));
|
||||
}
|
||||
ConstantInt* llvm_context::create_size(size_t i) {
|
||||
return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i));
|
||||
}
|
||||
|
||||
Value* llvm_context::create_pop(Function* f) {
|
||||
auto pop_f = functions.at("stack_pop");
|
||||
return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) });
|
||||
}
|
||||
Value* llvm_context::create_peek(Function* f, Value* off) {
|
||||
auto peek_f = functions.at("stack_peek");
|
||||
return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
|
||||
}
|
||||
void llvm_context::create_push(Function* f, Value* v) {
|
||||
auto push_f = functions.at("stack_push");
|
||||
builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v });
|
||||
}
|
||||
void llvm_context::create_popn(Function* f, Value* off) {
|
||||
auto popn_f = functions.at("stack_popn");
|
||||
builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
|
||||
}
|
||||
void llvm_context::create_update(Function* f, Value* off) {
|
||||
auto update_f = functions.at("gmachine_update");
|
||||
builder.CreateCall(update_f, { f->arg_begin(), off });
|
||||
}
|
||||
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
|
||||
auto pack_f = functions.at("gmachine_pack");
|
||||
builder.CreateCall(pack_f, { f->arg_begin(), c, t });
|
||||
}
|
||||
void llvm_context::create_split(Function* f, Value* c) {
|
||||
auto split_f = functions.at("gmachine_split");
|
||||
builder.CreateCall(split_f, { f->arg_begin(), c });
|
||||
}
|
||||
void llvm_context::create_slide(Function* f, Value* off) {
|
||||
auto slide_f = functions.at("gmachine_slide");
|
||||
builder.CreateCall(slide_f, { f->arg_begin(), off });
|
||||
}
|
||||
void llvm_context::create_alloc(Function* f, Value* n) {
|
||||
auto alloc_f = functions.at("gmachine_alloc");
|
||||
builder.CreateCall(alloc_f, { f->arg_begin(), n });
|
||||
}
|
||||
Value* llvm_context::create_track(Function* f, Value* v) {
|
||||
auto track_f = functions.at("gmachine_track");
|
||||
return builder.CreateCall(track_f, { f->arg_begin(), v });
|
||||
}
|
||||
|
||||
void llvm_context::create_unwind(Function* f) {
|
||||
auto unwind_f = functions.at("unwind");
|
||||
builder.CreateCall(unwind_f, { f->args().begin() });
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
|
||||
auto offset_0 = create_i32(0);
|
||||
return builder.CreateGEP(g, { offset_0, offset_0 });
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_num(Value* v) {
|
||||
return builder.CreatePtrToInt(v, IntegerType::getInt32Ty(ctx));
|
||||
}
|
||||
Value* llvm_context::create_num(Function* f, Value* v) {
|
||||
auto alloc_num_f = functions.at("alloc_num");
|
||||
auto alloc_num_call = builder.CreateCall(alloc_num_f, { v });
|
||||
return create_track(f, alloc_num_call);
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_data_tag(Value* v) {
|
||||
auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data"));
|
||||
auto cast = builder.CreatePointerCast(v, data_ptr_type);
|
||||
auto offset_0 = create_i32(0);
|
||||
auto offset_1 = create_i32(1);
|
||||
auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||
return builder.CreateLoad(tag_ptr);
|
||||
}
|
||||
|
||||
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
|
||||
auto alloc_global_f = functions.at("alloc_global");
|
||||
auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a });
|
||||
return create_track(f, alloc_global_call);
|
||||
}
|
||||
|
||||
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
|
||||
auto alloc_app_f = functions.at("alloc_app");
|
||||
auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r });
|
||||
return create_track(f, alloc_app_call);
|
||||
}
|
||||
|
||||
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
|
||||
auto void_type = llvm::Type::getVoidTy(ctx);
|
||||
auto new_function = llvm::Function::Create(
|
||||
function_type,
|
||||
llvm::Function::LinkageTypes::ExternalLinkage,
|
||||
"f_" + name,
|
||||
&module
|
||||
);
|
||||
auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function);
|
||||
|
||||
auto new_custom_f = custom_function_ptr(new custom_function());
|
||||
new_custom_f->arity = arity;
|
||||
new_custom_f->function = new_function;
|
||||
custom_functions["f_" + name] = std::move(new_custom_f);
|
||||
|
||||
return new_function;
|
||||
}
|
||||
72
code/compiler/13/llvm_context.hpp
Normal file
@@ -0,0 +1,72 @@
|
||||
#pragma once
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
#include <map>
|
||||
|
||||
struct llvm_context {
|
||||
struct custom_function {
|
||||
llvm::Function* function;
|
||||
int32_t arity;
|
||||
};
|
||||
|
||||
using custom_function_ptr = std::unique_ptr<custom_function>;
|
||||
|
||||
llvm::LLVMContext ctx;
|
||||
llvm::IRBuilder<> builder;
|
||||
llvm::Module module;
|
||||
|
||||
std::map<std::string, custom_function_ptr> custom_functions;
|
||||
std::map<std::string, llvm::Function*> functions;
|
||||
std::map<std::string, llvm::StructType*> struct_types;
|
||||
|
||||
llvm::StructType* stack_type;
|
||||
llvm::StructType* gmachine_type;
|
||||
llvm::PointerType* stack_ptr_type;
|
||||
llvm::PointerType* gmachine_ptr_type;
|
||||
llvm::PointerType* node_ptr_type;
|
||||
llvm::IntegerType* tag_type;
|
||||
llvm::FunctionType* function_type;
|
||||
|
||||
llvm_context()
|
||||
: builder(ctx), module("bloglang", ctx) {
|
||||
create_types();
|
||||
create_functions();
|
||||
}
|
||||
|
||||
void create_types();
|
||||
void create_functions();
|
||||
|
||||
llvm::ConstantInt* create_i8(int8_t);
|
||||
llvm::ConstantInt* create_i32(int32_t);
|
||||
llvm::ConstantInt* create_size(size_t);
|
||||
|
||||
llvm::Value* create_pop(llvm::Function*);
|
||||
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
||||
void create_push(llvm::Function*, llvm::Value*);
|
||||
void create_popn(llvm::Function*, llvm::Value*);
|
||||
void create_update(llvm::Function*, llvm::Value*);
|
||||
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
void create_split(llvm::Function*, llvm::Value*);
|
||||
void create_slide(llvm::Function*, llvm::Value*);
|
||||
void create_alloc(llvm::Function*, llvm::Value*);
|
||||
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
||||
|
||||
void create_unwind(llvm::Function*);
|
||||
|
||||
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
||||
|
||||
llvm::Value* unwrap_num(llvm::Value*);
|
||||
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
||||
|
||||
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||
|
||||
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
|
||||
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
|
||||
llvm::Function* create_custom_function(std::string name, int32_t arity);
|
||||
};
|
||||
213
code/compiler/13/main.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
#include "ast.hpp"
|
||||
#include <iostream>
|
||||
#include "binop.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "graph.hpp"
|
||||
#include "instruction.hpp"
|
||||
#include "llvm_context.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "error.hpp"
|
||||
#include "type.hpp"
|
||||
#include "parse_driver.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/Support/TargetSelect.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
void yy::parser::error(const yy::location& loc, const std::string& msg) {
|
||||
std::cout << "An error occured: " << msg << std::endl;
|
||||
}
|
||||
|
||||
void prelude_types(definition_group& defs, type_env_ptr env) {
|
||||
type_ptr int_type = type_ptr(new type_internal("Int"));
|
||||
env->bind_type("Int", int_type);
|
||||
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
||||
|
||||
type_ptr bool_type = type_ptr(new type_internal("Bool"));
|
||||
env->bind_type("Bool", bool_type);
|
||||
type_ptr bool_type_app = type_ptr(new type_app(bool_type));
|
||||
|
||||
type_ptr binop_type = type_ptr(new type_arr(
|
||||
int_type_app,
|
||||
type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||
type_ptr cmp_type = type_ptr(new type_arr(
|
||||
int_type_app,
|
||||
type_ptr(new type_arr(int_type_app, bool_type_app))));
|
||||
|
||||
constexpr binop number_ops[] = { PLUS, MINUS, TIMES, DIVIDE, MODULO };
|
||||
constexpr binop cmp_ops[] = { EQUALS, LESS_EQUALS };
|
||||
for(auto& op : number_ops) {
|
||||
env->bind(op_name(op), binop_type, visibility::global);
|
||||
env->set_mangled_name(op_name(op), op_action(op));
|
||||
}
|
||||
for(auto& op : cmp_ops) {
|
||||
env->bind(op_name(op), cmp_type, visibility::global);
|
||||
env->set_mangled_name(op_name(op), op_action(op));
|
||||
}
|
||||
|
||||
env->bind("True", bool_type_app, visibility::global);
|
||||
env->bind("False", bool_type_app, visibility::global);
|
||||
}
|
||||
|
||||
void typecheck_program(
|
||||
definition_group& defs,
|
||||
type_mgr& mgr, type_env_ptr& env) {
|
||||
prelude_types(defs, env);
|
||||
|
||||
std::set<std::string> free;
|
||||
defs.find_free(free);
|
||||
defs.typecheck(mgr, env);
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
for(auto& pair : defs.env->names) {
|
||||
std::cout << pair.first << ": ";
|
||||
pair.second.type->print(mgr, std::cout);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
global_scope translate_program(definition_group& group) {
|
||||
global_scope scope;
|
||||
for(auto& data : group.defs_data) {
|
||||
data.second->into_globals(scope);
|
||||
}
|
||||
for(auto& defn : group.defs_defn) {
|
||||
auto& function = defn.second->into_global(scope);
|
||||
function.body->env->parent->set_mangled_name(defn.first, function.name);
|
||||
}
|
||||
return scope;
|
||||
}
|
||||
|
||||
void output_llvm(llvm_context& ctx, const std::string& filename) {
|
||||
std::string targetTriple = llvm::sys::getDefaultTargetTriple();
|
||||
|
||||
llvm::InitializeNativeTarget();
|
||||
llvm::InitializeNativeTargetAsmParser();
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
|
||||
std::string error;
|
||||
const llvm::Target* target =
|
||||
llvm::TargetRegistry::lookupTarget(targetTriple, error);
|
||||
if (!target) {
|
||||
std::cerr << error << std::endl;
|
||||
} else {
|
||||
std::string cpu = "generic";
|
||||
std::string features = "";
|
||||
llvm::TargetOptions options;
|
||||
std::unique_ptr<llvm::TargetMachine> targetMachine(
|
||||
target->createTargetMachine(targetTriple, cpu, features,
|
||||
options, llvm::Optional<llvm::Reloc::Model>()));
|
||||
|
||||
ctx.module.setDataLayout(targetMachine->createDataLayout());
|
||||
ctx.module.setTargetTriple(targetTriple);
|
||||
|
||||
std::error_code ec;
|
||||
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
|
||||
if (ec) {
|
||||
throw std::runtime_error("failed to open object file for writing");
|
||||
} else {
|
||||
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||
llvm::legacy::PassManager pm;
|
||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||
throw std::runtime_error("failed to add passes to pass manager");
|
||||
} else {
|
||||
pm.run(ctx.module);
|
||||
file.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
|
||||
auto new_function = ctx.create_custom_function(op_action(op), 2);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(2)));
|
||||
instructions.push_back(instruction_ptr(new instruction_pop(2)));
|
||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
void gen_llvm_boolean_constructor(llvm_context& ctx, const std::string& s, bool b) {
|
||||
auto new_function = ctx.create_custom_function(s, 0);
|
||||
std::vector<instruction_ptr> instructions;
|
||||
instructions.push_back(instruction_ptr(new instruction_pushint(b)));
|
||||
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||
for(auto& instruction : instructions) {
|
||||
instruction->gen_llvm(ctx, new_function);
|
||||
}
|
||||
ctx.builder.CreateRetVoid();
|
||||
}
|
||||
|
||||
void gen_llvm(global_scope& scope) {
|
||||
llvm_context ctx;
|
||||
gen_llvm_internal_op(ctx, PLUS);
|
||||
gen_llvm_internal_op(ctx, MINUS);
|
||||
gen_llvm_internal_op(ctx, TIMES);
|
||||
gen_llvm_internal_op(ctx, DIVIDE);
|
||||
gen_llvm_internal_op(ctx, MODULO);
|
||||
gen_llvm_internal_op(ctx, EQUALS);
|
||||
gen_llvm_internal_op(ctx, LESS_EQUALS);
|
||||
gen_llvm_boolean_constructor(ctx, "True", true);
|
||||
gen_llvm_boolean_constructor(ctx, "False", false);
|
||||
|
||||
scope.generate_llvm(ctx);
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
ctx.module.print(llvm::outs(), nullptr);
|
||||
#endif
|
||||
|
||||
output_llvm(ctx, "program.o");
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if(argc != 2) {
|
||||
std::cerr << "please enter a file to compile." << std::endl;
|
||||
}
|
||||
parse_driver driver(argv[1]);
|
||||
if(!driver.run_parse()) {
|
||||
std::cerr << "failed to open file " << argv[1] << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
type_mgr mgr;
|
||||
type_env_ptr env(new type_env);
|
||||
|
||||
#ifdef DEBUG_OUT
|
||||
for(auto& def_defn : driver.global_defs.defs_defn) {
|
||||
std::cout << def_defn.second->name;
|
||||
for(auto& param : def_defn.second->params) std::cout << " " << param;
|
||||
std::cout << ":" << std::endl;
|
||||
def_defn.second->body->print(1, std::cout);
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
try {
|
||||
typecheck_program(driver.global_defs, mgr, env);
|
||||
global_scope scope = translate_program(driver.global_defs);
|
||||
scope.compile();
|
||||
gen_llvm(scope);
|
||||
} catch(unification_error& err) {
|
||||
err.pretty_print(std::cerr, driver, mgr);
|
||||
} catch(type_error& err) {
|
||||
err.pretty_print(std::cerr, driver);
|
||||
} catch(std::runtime_error& err) {
|
||||
std::cerr << err.what() << std::endl;
|
||||
}
|
||||
}
|
||||
80
code/compiler/13/parse_driver.hpp
Normal file
@@ -0,0 +1,80 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "definition.hpp"
|
||||
#include "location.hh"
|
||||
#include "parser.hpp"
|
||||
|
||||
struct parse_driver;
|
||||
|
||||
void scanner_init(parse_driver* d, yyscan_t* scanner);
|
||||
void scanner_destroy(yyscan_t* scanner);
|
||||
|
||||
struct parse_driver {
|
||||
std::string file_name;
|
||||
std::ifstream file_stream;
|
||||
std::ostringstream string_stream;
|
||||
|
||||
yy::location location;
|
||||
size_t file_offset;
|
||||
|
||||
std::vector<size_t> line_offsets;
|
||||
definition_group global_defs;
|
||||
std::string read_file;
|
||||
|
||||
parse_driver(const std::string& file)
|
||||
: file_name(file), file_offset(0) {}
|
||||
|
||||
bool run_parse() {
|
||||
file_stream.open(file_name);
|
||||
if(!file_stream.good()) return false;
|
||||
line_offsets.push_back(0);
|
||||
yyscan_t scanner;
|
||||
scanner_init(this, &scanner);
|
||||
yy::parser parser(scanner, *this);
|
||||
parser();
|
||||
scanner_destroy(&scanner);
|
||||
read_file = string_stream.str();
|
||||
return true;
|
||||
}
|
||||
|
||||
int get() {
|
||||
int new_char = file_stream.get();
|
||||
if(new_char == EOF) return EOF;
|
||||
file_offset++;
|
||||
if(new_char == '\n') line_offsets.push_back(file_offset);
|
||||
string_stream.put(new_char);
|
||||
return new_char;
|
||||
}
|
||||
|
||||
size_t get_index(int line, int column) {
|
||||
assert(line > 0);
|
||||
assert(line <= line_offsets.size());
|
||||
size_t file_offset = line_offsets[line-1];
|
||||
file_offset += column - 1;
|
||||
return file_offset;
|
||||
}
|
||||
|
||||
size_t get_line_end(int line) {
|
||||
if(line > line_offsets.size()) return read_file.size();
|
||||
return get_index(line+1, 1);
|
||||
}
|
||||
|
||||
void print_highlighted_location(std::ostream& stream, const yy::location& loc) {
|
||||
size_t print_start = get_index(loc.begin.line, 1);
|
||||
size_t highlight_start = get_index(loc.begin.line, loc.begin.column);
|
||||
size_t highlight_end = get_index(loc.end.line, loc.end.column);
|
||||
size_t print_end = get_line_end(loc.end.line);
|
||||
const char* content = read_file.c_str();
|
||||
stream.write(content + print_start, highlight_start - print_start);
|
||||
stream << "\033[4;31m";
|
||||
stream.write(content + highlight_start, highlight_end - highlight_start);
|
||||
stream << "\033[0m";
|
||||
stream.write(content + highlight_end, print_end - highlight_end);
|
||||
}
|
||||
};
|
||||
|
||||
#define YY_DECL yy::parser::symbol_type yylex(yyscan_t yyscanner, parse_driver& drv)
|
||||
|
||||
YY_DECL;
|
||||
48
code/compiler/13/parsed_type.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
#include "parsed_type.hpp"
|
||||
#include <sstream>
|
||||
#include "type.hpp"
|
||||
#include "type_env.hpp"
|
||||
#include "error.hpp"
|
||||
|
||||
type_ptr parsed_type_app::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
auto parent_type = e.lookup_type(name);
|
||||
if(parent_type == nullptr)
|
||||
throw type_error(std::string("no such type or type constructor ") + name);
|
||||
type_base* base_type;
|
||||
if(!(base_type = dynamic_cast<type_base*>(parent_type.get())))
|
||||
throw type_error(std::string("invalid type ") + name);
|
||||
if(base_type->arity != arguments.size()) {
|
||||
std::ostringstream error_stream;
|
||||
error_stream << "invalid application of type ";
|
||||
error_stream << name;
|
||||
error_stream << " (" << base_type->arity << " argument(s) expected, ";
|
||||
error_stream << "but " << arguments.size() << " provided)";
|
||||
throw type_error(error_stream.str());
|
||||
}
|
||||
|
||||
type_app* new_app = new type_app(std::move(parent_type));
|
||||
type_ptr to_return(new_app);
|
||||
for(auto& arg : arguments) {
|
||||
new_app->arguments.push_back(arg->to_type(vars, e));
|
||||
}
|
||||
return to_return;
|
||||
}
|
||||
|
||||
type_ptr parsed_type_var::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& e) const {
|
||||
if(vars.find(var) == vars.end())
|
||||
throw type_error(std::string("the type variable ") + var + std::string(" was not explicitly declared."));
|
||||
return type_ptr(new type_var(var));
|
||||
}
|
||||
|
||||
|
||||
type_ptr parsed_type_arr::to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& env) const {
|
||||
auto new_left = left->to_type(vars, env);
|
||||
auto new_right = right->to_type(vars, env);
|
||||
return type_ptr(new type_arr(std::move(new_left), std::move(new_right)));
|
||||
}
|
||||
43
code/compiler/13/parsed_type.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "type_env.hpp"
|
||||
|
||||
struct parsed_type {
|
||||
virtual type_ptr to_type(
|
||||
const std::set<std::string>& vars,
|
||||
const type_env& env) const = 0;
|
||||
};
|
||||
|
||||
using parsed_type_ptr = std::unique_ptr<parsed_type>;
|
||||
|
||||
struct parsed_type_app : parsed_type {
|
||||
std::string name;
|
||||
std::vector<parsed_type_ptr> arguments;
|
||||
|
||||
parsed_type_app(
|
||||
std::string n,
|
||||
std::vector<parsed_type_ptr> as)
|
||||
: name(std::move(n)), arguments(std::move(as)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
|
||||
struct parsed_type_var : parsed_type {
|
||||
std::string var;
|
||||
|
||||
parsed_type_var(std::string v) : var(std::move(v)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
|
||||
struct parsed_type_arr : parsed_type {
|
||||
parsed_type_ptr left;
|
||||
parsed_type_ptr right;
|
||||
|
||||
parsed_type_arr(parsed_type_ptr l, parsed_type_ptr r)
|
||||
: left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||
};
|
||||
213
code/compiler/13/parser.y
Normal file
@@ -0,0 +1,213 @@
|
||||
%code requires {
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "ast.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "parsed_type.hpp"
|
||||
|
||||
class parse_driver;
|
||||
using yyscan_t = void*;
|
||||
}
|
||||
|
||||
%param { yyscan_t scanner }
|
||||
%param { parse_driver& drv }
|
||||
|
||||
%code {
|
||||
#include "parse_driver.hpp"
|
||||
}
|
||||
|
||||
%token BACKSLASH
|
||||
%token BACKTICK
|
||||
%token PLUS
|
||||
%token TIMES
|
||||
%token MINUS
|
||||
%token DIVIDE
|
||||
%token MODULO
|
||||
%token EQUALS
|
||||
%token LESS_EQUALS
|
||||
%token <int> INT
|
||||
%token DEFN
|
||||
%token DATA
|
||||
%token CASE
|
||||
%token OF
|
||||
%token LET
|
||||
%token IN
|
||||
%token OCURLY
|
||||
%token CCURLY
|
||||
%token OPAREN
|
||||
%token CPAREN
|
||||
%token COMMA
|
||||
%token ARROW
|
||||
%token EQUAL
|
||||
%token <std::string> LID
|
||||
%token <std::string> UID
|
||||
|
||||
%language "c++"
|
||||
%define api.value.type variant
|
||||
%define api.token.constructor
|
||||
|
||||
%locations
|
||||
|
||||
%type <std::vector<std::string>> lowercaseParams
|
||||
%type <std::vector<branch_ptr>> branches
|
||||
%type <std::vector<constructor_ptr>> constructors
|
||||
%type <std::vector<parsed_type_ptr>> typeList
|
||||
%type <binop> anyBinop
|
||||
%type <definition_group> definitions
|
||||
%type <parsed_type_ptr> type nonArrowType typeListElement
|
||||
%type <ast_ptr> aInfix aEq aAdd aMul case let lambda app appBase
|
||||
%type <definition_data_ptr> data
|
||||
%type <definition_defn_ptr> defn
|
||||
%type <branch_ptr> branch
|
||||
%type <pattern_ptr> pattern
|
||||
%type <constructor_ptr> constructor
|
||||
|
||||
%start program
|
||||
|
||||
%%
|
||||
|
||||
program
|
||||
: definitions { $1.vis = visibility::global; std::swap(drv.global_defs, $1); }
|
||||
;
|
||||
|
||||
definitions
|
||||
: definitions defn { $$ = std::move($1); auto name = $2->name; $$.defs_defn[name] = std::move($2); }
|
||||
| definitions data { $$ = std::move($1); auto name = $2->name; $$.defs_data[name] = std::move($2); }
|
||||
| %empty { $$ = definition_group(); }
|
||||
;
|
||||
|
||||
defn
|
||||
: DEFN LID lowercaseParams EQUAL OCURLY aInfix CCURLY
|
||||
{ $$ = definition_defn_ptr(
|
||||
new definition_defn(std::move($2), std::move($3), std::move($6), @$)); }
|
||||
;
|
||||
|
||||
lowercaseParams
|
||||
: %empty { $$ = std::vector<std::string>(); }
|
||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
||||
aInfix
|
||||
: aInfix BACKTICK LID BACKTICK aEq
|
||||
{ $$ = ast_ptr(new ast_app(
|
||||
ast_ptr(new ast_app(ast_ptr(new ast_lid(std::move($3))), std::move($1))), std::move($5))); }
|
||||
| aInfix BACKTICK UID BACKTICK aEq
|
||||
{ $$ = ast_ptr(new ast_app(
|
||||
ast_ptr(new ast_app(ast_ptr(new ast_uid(std::move($3))), std::move($1))), std::move($5))); }
|
||||
| aEq { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
aEq
|
||||
: aAdd EQUALS aAdd { $$ = ast_ptr(new ast_binop(EQUALS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd LESS_EQUALS aAdd { $$ = ast_ptr(new ast_binop(LESS_EQUALS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
aAdd
|
||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3), @$)); }
|
||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3), @$)); }
|
||||
| aMul { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
aMul
|
||||
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3), @$)); }
|
||||
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3), @$)); }
|
||||
| aMul MODULO app { $$ = ast_ptr(new ast_binop(MODULO, std::move($1), std::move($3), @$)); }
|
||||
| app { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
app
|
||||
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2), @$)); }
|
||||
| appBase { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
appBase
|
||||
: INT { $$ = ast_ptr(new ast_int($1, @$)); }
|
||||
| LID { $$ = ast_ptr(new ast_lid(std::move($1), @$)); }
|
||||
| UID { $$ = ast_ptr(new ast_uid(std::move($1), @$)); }
|
||||
| OPAREN aInfix CPAREN { $$ = std::move($2); }
|
||||
| OPAREN anyBinop CPAREN { $$ = ast_ptr(new ast_lid(op_name($2))); }
|
||||
| case { $$ = std::move($1); }
|
||||
| let { $$ = std::move($1); }
|
||||
| lambda { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
anyBinop
|
||||
: PLUS { $$ = PLUS; }
|
||||
| MINUS { $$ = MINUS; }
|
||||
| TIMES { $$ = TIMES; }
|
||||
| DIVIDE { $$ = DIVIDE; }
|
||||
| MODULO { $$ = MODULO; }
|
||||
| EQUALS { $$ = EQUALS; }
|
||||
| LESS_EQUALS { $$ = LESS_EQUALS; }
|
||||
;
|
||||
|
||||
let
|
||||
: LET OCURLY definitions CCURLY IN OCURLY aInfix CCURLY
|
||||
{ $$ = ast_ptr(new ast_let(std::move($3), std::move($7), @$)); }
|
||||
;
|
||||
|
||||
lambda
|
||||
: BACKSLASH lowercaseParams ARROW OCURLY aInfix CCURLY
|
||||
{ $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5), @$)); }
|
||||
;
|
||||
|
||||
case
|
||||
: CASE aInfix OF OCURLY branches CCURLY
|
||||
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5), @$)); }
|
||||
;
|
||||
|
||||
branches
|
||||
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
|
||||
;
|
||||
|
||||
branch
|
||||
: pattern ARROW OCURLY aInfix CCURLY
|
||||
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||
;
|
||||
|
||||
pattern
|
||||
: LID { $$ = pattern_ptr(new pattern_var(std::move($1), @$)); }
|
||||
| UID lowercaseParams
|
||||
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2), @$)); }
|
||||
;
|
||||
|
||||
data
|
||||
: DATA UID lowercaseParams EQUAL OCURLY constructors CCURLY
|
||||
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($3), std::move($6), @$)); }
|
||||
;
|
||||
|
||||
constructors
|
||||
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
|
||||
| constructor
|
||||
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
|
||||
;
|
||||
|
||||
constructor
|
||||
: UID typeList
|
||||
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
||||
;
|
||||
|
||||
type
|
||||
: nonArrowType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
|
||||
| nonArrowType { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
nonArrowType
|
||||
: UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
|
||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||
| OPAREN type CPAREN { $$ = std::move($2); }
|
||||
;
|
||||
|
||||
typeListElement
|
||||
: OPAREN type CPAREN { $$ = std::move($2); }
|
||||
| UID { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), {})); }
|
||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||
;
|
||||
|
||||
typeList
|
||||
: %empty { $$ = std::vector<parsed_type_ptr>(); }
|
||||
| typeList typeListElement { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
272
code/compiler/13/runtime.c
Normal file
@@ -0,0 +1,272 @@
|
||||
#include <bits/stdint-intn.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <memory.h>
|
||||
#include <stdio.h>
|
||||
#include "runtime.h"
|
||||
|
||||
#define INT_MARKER (1l << 63)
|
||||
#define IS_INT(n) ((uint64_t) n & INT_MARKER)
|
||||
|
||||
struct node_base* alloc_node() {
|
||||
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||
new_node->gc_next = NULL;
|
||||
new_node->gc_reachable = 0;
|
||||
assert(new_node != NULL);
|
||||
return new_node;
|
||||
}
|
||||
|
||||
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||
struct node_app* node = (struct node_app*) alloc_node();
|
||||
node->base.tag = NODE_APP;
|
||||
node->left = l;
|
||||
node->right = r;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct node_num* alloc_num(int32_t n) {
|
||||
return (struct node_num*) (INT_MARKER | n);
|
||||
}
|
||||
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||
struct node_global* node = (struct node_global*) alloc_node();
|
||||
node->base.tag = NODE_GLOBAL;
|
||||
node->arity = a;
|
||||
node->function = f;
|
||||
return node;
|
||||
}
|
||||
|
||||
struct node_ind* alloc_ind(struct node_base* n) {
|
||||
struct node_ind* node = (struct node_ind*) alloc_node();
|
||||
node->base.tag = NODE_IND;
|
||||
node->next = n;
|
||||
return node;
|
||||
}
|
||||
|
||||
void free_node_direct(struct node_base* n) {
|
||||
if(n->tag == NODE_DATA) {
|
||||
free(((struct node_data*) n)->array);
|
||||
}
|
||||
}
|
||||
|
||||
void gc_visit_node(struct node_base* n) {
|
||||
if(IS_INT(n) || n->gc_reachable) return;
|
||||
n->gc_reachable = 1;
|
||||
|
||||
if(n->tag == NODE_APP) {
|
||||
struct node_app* app = (struct node_app*) n;
|
||||
gc_visit_node(app->left);
|
||||
gc_visit_node(app->right);
|
||||
} if(n->tag == NODE_IND) {
|
||||
struct node_ind* ind = (struct node_ind*) n;
|
||||
gc_visit_node(ind->next);
|
||||
} if(n->tag == NODE_DATA) {
|
||||
struct node_data* data = (struct node_data*) n;
|
||||
struct node_base** to_visit = data->array;
|
||||
while(*to_visit) {
|
||||
gc_visit_node(*to_visit);
|
||||
to_visit++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void stack_init(struct stack* s) {
|
||||
s->size = 4;
|
||||
s->count = 0;
|
||||
s->data = malloc(sizeof(*s->data) * s->size);
|
||||
assert(s->data != NULL);
|
||||
}
|
||||
|
||||
void stack_free(struct stack* s) {
|
||||
free(s->data);
|
||||
}
|
||||
|
||||
void stack_push(struct stack* s, struct node_base* n) {
|
||||
while(s->count >= s->size) {
|
||||
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
|
||||
assert(s->data != NULL);
|
||||
}
|
||||
s->data[s->count++] = n;
|
||||
}
|
||||
|
||||
struct node_base* stack_pop(struct stack* s) {
|
||||
assert(s->count > 0);
|
||||
return s->data[--s->count];
|
||||
}
|
||||
|
||||
struct node_base* stack_peek(struct stack* s, size_t o) {
|
||||
assert(s->count > o);
|
||||
return s->data[s->count - o - 1];
|
||||
}
|
||||
|
||||
void stack_popn(struct stack* s, size_t n) {
|
||||
assert(s->count >= n);
|
||||
s->count -= n;
|
||||
}
|
||||
|
||||
void gmachine_init(struct gmachine* g) {
|
||||
stack_init(&g->stack);
|
||||
g->gc_nodes = NULL;
|
||||
g->gc_node_count = 0;
|
||||
g->gc_node_threshold = 128;
|
||||
}
|
||||
|
||||
void gmachine_free(struct gmachine* g) {
|
||||
stack_free(&g->stack);
|
||||
struct node_base* to_free = g->gc_nodes;
|
||||
struct node_base* next;
|
||||
|
||||
while(to_free) {
|
||||
next = to_free->gc_next;
|
||||
free_node_direct(to_free);
|
||||
free(to_free);
|
||||
to_free = next;
|
||||
}
|
||||
}
|
||||
|
||||
void gmachine_slide(struct gmachine* g, size_t n) {
|
||||
assert(g->stack.count > n);
|
||||
g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
|
||||
g->stack.count -= n;
|
||||
}
|
||||
|
||||
void gmachine_update(struct gmachine* g, size_t o) {
|
||||
assert(g->stack.count > o + 1);
|
||||
struct node_ind* ind =
|
||||
(struct node_ind*) g->stack.data[g->stack.count - o - 2];
|
||||
ind->base.tag = NODE_IND;
|
||||
ind->next = g->stack.data[g->stack.count -= 1];
|
||||
}
|
||||
|
||||
void gmachine_alloc(struct gmachine* g, size_t o) {
|
||||
while(o--) {
|
||||
stack_push(&g->stack,
|
||||
gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
|
||||
}
|
||||
}
|
||||
|
||||
void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
|
||||
assert(g->stack.count >= n);
|
||||
|
||||
struct node_base** data = malloc(sizeof(*data) * (n + 1));
|
||||
assert(data != NULL);
|
||||
memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
|
||||
data[n] = NULL;
|
||||
|
||||
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||
new_node->array = data;
|
||||
new_node->base.tag = NODE_DATA;
|
||||
new_node->tag = t;
|
||||
|
||||
stack_popn(&g->stack, n);
|
||||
stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
|
||||
}
|
||||
|
||||
void gmachine_split(struct gmachine* g, size_t n) {
|
||||
struct node_data* node = (struct node_data*) stack_pop(&g->stack);
|
||||
for(size_t i = 0; i < n; i++) {
|
||||
stack_push(&g->stack, node->array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||
if(IS_INT(b)) return b;
|
||||
g->gc_node_count++;
|
||||
b->gc_next = g->gc_nodes;
|
||||
g->gc_nodes = b;
|
||||
|
||||
if(g->gc_node_count >= g->gc_node_threshold) {
|
||||
uint64_t nodes_before = g->gc_node_count;
|
||||
gc_visit_node(b);
|
||||
gmachine_gc(g);
|
||||
g->gc_node_threshold = g->gc_node_count * 2;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
void gmachine_gc(struct gmachine* g) {
|
||||
for(size_t i = 0; i < g->stack.count; i++) {
|
||||
gc_visit_node(g->stack.data[i]);
|
||||
}
|
||||
|
||||
struct node_base** head_ptr = &g->gc_nodes;
|
||||
while(*head_ptr) {
|
||||
if((*head_ptr)->gc_reachable) {
|
||||
(*head_ptr)->gc_reachable = 0;
|
||||
head_ptr = &(*head_ptr)->gc_next;
|
||||
} else {
|
||||
struct node_base* to_free = *head_ptr;
|
||||
*head_ptr = to_free->gc_next;
|
||||
free_node_direct(to_free);
|
||||
free(to_free);
|
||||
g->gc_node_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unwind(struct gmachine* g) {
|
||||
struct stack* s = &g->stack;
|
||||
|
||||
while(1) {
|
||||
struct node_base* peek = stack_peek(s, 0);
|
||||
if(IS_INT(peek)) {
|
||||
break;
|
||||
} else if(peek->tag == NODE_APP) {
|
||||
struct node_app* n = (struct node_app*) peek;
|
||||
stack_push(s, n->left);
|
||||
} else if(peek->tag == NODE_GLOBAL) {
|
||||
struct node_global* n = (struct node_global*) peek;
|
||||
assert(s->count > n->arity);
|
||||
|
||||
for(size_t i = 1; i <= n->arity; i++) {
|
||||
s->data[s->count - i]
|
||||
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||
}
|
||||
|
||||
n->function(g);
|
||||
} else if(peek->tag == NODE_IND) {
|
||||
struct node_ind* n = (struct node_ind*) peek;
|
||||
stack_pop(s);
|
||||
stack_push(s, n->next);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern void f_main(struct gmachine* s);
|
||||
|
||||
void print_node(struct node_base* n) {
|
||||
if(IS_INT(n)) {
|
||||
printf("%d", (int32_t) n);
|
||||
} else if(n->tag == NODE_APP) {
|
||||
struct node_app* app = (struct node_app*) n;
|
||||
print_node(app->left);
|
||||
putchar(' ');
|
||||
print_node(app->right);
|
||||
} else if(n->tag == NODE_DATA) {
|
||||
printf("(Packed)");
|
||||
} else if(n->tag == NODE_GLOBAL) {
|
||||
struct node_global* global = (struct node_global*) n;
|
||||
printf("(Global: %p)", global->function);
|
||||
} else if(n->tag == NODE_IND) {
|
||||
print_node(((struct node_ind*) n)->next);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
struct gmachine gmachine;
|
||||
struct node_global* first_node = alloc_global(f_main, 0);
|
||||
struct node_base* result;
|
||||
|
||||
gmachine_init(&gmachine);
|
||||
gmachine_track(&gmachine, (struct node_base*) first_node);
|
||||
stack_push(&gmachine.stack, (struct node_base*) first_node);
|
||||
unwind(&gmachine);
|
||||
result = stack_pop(&gmachine.stack);
|
||||
printf("Result: ");
|
||||
print_node(result);
|
||||
putchar('\n');
|
||||
gmachine_free(&gmachine);
|
||||
}
|
||||
84
code/compiler/13/runtime.h
Normal file
@@ -0,0 +1,84 @@
|
||||
#pragma once
|
||||
#include <stdlib.h>
|
||||
|
||||
struct gmachine;
|
||||
|
||||
enum node_tag {
|
||||
NODE_APP,
|
||||
NODE_NUM,
|
||||
NODE_GLOBAL,
|
||||
NODE_IND,
|
||||
NODE_DATA
|
||||
};
|
||||
|
||||
struct node_base {
|
||||
enum node_tag tag;
|
||||
int8_t gc_reachable;
|
||||
struct node_base* gc_next;
|
||||
};
|
||||
|
||||
struct node_app {
|
||||
struct node_base base;
|
||||
struct node_base* left;
|
||||
struct node_base* right;
|
||||
};
|
||||
|
||||
struct node_num {
|
||||
struct node_base base;
|
||||
int32_t value;
|
||||
};
|
||||
|
||||
struct node_global {
|
||||
struct node_base base;
|
||||
int32_t arity;
|
||||
void (*function)(struct gmachine*);
|
||||
};
|
||||
|
||||
struct node_ind {
|
||||
struct node_base base;
|
||||
struct node_base* next;
|
||||
};
|
||||
|
||||
struct node_data {
|
||||
struct node_base base;
|
||||
int8_t tag;
|
||||
struct node_base** array;
|
||||
};
|
||||
|
||||
struct node_base* alloc_node();
|
||||
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
|
||||
struct node_num* alloc_num(int32_t n);
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
|
||||
struct node_ind* alloc_ind(struct node_base* n);
|
||||
void free_node_direct(struct node_base*);
|
||||
void gc_visit_node(struct node_base*);
|
||||
|
||||
struct stack {
|
||||
size_t size;
|
||||
size_t count;
|
||||
struct node_base** data;
|
||||
};
|
||||
|
||||
void stack_init(struct stack* s);
|
||||
void stack_free(struct stack* s);
|
||||
void stack_push(struct stack* s, struct node_base* n);
|
||||
struct node_base* stack_pop(struct stack* s);
|
||||
struct node_base* stack_peek(struct stack* s, size_t o);
|
||||
void stack_popn(struct stack* s, size_t n);
|
||||
|
||||
struct gmachine {
|
||||
struct stack stack;
|
||||
struct node_base* gc_nodes;
|
||||
int64_t gc_node_count;
|
||||
int64_t gc_node_threshold;
|
||||
};
|
||||
|
||||
void gmachine_init(struct gmachine* g);
|
||||
void gmachine_free(struct gmachine* g);
|
||||
void gmachine_slide(struct gmachine* g, size_t n);
|
||||
void gmachine_update(struct gmachine* g, size_t o);
|
||||
void gmachine_alloc(struct gmachine* g, size_t o);
|
||||
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
|
||||
void gmachine_split(struct gmachine* g, size_t n);
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
|
||||
void gmachine_gc(struct gmachine* g);
|
||||
58
code/compiler/13/scanner.l
Normal file
@@ -0,0 +1,58 @@
|
||||
%option noyywrap
|
||||
%option reentrant
|
||||
|
||||
%{
|
||||
#include <iostream>
|
||||
#include "ast.hpp"
|
||||
#include "definition.hpp"
|
||||
#include "parse_driver.hpp"
|
||||
#include "parser.hpp"
|
||||
|
||||
#define YY_EXTRA_TYPE parse_driver*
|
||||
#define YY_USER_ACTION drv.location.step(); drv.location.columns(yyleng);
|
||||
#define YY_INPUT(buf,result,max_size) \
|
||||
{ \
|
||||
int c = yyextra->get(); \
|
||||
result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \
|
||||
}
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
\n { drv.location.lines(); }
|
||||
[ ]+ {}
|
||||
\\ { return yy::parser::make_BACKSLASH(drv.location); }
|
||||
\+ { return yy::parser::make_PLUS(drv.location); }
|
||||
\* { return yy::parser::make_TIMES(drv.location); }
|
||||
- { return yy::parser::make_MINUS(drv.location); }
|
||||
\/ { return yy::parser::make_DIVIDE(drv.location); }
|
||||
% { return yy::parser::make_MODULO(drv.location); }
|
||||
== { return yy::parser::make_EQUALS(drv.location); }
|
||||
\<= { return yy::parser::make_LESS_EQUALS(drv.location); }
|
||||
` { return yy::parser::make_BACKTICK(drv.location); }
|
||||
[0-9]+ { return yy::parser::make_INT(atoi(yytext), drv.location); }
|
||||
defn { return yy::parser::make_DEFN(drv.location); }
|
||||
data { return yy::parser::make_DATA(drv.location); }
|
||||
case { return yy::parser::make_CASE(drv.location); }
|
||||
of { return yy::parser::make_OF(drv.location); }
|
||||
let { return yy::parser::make_LET(drv.location); }
|
||||
in { return yy::parser::make_IN(drv.location); }
|
||||
\{ { return yy::parser::make_OCURLY(drv.location); }
|
||||
\} { return yy::parser::make_CCURLY(drv.location); }
|
||||
\( { return yy::parser::make_OPAREN(drv.location); }
|
||||
\) { return yy::parser::make_CPAREN(drv.location); }
|
||||
, { return yy::parser::make_COMMA(drv.location); }
|
||||
-> { return yy::parser::make_ARROW(drv.location); }
|
||||
= { return yy::parser::make_EQUAL(drv.location); }
|
||||
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext), drv.location); }
|
||||
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext), drv.location); }
|
||||
<<EOF>> { return yy::parser::make_YYEOF(drv.location); }
|
||||
|
||||
%%
|
||||
|
||||
void scanner_init(parse_driver* d, yyscan_t* scanner) {
|
||||
yylex_init_extra(d, scanner);
|
||||
}
|
||||
void scanner_destroy(yyscan_t* scanner) {
|
||||
yylex_destroy(*scanner);
|
||||
}
|
||||
23
code/compiler/13/test.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
#include "graph.hpp"
|
||||
|
||||
int main() {
|
||||
function_graph graph;
|
||||
graph.add_edge("f", "g");
|
||||
graph.add_edge("g", "h");
|
||||
graph.add_edge("h", "f");
|
||||
|
||||
graph.add_edge("i", "j");
|
||||
graph.add_edge("j", "i");
|
||||
|
||||
graph.add_edge("j", "f");
|
||||
|
||||
graph.add_edge("x", "f");
|
||||
graph.add_edge("x", "i");
|
||||
|
||||
for(auto& group : graph.compute_order()) {
|
||||
std::cout << "Group: " << std::endl;
|
||||
for(auto& member : group->members) {
|
||||
std::cout << member << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
212
code/compiler/13/type.cpp
Normal file
@@ -0,0 +1,212 @@
|
||||
#include "type.hpp"
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "error.hpp"
|
||||
|
||||
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
if(forall.size() != 0) {
|
||||
to << "forall ";
|
||||
for(auto& var : forall) {
|
||||
to << var << " ";
|
||||
}
|
||||
to << ". ";
|
||||
}
|
||||
monotype->print(mgr, to);
|
||||
}
|
||||
|
||||
type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
||||
if(forall.size() == 0) return monotype;
|
||||
std::map<std::string, type_ptr> subst;
|
||||
for(auto& var : forall) {
|
||||
subst[var] = mgr.new_type();
|
||||
}
|
||||
return mgr.substitute(subst, monotype);
|
||||
}
|
||||
|
||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
auto it = mgr.types.find(name);
|
||||
if(it != mgr.types.end()) {
|
||||
it->second->print(mgr, to);
|
||||
} else {
|
||||
to << name;
|
||||
}
|
||||
}
|
||||
|
||||
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
to << name;
|
||||
}
|
||||
|
||||
void type_internal::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
to << "!" << name;
|
||||
}
|
||||
|
||||
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
type_var* var;
|
||||
bool print_parenths = dynamic_cast<type_arr*>(mgr.resolve(left, var).get()) != nullptr;
|
||||
if(print_parenths) to << "(";
|
||||
left->print(mgr, to);
|
||||
if(print_parenths) to << ")";
|
||||
to << " -> ";
|
||||
right->print(mgr, to);
|
||||
}
|
||||
|
||||
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
|
||||
constructor->print(mgr, to);
|
||||
to << "*";
|
||||
for(auto& arg : arguments) {
|
||||
to << " ";
|
||||
arg->print(mgr, to);
|
||||
}
|
||||
}
|
||||
|
||||
std::string type_mgr::new_type_name() {
|
||||
int temp = last_id++;
|
||||
std::string str = "";
|
||||
|
||||
while(temp != -1) {
|
||||
str += (char) ('a' + (temp % 26));
|
||||
temp = temp / 26 - 1;
|
||||
}
|
||||
|
||||
std::reverse(str.begin(), str.end());
|
||||
return str;
|
||||
}
|
||||
|
||||
type_ptr type_mgr::new_type() {
|
||||
return type_ptr(new type_var(new_type_name()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::new_arrow_type() {
|
||||
return type_ptr(new type_arr(new_type(), new_type()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
||||
type_var* cast;
|
||||
|
||||
var = nullptr;
|
||||
while((cast = dynamic_cast<type_var*>(t.get()))) {
|
||||
auto it = types.find(cast->name);
|
||||
|
||||
if(it == types.end()) {
|
||||
var = cast;
|
||||
break;
|
||||
}
|
||||
t = it->second;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void type_mgr::unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc) {
|
||||
type_var *lvar, *rvar;
|
||||
type_arr *larr, *rarr;
|
||||
type_base *lid, *rid;
|
||||
type_app *lapp, *rapp;
|
||||
|
||||
l = resolve(l, lvar);
|
||||
r = resolve(r, rvar);
|
||||
|
||||
if(lvar) {
|
||||
bind(lvar->name, r);
|
||||
return;
|
||||
} else if(rvar) {
|
||||
bind(rvar->name, l);
|
||||
return;
|
||||
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
|
||||
(rarr = dynamic_cast<type_arr*>(r.get()))) {
|
||||
unify(larr->left, rarr->left, loc);
|
||||
unify(larr->right, rarr->right, loc);
|
||||
return;
|
||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||
if(lid->name == rid->name &&
|
||||
lid->arity == rid->arity &&
|
||||
lid->is_internal() == rid->is_internal())
|
||||
return;
|
||||
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||
unify(lapp->constructor, rapp->constructor, loc);
|
||||
auto left_it = lapp->arguments.begin();
|
||||
auto right_it = rapp->arguments.begin();
|
||||
while(left_it != lapp->arguments.end() &&
|
||||
right_it != rapp->arguments.end()) {
|
||||
unify(*left_it, *right_it, loc);
|
||||
left_it++, right_it++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
throw unification_error(l, r, loc);
|
||||
}
|
||||
|
||||
type_ptr type_mgr::substitute(const std::map<std::string, type_ptr>& subst, const type_ptr& t) const {
|
||||
type_ptr temp = t;
|
||||
while(type_var* var = dynamic_cast<type_var*>(temp.get())) {
|
||||
auto subst_it = subst.find(var->name);
|
||||
if(subst_it != subst.end()) return subst_it->second;
|
||||
auto var_it = types.find(var->name);
|
||||
if(var_it == types.end()) return t;
|
||||
temp = var_it->second;
|
||||
}
|
||||
|
||||
if(type_arr* arr = dynamic_cast<type_arr*>(temp.get())) {
|
||||
auto left_result = substitute(subst, arr->left);
|
||||
auto right_result = substitute(subst, arr->right);
|
||||
if(left_result == arr->left && right_result == arr->right) return t;
|
||||
return type_ptr(new type_arr(left_result, right_result));
|
||||
} else if(type_app* app = dynamic_cast<type_app*>(temp.get())) {
|
||||
auto constructor_result = substitute(subst, app->constructor);
|
||||
bool arg_changed = false;
|
||||
std::vector<type_ptr> new_args;
|
||||
for(auto& arg : app->arguments) {
|
||||
auto arg_result = substitute(subst, arg);
|
||||
arg_changed |= arg_result != arg;
|
||||
new_args.push_back(std::move(arg_result));
|
||||
}
|
||||
|
||||
if(constructor_result == app->constructor && !arg_changed) return t;
|
||||
type_app* new_app = new type_app(std::move(constructor_result));
|
||||
std::swap(new_app->arguments, new_args);
|
||||
return type_ptr(new_app);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||
|
||||
if(other && other->name == s) return;
|
||||
types[s] = t;
|
||||
}
|
||||
|
||||
void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
|
||||
type_var* var;
|
||||
type_ptr resolved = resolve(t, var);
|
||||
|
||||
if(var) {
|
||||
into.insert(var->name);
|
||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
|
||||
find_free(arr->left, into);
|
||||
find_free(arr->right, into);
|
||||
} else if(type_app* app = dynamic_cast<type_app*>(resolved.get())) {
|
||||
find_free(app->constructor, into);
|
||||
for(auto& arg : app->arguments) find_free(arg, into);
|
||||
}
|
||||
}
|
||||
|
||||
void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
|
||||
std::set<std::string> monotype_free;
|
||||
type_mgr limited_mgr;
|
||||
for(auto& binding : types) {
|
||||
auto existing_position = std::find(t->forall.begin(), t->forall.end(), binding.first);
|
||||
if(existing_position != t->forall.end()) continue;
|
||||
limited_mgr.types[binding.first] = binding.second;
|
||||
}
|
||||
limited_mgr.find_free(t->monotype, monotype_free);
|
||||
for(auto& not_free : t->forall) {
|
||||
monotype_free.erase(not_free);
|
||||
}
|
||||
into.insert(monotype_free.begin(), monotype_free.end());
|
||||
}
|
||||
109
code/compiler/13/type.hpp
Normal file
@@ -0,0 +1,109 @@
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <optional>
|
||||
#include "location.hh"
|
||||
|
||||
struct type_mgr;
|
||||
|
||||
struct type {
|
||||
virtual ~type() = default;
|
||||
|
||||
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||
};
|
||||
|
||||
using type_ptr = std::shared_ptr<type>;
|
||||
|
||||
struct type_scheme {
|
||||
std::vector<std::string> forall;
|
||||
type_ptr monotype;
|
||||
|
||||
type_scheme(type_ptr type) : forall(), monotype(std::move(type)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
type_ptr instantiate(type_mgr& mgr) const;
|
||||
};
|
||||
|
||||
using type_scheme_ptr = std::shared_ptr<type_scheme>;
|
||||
|
||||
struct type_var : public type {
|
||||
std::string name;
|
||||
|
||||
type_var(std::string n)
|
||||
: name(std::move(n)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_base : public type {
|
||||
std::string name;
|
||||
int32_t arity;
|
||||
|
||||
type_base(std::string n, int32_t a = 0)
|
||||
: name(std::move(n)), arity(a) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
|
||||
virtual bool is_internal() const { return false; }
|
||||
};
|
||||
|
||||
struct type_internal : public type_base {
|
||||
type_internal(std::string n, int32_t a = 0)
|
||||
: type_base(std::move(n), a) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
|
||||
bool is_internal() const { return true; }
|
||||
};
|
||||
|
||||
struct type_data : public type_base {
|
||||
struct constructor {
|
||||
int tag;
|
||||
};
|
||||
|
||||
std::map<std::string, constructor> constructors;
|
||||
|
||||
type_data(std::string n, int32_t a = 0)
|
||||
: type_base(std::move(n), a) {}
|
||||
};
|
||||
|
||||
struct type_arr : public type {
|
||||
type_ptr left;
|
||||
type_ptr right;
|
||||
|
||||
type_arr(type_ptr l, type_ptr r)
|
||||
: left(std::move(l)), right(std::move(r)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_app : public type {
|
||||
type_ptr constructor;
|
||||
std::vector<type_ptr> arguments;
|
||||
|
||||
type_app(type_ptr c)
|
||||
: constructor(std::move(c)) {}
|
||||
|
||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||
};
|
||||
|
||||
struct type_mgr {
|
||||
int last_id = 0;
|
||||
std::map<std::string, type_ptr> types;
|
||||
|
||||
std::string new_type_name();
|
||||
type_ptr new_type();
|
||||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r, const std::optional<yy::location>& loc = std::nullopt);
|
||||
type_ptr substitute(
|
||||
const std::map<std::string, type_ptr>& subst,
|
||||
const type_ptr& t) const;
|
||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||
void bind(const std::string& s, type_ptr t);
|
||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||
void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
|
||||
};
|
||||
92
code/compiler/13/type_env.cpp
Normal file
@@ -0,0 +1,92 @@
|
||||
#include "type_env.hpp"
|
||||
#include "type.hpp"
|
||||
#include "error.hpp"
|
||||
|
||||
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
|
||||
if(parent != nullptr) parent->find_free(mgr, into);
|
||||
for(auto& binding : names) {
|
||||
mgr.find_free(binding.second.type, into);
|
||||
}
|
||||
}
|
||||
|
||||
void type_env::find_free_except(const type_mgr& mgr, const group& avoid,
|
||||
std::set<std::string>& into) const {
|
||||
if(parent != nullptr) parent->find_free(mgr, into);
|
||||
for(auto& binding : names) {
|
||||
if(avoid.members.find(binding.first) != avoid.members.end()) continue;
|
||||
mgr.find_free(binding.second.type, into);
|
||||
}
|
||||
}
|
||||
|
||||
type_scheme_ptr type_env::lookup(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) return it->second.type;
|
||||
if(parent) return parent->lookup(name);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool type_env::is_global(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) return it->second.vis == visibility::global;
|
||||
if(parent) return parent->is_global(name);
|
||||
return false;
|
||||
}
|
||||
|
||||
void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end()) {
|
||||
// Local names shouldn't need mangling.
|
||||
assert(it->second.vis == visibility::global);
|
||||
|
||||
it->second.mangled_name = mangled;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string& type_env::get_mangled_name(const std::string& name) const {
|
||||
auto it = names.find(name);
|
||||
if(it != names.end())
|
||||
return (it->second.mangled_name != "") ? it->second.mangled_name : name;
|
||||
if(parent) return parent->get_mangled_name(name);
|
||||
return name;
|
||||
}
|
||||
|
||||
type_ptr type_env::lookup_type(const std::string& name) const {
|
||||
auto it = type_names.find(name);
|
||||
if(it != type_names.end()) return it->second;
|
||||
if(parent) return parent->lookup_type(name);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void type_env::bind(const std::string& name, type_ptr t, visibility v) {
|
||||
type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
|
||||
names[name] = variable_data(std::move(new_scheme), v, "");
|
||||
}
|
||||
|
||||
void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
|
||||
names[name] = variable_data(std::move(t), v, "");
|
||||
}
|
||||
|
||||
void type_env::bind_type(const std::string& type_name, type_ptr t) {
|
||||
if(lookup_type(type_name) != nullptr)
|
||||
throw type_error("redefinition of type");
|
||||
type_names[type_name] = t;
|
||||
}
|
||||
|
||||
void type_env::generalize(const std::string& name, const group& grp, type_mgr& mgr) {
|
||||
auto names_it = names.find(name);
|
||||
assert(names_it != names.end());
|
||||
assert(names_it->second.type->forall.size() == 0);
|
||||
|
||||
std::set<std::string> free_in_type;
|
||||
std::set<std::string> free_in_env;
|
||||
mgr.find_free(names_it->second.type->monotype, free_in_type);
|
||||
find_free_except(mgr, grp, free_in_env);
|
||||
for(auto& free : free_in_type) {
|
||||
if(free_in_env.find(free) != free_in_env.end()) continue;
|
||||
names_it->second.type->forall.push_back(free);
|
||||
}
|
||||
}
|
||||
|
||||
type_env_ptr type_scope(type_env_ptr parent) {
|
||||
return type_env_ptr(new type_env(std::move(parent)));
|
||||
}
|
||||
49
code/compiler/13/type_env.hpp
Normal file
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "graph.hpp"
|
||||
#include "type.hpp"
|
||||
|
||||
struct type_env;
|
||||
using type_env_ptr = std::shared_ptr<type_env>;
|
||||
|
||||
enum class visibility { global,local };
|
||||
|
||||
struct type_env {
|
||||
struct variable_data {
|
||||
type_scheme_ptr type;
|
||||
visibility vis;
|
||||
std::string mangled_name;
|
||||
|
||||
variable_data()
|
||||
: variable_data(nullptr, visibility::local, "") {}
|
||||
variable_data(type_scheme_ptr t, visibility v, std::string n)
|
||||
: type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
|
||||
};
|
||||
|
||||
type_env_ptr parent;
|
||||
std::map<std::string, variable_data> names;
|
||||
std::map<std::string, type_ptr> type_names;
|
||||
|
||||
type_env(type_env_ptr p) : parent(std::move(p)) {}
|
||||
type_env() : type_env(nullptr) {}
|
||||
|
||||
void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||
void find_free_except(const type_mgr& mgr, const group& avoid,
|
||||
std::set<std::string>& into) const;
|
||||
type_scheme_ptr lookup(const std::string& name) const;
|
||||
bool is_global(const std::string& name) const;
|
||||
void set_mangled_name(const std::string& name, const std::string& mangled);
|
||||
const std::string& get_mangled_name(const std::string& name) const;
|
||||
type_ptr lookup_type(const std::string& name) const;
|
||||
void bind(const std::string& name, type_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind(const std::string& name, type_scheme_ptr t,
|
||||
visibility v = visibility::local);
|
||||
void bind_type(const std::string& type_name, type_ptr t);
|
||||
void generalize(const std::string& name, const group& grp, type_mgr& mgr);
|
||||
};
|
||||
|
||||
|
||||
type_env_ptr type_scope(type_env_ptr parent);
|
||||
21
code/time-traveling/TakeMax.hs
Normal file
@@ -0,0 +1,21 @@
|
||||
takeUntilMax :: [Int] -> Int -> (Int, [Int])
|
||||
takeUntilMax [] m = (m, [])
|
||||
takeUntilMax [x] _ = (x, [x])
|
||||
takeUntilMax (x:xs) m
|
||||
| x == m = (x, [x])
|
||||
| otherwise =
|
||||
let (m', xs') = takeUntilMax xs m
|
||||
in (max m' x, x:xs')
|
||||
|
||||
doTakeUntilMax :: [Int] -> [Int]
|
||||
doTakeUntilMax l = l'
|
||||
where (m, l') = takeUntilMax l m
|
||||
|
||||
takeUntilMax' :: [Int] -> Int -> (Int, [Int])
|
||||
takeUntilMax' [] m = (m, [])
|
||||
takeUntilMax' [x] _ = (x, [x])
|
||||
takeUntilMax' (x:xs) m
|
||||
| x == m = (maximum (x:xs), [x])
|
||||
| otherwise =
|
||||
let (m', xs') = takeUntilMax' xs m
|
||||
in (max m' x, x:xs')
|
||||
28
code/time-traveling/ValueScore.hs
Normal file
@@ -0,0 +1,28 @@
|
||||
import Data.Map as Map
|
||||
import Data.Maybe
|
||||
import Control.Applicative
|
||||
|
||||
data Element = A | B | C | D
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
addElement :: Element -> Map Element Int -> Map Element Int
|
||||
addElement = alter ((<|> Just 1) . fmap (+1))
|
||||
|
||||
getScore :: Element -> Map Element Int -> Float
|
||||
getScore e m = fromMaybe 1.0 $ ((1.0/) . fromIntegral) <$> Map.lookup e m
|
||||
|
||||
data BinaryTree a = Empty | Node a (BinaryTree a) (BinaryTree a) deriving Show
|
||||
type ElementTree = BinaryTree Element
|
||||
type ScoredElementTree = BinaryTree (Element, Float)
|
||||
|
||||
assignScores :: ElementTree -> Map Element Int -> (Map Element Int, ScoredElementTree)
|
||||
assignScores Empty m = (Map.empty, Empty)
|
||||
assignScores (Node e t1 t2) m = (m', Node (e, getScore e m) t1' t2')
|
||||
where
|
||||
(m1, t1') = assignScores t1 m
|
||||
(m2, t2') = assignScores t2 m
|
||||
m' = addElement e $ unionWith (+) m1 m2
|
||||
|
||||
doAssignScores :: ElementTree -> ScoredElementTree
|
||||
doAssignScores t = t'
|
||||
where (m, t') = assignScores t m
|
||||
99
code/typesafe-interpreter/TypesafeIntrV2.idr
Normal file
@@ -0,0 +1,99 @@
|
||||
data ExprType
|
||||
= IntType
|
||||
| BoolType
|
||||
| StringType
|
||||
|
||||
repr : ExprType -> Type
|
||||
repr IntType = Int
|
||||
repr BoolType = Bool
|
||||
repr StringType = String
|
||||
|
||||
intBoolImpossible : IntType = BoolType -> Void
|
||||
intBoolImpossible Refl impossible
|
||||
|
||||
intStringImpossible : IntType = StringType -> Void
|
||||
intStringImpossible Refl impossible
|
||||
|
||||
boolStringImpossible : BoolType = StringType -> Void
|
||||
boolStringImpossible Refl impossible
|
||||
|
||||
decEq : (a : ExprType) -> (b : ExprType) -> Dec (a = b)
|
||||
decEq IntType IntType = Yes Refl
|
||||
decEq BoolType BoolType = Yes Refl
|
||||
decEq StringType StringType = Yes Refl
|
||||
decEq IntType BoolType = No intBoolImpossible
|
||||
decEq BoolType IntType = No $ intBoolImpossible . sym
|
||||
decEq IntType StringType = No intStringImpossible
|
||||
decEq StringType IntType = No $ intStringImpossible . sym
|
||||
decEq BoolType StringType = No boolStringImpossible
|
||||
decEq StringType BoolType = No $ boolStringImpossible . sym
|
||||
|
||||
data Op
|
||||
= Add
|
||||
| Subtract
|
||||
| Multiply
|
||||
| Divide
|
||||
|
||||
data Expr
|
||||
= IntLit Int
|
||||
| BoolLit Bool
|
||||
| StringLit String
|
||||
| BinOp Op Expr Expr
|
||||
| IfElse Expr Expr Expr
|
||||
|
||||
data SafeExpr : ExprType -> Type where
|
||||
IntLiteral : Int -> SafeExpr IntType
|
||||
BoolLiteral : Bool -> SafeExpr BoolType
|
||||
StringLiteral : String -> SafeExpr StringType
|
||||
BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
|
||||
IfThenElse : SafeExpr BoolType -> SafeExpr t -> SafeExpr t -> SafeExpr t
|
||||
|
||||
typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
|
||||
typecheckOp Add IntType IntType = Right (IntType ** (+))
|
||||
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
|
||||
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
|
||||
typecheckOp Divide IntType IntType = Right (IntType ** div)
|
||||
typecheckOp _ _ _ = Left "Invalid binary operator application"
|
||||
|
||||
requireBool : (n : ExprType ** SafeExpr n) -> Either String (SafeExpr BoolType)
|
||||
requireBool (BoolType ** e) = Right e
|
||||
requireBool _ = Left "Not a boolean."
|
||||
|
||||
typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
|
||||
typecheck (IntLit i) = Right (_ ** IntLiteral i)
|
||||
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
|
||||
typecheck (StringLit s) = Right (_ ** StringLiteral s)
|
||||
typecheck (BinOp o l r) = do
|
||||
(lt ** le) <- typecheck l
|
||||
(rt ** re) <- typecheck r
|
||||
(ot ** f) <- typecheckOp o lt rt
|
||||
pure (_ ** BinOperation f le re)
|
||||
typecheck (IfElse c t e) =
|
||||
do
|
||||
ce <- typecheck c >>= requireBool
|
||||
(tt ** te) <- typecheck t
|
||||
(et ** ee) <- typecheck e
|
||||
case decEq tt et of
|
||||
Yes p => pure (_ ** IfThenElse ce (replace p te) ee)
|
||||
No _ => Left "Incompatible branch types."
|
||||
|
||||
eval : SafeExpr t -> repr t
|
||||
eval (IntLiteral i) = i
|
||||
eval (BoolLiteral b) = b
|
||||
eval (StringLiteral s) = s
|
||||
eval (BinOperation f l r) = f (eval l) (eval r)
|
||||
eval (IfThenElse c t e) = if (eval c) then (eval t) else (eval e)
|
||||
|
||||
resultStr : {t : ExprType} -> repr t -> String
|
||||
resultStr {t=IntType} i = show i
|
||||
resultStr {t=BoolType} b = show b
|
||||
resultStr {t=StringType} s = show s
|
||||
|
||||
tryEval : Expr -> String
|
||||
tryEval ex =
|
||||
case typecheck ex of
|
||||
Left err => "Type error: " ++ err
|
||||
Right (t ** e) => resultStr $ eval {t} e
|
||||
|
||||
main : IO ()
|
||||
main = putStrLn $ tryEval $ BinOp Add (IfElse (BoolLit True) (IntLit 6) (IntLit 7)) (BinOp Multiply (IntLit 160) (IntLit 2))
|
||||
120
code/typesafe-interpreter/TypesafeIntrV3.idr
Normal file
@@ -0,0 +1,120 @@
|
||||
data ExprType
|
||||
= IntType
|
||||
| BoolType
|
||||
| StringType
|
||||
| PairType ExprType ExprType
|
||||
|
||||
repr : ExprType -> Type
|
||||
repr IntType = Int
|
||||
repr BoolType = Bool
|
||||
repr StringType = String
|
||||
repr (PairType t1 t2) = Pair (repr t1) (repr t2)
|
||||
|
||||
decEq : (a : ExprType) -> (b : ExprType) -> Maybe (a = b)
|
||||
decEq IntType IntType = Just Refl
|
||||
decEq BoolType BoolType = Just Refl
|
||||
decEq StringType StringType = Just Refl
|
||||
decEq (PairType lt1 lt2) (PairType rt1 rt2) = do
|
||||
subEq1 <- decEq lt1 rt1
|
||||
subEq2 <- decEq lt2 rt2
|
||||
let firstEqual = replace {P = \t1 => PairType lt1 lt2 = PairType t1 lt2} subEq1 Refl
|
||||
let secondEqual = replace {P = \t2 => PairType lt1 lt2 = PairType rt1 t2} subEq2 firstEqual
|
||||
pure secondEqual
|
||||
decEq _ _ = Nothing
|
||||
|
||||
data Op
|
||||
= Add
|
||||
| Subtract
|
||||
| Multiply
|
||||
| Divide
|
||||
|
||||
data Expr
|
||||
= IntLit Int
|
||||
| BoolLit Bool
|
||||
| StringLit String
|
||||
| BinOp Op Expr Expr
|
||||
| IfElse Expr Expr Expr
|
||||
| Pair Expr Expr
|
||||
| Fst Expr
|
||||
| Snd Expr
|
||||
|
||||
data SafeExpr : ExprType -> Type where
|
||||
IntLiteral : Int -> SafeExpr IntType
|
||||
BoolLiteral : Bool -> SafeExpr BoolType
|
||||
StringLiteral : String -> SafeExpr StringType
|
||||
BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
|
||||
IfThenElse : SafeExpr BoolType -> SafeExpr t -> SafeExpr t -> SafeExpr t
|
||||
NewPair : SafeExpr t1 -> SafeExpr t2 -> SafeExpr (PairType t1 t2)
|
||||
First : SafeExpr (PairType t1 t2) -> SafeExpr t1
|
||||
Second : SafeExpr (PairType t1 t2) -> SafeExpr t2
|
||||
|
||||
typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
|
||||
typecheckOp Add IntType IntType = Right (IntType ** (+))
|
||||
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
|
||||
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
|
||||
typecheckOp Divide IntType IntType = Right (IntType ** div)
|
||||
typecheckOp _ _ _ = Left "Invalid binary operator application"
|
||||
|
||||
requireBool : (n : ExprType ** SafeExpr n) -> Either String (SafeExpr BoolType)
|
||||
requireBool (BoolType ** e) = Right e
|
||||
requireBool _ = Left "Not a boolean."
|
||||
|
||||
typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
|
||||
typecheck (IntLit i) = Right (_ ** IntLiteral i)
|
||||
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
|
||||
typecheck (StringLit s) = Right (_ ** StringLiteral s)
|
||||
typecheck (BinOp o l r) = do
|
||||
(lt ** le) <- typecheck l
|
||||
(rt ** re) <- typecheck r
|
||||
(ot ** f) <- typecheckOp o lt rt
|
||||
pure (_ ** BinOperation f le re)
|
||||
typecheck (IfElse c t e) =
|
||||
do
|
||||
ce <- typecheck c >>= requireBool
|
||||
(tt ** te) <- typecheck t
|
||||
(et ** ee) <- typecheck e
|
||||
case decEq tt et of
|
||||
Just p => pure (_ ** IfThenElse ce (replace p te) ee)
|
||||
Nothing => Left "Incompatible branch types."
|
||||
typecheck (Pair l r) =
|
||||
do
|
||||
(lt ** le) <- typecheck l
|
||||
(rt ** re) <- typecheck r
|
||||
pure (_ ** NewPair le re)
|
||||
typecheck (Fst p) =
|
||||
do
|
||||
(pt ** pe) <- typecheck p
|
||||
case pt of
|
||||
PairType _ _ => pure $ (_ ** First pe)
|
||||
_ => Left "Applying fst to non-pair."
|
||||
typecheck (Snd p) =
|
||||
do
|
||||
(pt ** pe) <- typecheck p
|
||||
case pt of
|
||||
PairType _ _ => pure $ (_ ** Second pe)
|
||||
_ => Left "Applying snd to non-pair."
|
||||
|
||||
eval : SafeExpr t -> repr t
|
||||
eval (IntLiteral i) = i
|
||||
eval (BoolLiteral b) = b
|
||||
eval (StringLiteral s) = s
|
||||
eval (BinOperation f l r) = f (eval l) (eval r)
|
||||
eval (IfThenElse c t e) = if (eval c) then (eval t) else (eval e)
|
||||
eval (NewPair l r) = (eval l, eval r)
|
||||
eval (First p) = fst (eval p)
|
||||
eval (Second p) = snd (eval p)
|
||||
|
||||
resultStr : {t : ExprType} -> repr t -> String
|
||||
resultStr {t=IntType} i = show i
|
||||
resultStr {t=BoolType} b = show b
|
||||
resultStr {t=StringType} s = show s
|
||||
resultStr {t=PairType t1 t2} (l,r) = "(" ++ resultStr l ++ ", " ++ resultStr r ++ ")"
|
||||
|
||||
tryEval : Expr -> String
|
||||
tryEval ex =
|
||||
case typecheck ex of
|
||||
Left err => "Type error: " ++ err
|
||||
Right (t ** e) => resultStr $ eval {t} e
|
||||
|
||||
main : IO ()
|
||||
main = putStrLn $ tryEval $ BinOp Add (Fst (IfElse (BoolLit True) (Pair (IntLit 6) (BoolLit True)) (Pair (IntLit 7) (BoolLit False)))) (BinOp Multiply (IntLit 160) (IntLit 2))
|
||||
@@ -3,5 +3,11 @@ languageCode = "en-us"
|
||||
title = "Daniel's Blog"
|
||||
theme = "vanilla"
|
||||
pygmentsCodeFences = true
|
||||
pygmentsStyle = "github"
|
||||
pygmentsUseClasses = true
|
||||
summaryLength = 20
|
||||
|
||||
[markup]
|
||||
[markup.tableOfContents]
|
||||
endLevel = 4
|
||||
ordered = false
|
||||
startLevel = 3
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
---
|
||||
title: About
|
||||
---
|
||||
I'm Daniel, a Computer Science student currently in my third (and final) undergraduate year at Oregon State University.
|
||||
Due my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
|
||||
I'm Daniel, a Computer Science student currently working towards my Master's Degree at Oregon State University.
|
||||
Due to my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
|
||||
group, gaining same experience in formal verification, domain specific language, and explainable computing.
|
||||
|
||||
For work, school, and hobby projects, I use a variety of programming languages, most commonly C/C++,
|
||||
|
||||
304
content/blog/backend_math_rendering.md
Normal file
@@ -0,0 +1,304 @@
|
||||
---
|
||||
title: Rendering Mathematics On The Back End
|
||||
date: 2020-07-21T14:54:26-07:00
|
||||
tags: ["Website", "Nix", "Ruby", "KaTeX"]
|
||||
---
|
||||
|
||||
Due to something of a streak of bad luck when it came to computers, I spent a
|
||||
significant amount of time using a Linux-based Chromebook, and then a
|
||||
Pinebook Pro. It was, in some way, enlightening. The things that I used to take
|
||||
for granted with a 'powerful' machine now became a rare luxury: StackOverflow,
|
||||
and other relatively static websites, took upwards of ten seconds to finish
|
||||
loading. On Slack, each of my keypresses could take longer than 500ms to
|
||||
appear on the screen, and sometimes, it would take several seconds. Some
|
||||
websites would present me with a white screen, and remain that way for much
|
||||
longer than I had time to wait. It was awful.
|
||||
|
||||
At one point, I installed uMatrix, and made it the default policy to block
|
||||
all JavaScript. For the most part, this worked well. Of course, I had to
|
||||
enable JavaScript for applications that needed to be interactive, like
|
||||
Slack, and Discord. But for the most part, I was able to browse the majority
|
||||
of the websites I normally browse. This went on until I started working
|
||||
on the [compiler series]({{< relref "00_compiler_intro.md" >}}) again,
|
||||
and discovered that the LaTeX math on my page, which was required
|
||||
for displaying things like inference rules, didn't work without
|
||||
JavaScript. I was left with two options:
|
||||
|
||||
* Allow JavaScript, and continue using MathJax to render my math.
|
||||
* Make it so that the mathematics are rendered on the back end.
|
||||
|
||||
I've [previously written about math rendering]({{< relref "math_rendering_is_wrong.md" >}}),
|
||||
and made the observation that MathJax's output for LaTeX is __identical__
|
||||
on every computer. From the MathJax 2.6 change log:
|
||||
|
||||
> _Improved CommonHTML output_. The CommonHTML output now provides the same layout quality and MathML support as the HTML-CSS and SVG output. It is on average 40% faster than the other outputs and the markup it produces are identical on all browsers and thus can also be pre-generated on the server via MathJax-node.
|
||||
|
||||
It seems absurd, then, to offload this kind of work into the users, to
|
||||
be done over and over again. As should be clear from the title of
|
||||
this post, this made me settle for the second option: it was
|
||||
__obviously within reach__, especially for a statically-generated website
|
||||
like mine, to render math on the backend.
|
||||
|
||||
I settled on the following architecture:
|
||||
|
||||
* As before, I would generate my pages using Hugo.
|
||||
* I would use the KaTeX NPM package to render math.
|
||||
* To build the website no matter what system I was on, I would use Nix.
|
||||
|
||||
It so happens that Nix isn't really required for using my approach in general.
|
||||
I will give my setup here, but feel free to skip ahead.
|
||||
|
||||
### Setting Up A Nix Build
|
||||
My `default.nix` file looks like this:
|
||||
|
||||
```Nix {linenos=table}
|
||||
{ stdenv, hugo, fetchgit, pkgs, nodejs, ruby }:
|
||||
|
||||
let
|
||||
url = "https://dev.danilafe.com/Web-Projects/blog-static.git";
|
||||
rev = "<commit>";
|
||||
sha256 = "<hash>";
|
||||
requiredPackages = import ./required-packages.nix {
|
||||
inherit pkgs nodejs;
|
||||
};
|
||||
in
|
||||
stdenv.mkDerivation {
|
||||
name = "blog-static";
|
||||
version = rev;
|
||||
src = fetchgit {
|
||||
inherit url rev sha256;
|
||||
};
|
||||
builder = ./builder.sh;
|
||||
converter = ./convert.rb;
|
||||
buildInputs = [
|
||||
hugo
|
||||
requiredPackages.katex
|
||||
(ruby.withPackages (ps: [ ps.nokogiri ]))
|
||||
];
|
||||
}
|
||||
```
|
||||
|
||||
I'm using `node2nix` to generate the `required-packages.nix` file, which allows me,
|
||||
even from a sandboxed Nix build, to download and install `npm` packages. This is needed
|
||||
so that I have access to the `katex` binary at build time. I fed the following JSON file
|
||||
to `node2nix`:
|
||||
|
||||
```JSON {linenos=table}
|
||||
[
|
||||
"katex"
|
||||
]
|
||||
```
|
||||
|
||||
The Ruby script I wrote for this (more on that soon) required the `nokogiri` gem, which
|
||||
I used for traversing the HTML generated for my site. Hugo was obviously required to
|
||||
generate the HTML.
|
||||
|
||||
### Converting LaTeX To HTML
|
||||
After my first post complaining about the state of mathematics on the web, I received
|
||||
the following email (which the author allowed me to share):
|
||||
|
||||
> Sorry for having a random stranger email you, but in your blog post
|
||||
[(link)](https://danilafe.com/blog/math_rendering_is_wrong) you seem to focus on MathJax's
|
||||
difficulty in rendering things server-side, while quietly ignoring that KaTeX's front
|
||||
page advertises server-side rendering. Their documentation [(link)](https://katex.org/docs/options.html)
|
||||
even shows (at least as of the time this email was sent) that it renders both HTML
|
||||
(to be arranged nicely with their CSS) for visuals and MathML for accessibility.
|
||||
|
||||
The author of the email then kindly provided a link to a page they generated using KaTeX and
|
||||
some Bash scripts. The math on this page was rendered at the time it was generated.
|
||||
|
||||
This is a great point, and KaTeX is indeed usable for server-side rendering. But I've
|
||||
seen few people who do actually use it. Unfortunately, as I pointed out in my previous post on the subject,
|
||||
few tools actually take your HTML page and replace LaTeX with rendered math.
|
||||
Here's what I wrote about this last time:
|
||||
|
||||
> [In MathJax,] The bigger issue, though, was that the `page2html`
|
||||
program, which rendered all the mathematics in a single HTML page,
|
||||
was gone. I found `tex2html` and `text2htmlcss`, which could only
|
||||
render equations without the surrounding HTML. I also found `mjpage`,
|
||||
which replaced mathematical expressions in a page with their SVG forms.
|
||||
|
||||
This is still the case, in both MathJax and KaTeX. The ability
|
||||
to render math in one step is the main selling point of front-end LaTeX renderers:
|
||||
all you have to do is drop in a file from a CDN, and voila, you have your
|
||||
math. There are no such easy answers for back-end rendering. In fact,
|
||||
as we will soon see, it's not possible to just search-and-replace occurences
|
||||
of mathematics on your page, either. To actually get KaTeX working
|
||||
on the backend, you need access to tools that handle the potential variety
|
||||
of edge cases associated with HTML. Such tools, to my knowledge, do not
|
||||
currently exist.
|
||||
|
||||
I decided to write my own Ruby script to get the job done. From this script, I
|
||||
would call the `katex` command-line program, which would perform
|
||||
the heavy lifting of rendering the mathematics.
|
||||
|
||||
There are two types of math on my website: inline math and display math.
|
||||
On the command line ([here are the docs](https://katex.org/docs/cli.html)),
|
||||
the distinction is made using the `--display-mode` argument. So, the general algorithm
|
||||
is to replace the code inside the `$$...$$` with their display-rendered version,
|
||||
and the code inside the `\(...\)` with the inline-rendered version. I came up with
|
||||
the following Ruby function:
|
||||
|
||||
```Ruby {linenos=table}
|
||||
def render_cached(cache, command, string, render_comment = nil)
|
||||
cache.fetch(string) do |new|
|
||||
puts " Rendering #{render_comment || new}"
|
||||
cache[string] = Open3.popen3(command) do |i, o, e, t|
|
||||
i.write new
|
||||
i.close
|
||||
o.read.force_encoding(Encoding::UTF_8).strip
|
||||
end
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
Here, the `cache` argument is used to prevent re-running the `katex` command
|
||||
on an equation that was already rendered before (the output is the same, after all).
|
||||
The `command` is the specific shell command that we want to invoke; this would
|
||||
be either `katex` or `katex -d`. The `string` is the math equation to render,
|
||||
and the `render_comment` is the string to print to the console instead of the equation
|
||||
(so that long, display math equations are not printed out to standard out).
|
||||
|
||||
Then, given a substring of the HTML file, we use regular expressions
|
||||
to find the `\(...\)` and `$$...$$`s, and use the `render_cached` method
|
||||
on the LaTeX code inside.
|
||||
|
||||
```Ruby {linenos=table}
|
||||
def perform_katex_sub(inline_cache, display_cache, content)
|
||||
rendered = content.gsub /\\\(((?:[^\\]|\\[^\)])*)\\\)/ do |match|
|
||||
render_cached(inline_cache, "katex", $~[1])
|
||||
end
|
||||
rendered = rendered.gsub /\$\$((?:[^\$]|$[^\$])*)\$\$/ do |match|
|
||||
render_cached(display_cache, "katex -d", $~[1], "display")
|
||||
end
|
||||
return rendered
|
||||
end
|
||||
```
|
||||
|
||||
There's a bit of a trick to the final layer of this script. We want to be
|
||||
really careful about where we replace LaTeX, and where we don't. In
|
||||
particular, we _don't_ want to go into the `code` tags. Otherwise,
|
||||
it wouldn't be possible to talk about LaTeX code! I also suspect that
|
||||
some captions, alt texts, and similar elements should also be left alone.
|
||||
However, I don't have those on my website (yet), and I won't worry about
|
||||
them now. Either way, because of the code tags,
|
||||
we can't just search-and-replace over the entire page; we need to be context
|
||||
aware. This is where `nokogiri` comes in. We parse the HTML, and iterate
|
||||
over all of the 'text' nodes, calling `perform_katex_sub` on all
|
||||
of those that _aren't_ inside code tags.
|
||||
|
||||
Fortunately, this kind of iteration is pretty easy to specify thanks to something called XPath.
|
||||
This was my first time encountering it, but it seems extremely useful: it's
|
||||
a sort of language for selecting XML nodes. First, you provide an 'axis',
|
||||
which is used to specify the positions of the nodes you want to look at
|
||||
relative to the root node. The axis `/` looks at the immediate children
|
||||
(this would be the `html` tag in a properly formatted document, I would imagine).
|
||||
The axis `//` looks at all the transitive children. That is, it will look at the
|
||||
children of the root, then its children, and so on. There's also the `self` axis,
|
||||
which looks at the node itself.
|
||||
|
||||
After you provide an axis, you need to specify the type of node that you want to
|
||||
select. We can write `code`, for instance, to pick only the `<code>....</code>` tags
|
||||
from the axis we've chosen. We can also use `*` to select any node, and we can
|
||||
use `text()` to select text nodes, such as the `Hello` inside of `<b>Hello</b>`.
|
||||
|
||||
We can also apply some more conditions to the nodes we pick using `[]`.
|
||||
For us, the relevant feature here is `not(...)`, which allows us to
|
||||
select nodes that do __not__ match a particular condition. This is all
|
||||
we need to know.
|
||||
|
||||
We write:
|
||||
|
||||
* `//`, starting to search for nodes everywhere, not just the root of the document.
|
||||
* `*`, to match _any_ node. We want to replace math inside of `div`s, `span`s, `nav`s,
|
||||
all of the `h`s, and so on.
|
||||
* `[not(self::code)]`, cutting out all the `code` tags.
|
||||
* `/`, now selecting the nodes that are immediate descendants of the nodes we've selected.
|
||||
* `text()`, giving us the text contents of all the nodes we've selected.
|
||||
|
||||
All in all:
|
||||
|
||||
```
|
||||
//*[not(self::code)]/text()
|
||||
```
|
||||
|
||||
Finally, we use this XPath from `nokogiri`:
|
||||
|
||||
```Ruby {linenos=table}
|
||||
files = ARGV[0..-1]
|
||||
inline_cache, display_cache = {}, {}
|
||||
|
||||
files.each do |file|
|
||||
puts "Rendering file: #{file}"
|
||||
document = Nokogiri::HTML.parse(File.open(file))
|
||||
document.search('//*[not(self::code)]/text()').each do |t|
|
||||
t.replace(perform_katex_sub(inline_cache, display_cache, t.content))
|
||||
end
|
||||
File.write(file, document.to_html)
|
||||
end
|
||||
```
|
||||
|
||||
I named this script `convert.rb`; it's used from inside of the Nix expression
|
||||
and its builder, which we will cover below.
|
||||
|
||||
### Tying it All Together
|
||||
Finally, I wanted an end-to-end script to generate HTML pages and render the LaTeX in them.
|
||||
I used Nix for this, but the below script will largely be compatible with a non-Nix system.
|
||||
I came up with the following, commenting on Nix-specific commands:
|
||||
|
||||
```Bash {linenos=table}
|
||||
# Nix-specific; set up paths.
|
||||
source $stdenv/setup
|
||||
|
||||
# Build site with Hugo
|
||||
# The cp is Nix-specific; it copies the blog source into the current directory.
|
||||
cp -r $src/* .
|
||||
hugo --baseUrl="https://danilafe.com"
|
||||
|
||||
# Render math in HTML and XML files.
|
||||
# $converter is Nix-specific; you can just use convert.rb.
|
||||
find public/ -regex "public/.*\.html" | xargs ruby $converter
|
||||
|
||||
# Output result
|
||||
# $out is Nix-specific; you can replace it with your destination folder.
|
||||
mkdir $out
|
||||
cp -r public/* $out/
|
||||
```
|
||||
|
||||
This is it! Using the two scripts, `convert.rb` and `builder.sh`, I
|
||||
was able to generate my blog with the math rendered on the back-end.
|
||||
Please note, though, that I had to add the KaTeX CSS to my website's
|
||||
`<head>`.
|
||||
|
||||
### Caveats
|
||||
The main caveat of my approach is performance. For every piece of
|
||||
mathematics that I render, I invoke the `katex` command. This incurs
|
||||
the penalty of Node's startup time, every time, and makes my approach
|
||||
take a few dozen seconds to run on my relatively small site. The
|
||||
better approach would be to use a NodeJS script, rather than a Ruby one,
|
||||
to perform the conversion. KaTeX also provides an API, so such a NodeJS
|
||||
script can find the files, parse the HTML, and perform the substitutions.
|
||||
I did quite like using `nokogiri` here, though, and I hope that an equivalently
|
||||
pleasant solution exists in JavaScript.
|
||||
|
||||
Re-rendering the whole website is also pretty wasteful. I rarely change the
|
||||
mathematics on more than one page at a time, but every time I do so, I have
|
||||
to re-run the script, and therefore re-render every page. This makes sense
|
||||
for me, since I use Nix, and my builds are pretty much always performed
|
||||
from scratch. On the other hand, for others, this may not be the best solution.
|
||||
|
||||
### Alternatives
|
||||
The same person who sent me the original email above also pointed out
|
||||
[this `pandoc` filter for KaTeX](https://github.com/Zaharid/pandoc_static_katex).
|
||||
I do not use Pandoc, but from what I can see, this fitler relies on
|
||||
Pandoc's `Math` AST nodes, and applies KaTeX to each of those. This
|
||||
should work, but wasn't applicable in my case, since Hugo's shrotcodes
|
||||
don't mix well with Pandoc. However, it certainly seems like a workable
|
||||
solution.
|
||||
|
||||
### Conclusion
|
||||
With the removal of MathJax from my site, it is now completely JavaScript free,
|
||||
and contains virtually the same HTML that it did beforehand. This, I hope,
|
||||
makes it work better on devices where computational power is more limited.
|
||||
I also hope that it illustrates a general principle - it's very possible,
|
||||
and plausible, to render LaTeX on the back-end for a static site.
|
||||
311
content/blog/boolean_values.md
Normal file
@@ -0,0 +1,311 @@
|
||||
---
|
||||
title: "How Many Values Does a Boolean Have?"
|
||||
date: 2020-08-21T23:05:55-07:00
|
||||
tags: ["Java", "Haskell", "C and C++"]
|
||||
---
|
||||
|
||||
A friend of mine recently had an interview for a software
|
||||
engineering position. They later recounted to me the content
|
||||
of the technical questions that they had been asked. Some had
|
||||
been pretty standard:
|
||||
|
||||
* __"What's the difference between concurrency
|
||||
and parallelism?"__ -- a reasonable question given that Go was
|
||||
the company's language of choice.
|
||||
* __"What's the difference between a method and a function?"__ --
|
||||
a little more strange, in my opinion, since the difference
|
||||
is of little _practical_ use.
|
||||
|
||||
But then, they recounted a rather interesting question:
|
||||
|
||||
> How many values does a bool have?
|
||||
|
||||
Innocuous at first, isn't it? Probably a bit simpler, in fact,
|
||||
than the questions about methods and functions, concurrency
|
||||
and parallelism. It's plausible that a candidate
|
||||
has not done much concurrent or parallel programming in their
|
||||
life, or that they came from a language in which functions
|
||||
were rare and methods were ubiquitous. It's not plausible,
|
||||
on the other hand, that a candidate applying to a software
|
||||
engineering position has not encountered booleans.
|
||||
|
||||
If you're genuinely unsure about the answer to the question,
|
||||
I think there's no reason for me to mess with you. The
|
||||
simple answer to the question -- as far as I know -- is that a boolean
|
||||
has two values. They are `true` and `false` in Java, or `True` and `False`
|
||||
in Haskell, and `1` and `0` in C. A boolean value is either true or false.
|
||||
|
||||
So, what's there to think about? There are a few things, _ackshually_.
|
||||
Let's explore them, starting from the theoretical perspective.
|
||||
|
||||
### Types, Values, and Expressions
|
||||
Boolean, or `bool`, is a type. Broadly speaking, a type
|
||||
is a property of _something_ that defines what the _something_
|
||||
means and what you can do with it. That _something_ can be
|
||||
several things; for our purposes, it can either be an
|
||||
_expression_ in a programming language (like those in the form `fact(n)`)
|
||||
or a value in that same programming language (like `5`).
|
||||
|
||||
Dealing with values is rather simple. Most languages have finite numbers,
|
||||
usually with \\(2^{32}\\) values, which have type `int`,
|
||||
`i32`, or something in a similar vein. Most languages also have
|
||||
strings, of which there are as many as you have memory to contain,
|
||||
and which have the type `string`, `String`, or occasionally
|
||||
the more confusing `char*`. Most languages also have booleans,
|
||||
as we discussed above.
|
||||
|
||||
The deal with expressions is a more interesting. Presumably
|
||||
expressions evaluate to values, and the type of an expression
|
||||
is then the type of values it can yield. Consider the following
|
||||
snippet in C++:
|
||||
|
||||
```C
|
||||
int square(int x) {
|
||||
return x * x;
|
||||
}
|
||||
```
|
||||
|
||||
Here, the expression `x` is known to have type `int` from
|
||||
the type signature provided by the user. Multiplication
|
||||
of integers yields an integer, and so the type of `x*x` is also
|
||||
of type `int`. Since `square(x)` returns `x*x`, it is also
|
||||
of type `int`. So far, so good.
|
||||
|
||||
Okay, how about this:
|
||||
|
||||
```C++
|
||||
int meaningOfLife() {
|
||||
return meaningOfLife();
|
||||
}
|
||||
```
|
||||
|
||||
No, wait, doesn't say "stack overflow" just yet. That's no fun.
|
||||
And anyway, this is technically a tail call, so maybe our
|
||||
C++ compiler can avoid growing the stack. And indeed,
|
||||
flicking on the `-O2` flag in this [compiler explorer example](https://godbolt.org/z/9cv4nY),
|
||||
we can see that no stack growth is necessary: it's just
|
||||
an infinite loop. But `meaningOfLife` will never return a value. One could say
|
||||
this computation _diverges_.
|
||||
|
||||
Well, if it diverges, just throw the expression out of the window! That's
|
||||
no `int`! We only want _real_ `int`s!
|
||||
|
||||
And here, we can do that. But what about the following:
|
||||
|
||||
```C++
|
||||
inf_int collatz(inf_int x) {
|
||||
if(x == 1) return 1;
|
||||
if(x % 2 == 0) return collatz(x/2);
|
||||
return collatz(x * 3 + 1);
|
||||
}
|
||||
```
|
||||
|
||||
Notice that I've used the fictitious type
|
||||
`inf_int` to represent integers that can hold
|
||||
arbitrarily large integer values, not just the 32-bit ones.
|
||||
That is important for this example, and I'll explain why shortly.
|
||||
|
||||
The code in the example is a simulation of the process described
|
||||
in the [Collatz conjecture](https://en.wikipedia.org/wiki/Collatz_conjecture).
|
||||
Given an input number `x`, if the number is even, it's divided in half,
|
||||
and the process continues with the halved number. If, on the other
|
||||
hand, the number is odd, it's multiplied by 3, 1 is added to it,
|
||||
and the process continues with _that_ number. The only way for the
|
||||
process to terminate is for the computation to reach the value 1.
|
||||
|
||||
Why does this matter? Because as of right now, __nobody knows__
|
||||
whether or not the process terminates for all possible input numbers.
|
||||
We have a strong hunch that it does; we've checked a __lot__
|
||||
of numbers and found that the process terminates for them.
|
||||
This is why 32-bit integers are not truly sufficient for this example;
|
||||
we know empirically that the function will terminate for them.
|
||||
|
||||
But why does _this_ matter? Well, it matters because we don't know
|
||||
whether or not this function will diverge, and thus, we can't
|
||||
'throw it out of the window' like we wanted to with `meaningOfLife`!
|
||||
In general, it's _impossible to tell_ whether or not a program will
|
||||
terminate; that is the [halting problem](https://en.wikipedia.org/wiki/Halting_problem).
|
||||
So, what do we do?
|
||||
|
||||
It turns out to be convenient -- formally -- to treat the result of a diverging computation
|
||||
as its own value. This value is usually called 'bottom', and written as \\(\\bot\\).
|
||||
Since in most programming languages, you can write a nonterminating expression or
|
||||
function of any type, this 'bottom' is included in _all_ types. So in fact, the
|
||||
possible values of `unsigned int` are \\(\\bot, 0, 1, 2, ...\\) and so on.
|
||||
As you may have by now guessed, the same is true for a boolean: we have \\(\\bot\\), `true`, and `false`.
|
||||
|
||||
### Haskell and Bottom
|
||||
You may be thinking:
|
||||
|
||||
> Now he's done it; he's gone off the deep end with all that programming language
|
||||
theory. Tell me, Daniel, where the heck have you ever encountered \\(\\bot\\) in
|
||||
code? This question was for a software engineering interview, after all!
|
||||
|
||||
You're right; I haven't _specifically_ seen the symbol \\(\\bot\\) in my time
|
||||
programming. But I have frequently used an equivalent notation for the same idea:
|
||||
`undefined`. In fact, here's a possible definition of `undefined` in Haskell:
|
||||
|
||||
```
|
||||
undefined = undefined
|
||||
```
|
||||
|
||||
Just like `meaningOfLife`, this is a divergent computation! What's more is that
|
||||
the type of this computation is, in Haskell, `a`. More explicitly -- and retreating
|
||||
to more mathematical notation -- we can write this type as: \\(\\forall \\alpha . \\alpha\\).
|
||||
That is, for any type \\(\\alpha\\), `undefined` has that type! This means
|
||||
`undefined` can take on _any_ type, and so, we can write:
|
||||
|
||||
```Haskell
|
||||
myTrue :: Bool
|
||||
myTrue = True
|
||||
|
||||
myFalse :: Bool
|
||||
myFalse = False
|
||||
|
||||
myBool :: Bool
|
||||
myBool = undefined
|
||||
```
|
||||
|
||||
In Haskell, this is quite useful. For instance, if one's in the middle
|
||||
of writing a complicated function, and wants to check their work so far,
|
||||
they can put 'undefined' for the part of the function they haven't written.
|
||||
They can then compile their program; the typechecker will find any mistakes
|
||||
they've made so far, but, since the type of `undefined` can be _anything_,
|
||||
that part of the program will be accepted without second thought.
|
||||
|
||||
The language Idris extends this practice with the idea of typed holes,
|
||||
where you can leave fragments of your program unwritten, and ask the
|
||||
compiler what kind of _thing_ you need to write to fill that hole.
|
||||
|
||||
### Java and `null`
|
||||
Now you may be thinking:
|
||||
|
||||
> This whole deal with Haskell's `undefined` is beside the point; it doesn't
|
||||
really count as a value, since it's just a nonterminating
|
||||
expression. What you're doing is a kind of academic autofellatio.
|
||||
|
||||
Alright, I can accept this criticism. Perhaps just calling a nonterminating
|
||||
function a value _is_ far-fetched (even though in [denotational semantics](https://en.wikipedia.org/wiki/Denotational_semantics)
|
||||
we _do_ extend types with \\(\\bot\\)). But denotational semantics are not
|
||||
the only place where types are implicitly extend with an extra value;
|
||||
let's look at Java.
|
||||
|
||||
In Java, we have `null`. At the
|
||||
core language level, any function or method that accepts a class can also take `null`;
|
||||
if `null` is not to that function or method's liking, it has to
|
||||
explicitly check for it using `if(x == null)`.
|
||||
|
||||
This `null` value does not at first interact with booleans.
|
||||
After all, Java's booleans are not classes. Unlike classes, which you have
|
||||
to allocate using `new`, you can just throw around `true` and `false` as you see
|
||||
fit. Also unlike classes, you simply can't assign `null` to a boolean value.
|
||||
|
||||
The trouble is, the parts of Java dealing with _generics_, which allow you to write
|
||||
polymorphic functions, can't handle 'primitives' like `bool`. If you want to have an `ArrayList`
|
||||
of something, that something _must_ be a class.
|
||||
But what if you really _do_ want an `ArrayList` of booleans? Java solves this problem by introducing
|
||||
'boxed' booleans: they're primitives wrapped in a class, called `Boolean`. This class
|
||||
can then be used for generics.
|
||||
|
||||
But see, this is where `null` has snuck in again. By allowing `Boolean` to be a class
|
||||
(thereby granting it access to generics), we've also given it the ability to be null.
|
||||
This example is made especially compelling because Java supports something
|
||||
they call [autoboxing](https://docs.oracle.com/javase/tutorial/java/data/autoboxing.html):
|
||||
you can directly assign a primitive to a variable of the corresponding boxed type.
|
||||
Consider the example:
|
||||
|
||||
```Java
|
||||
Boolean myTrue = true;
|
||||
Boolean myFalse = false;
|
||||
Boolean myBool = null;
|
||||
```
|
||||
|
||||
Beautiful, isn't it? Better yet, unlike Haskell, where you can't _really_
|
||||
check if your `Bool` is `undefined` (because you can't tell whether
|
||||
a non-terminating computation is as such), you can very easily
|
||||
check if your `Boolean` is `true`, `false`, or `null`:
|
||||
|
||||
```Java
|
||||
assert myTrue != myFalse;
|
||||
assert myFalse != myBool;
|
||||
assert myTrue != myBool;
|
||||
```
|
||||
|
||||
We're okay to use `!=` here, instead of `equals`, because it so happens
|
||||
each boxed instance of a `boolean` value
|
||||
[refers to the same `Boolean` object](https://stackoverflow.com/questions/28636738/equality-of-boxed-boolean).
|
||||
In fact, this means that a `Boolean` variable can have __exactly__ 3 values!
|
||||
|
||||
### C and Integers
|
||||
Oh the luxury of having a type representing booleans in your language!
|
||||
It's almost overly indulgent compared to the spartan minimalism of C.
|
||||
In C, boolean conditions are represented as numbers. You can perhaps get
|
||||
away with throwing around `char` or `short int`, but even then,
|
||||
these types allow far more values than two!
|
||||
|
||||
```C
|
||||
unsigned char test = 255;
|
||||
while(test) test -= 1;
|
||||
```
|
||||
|
||||
This loop will run 255 times, thereby demonstrating
|
||||
that C has at least 255 values that can be used
|
||||
to represent the boolean `true`.
|
||||
|
||||
There are other languages
|
||||
with this notion of 'truthy' and 'falsey' values, in which
|
||||
something not exactly `true` or `false` can be used as a condition. However,
|
||||
some of them differ from C in that they also extend this idea
|
||||
to equality. In JavaScript:
|
||||
|
||||
```JavaScript
|
||||
console.assert(true == 1)
|
||||
console.assert(false == 0)
|
||||
```
|
||||
|
||||
Then, there are still exactly two distinct boolean values
|
||||
modulo `==`. No such luck in C, though! We have 256 values that fit in `unsigned char`,
|
||||
all of which are also distinct modulo `==`. Our boolean
|
||||
variable can contain all of these values. And there is no
|
||||
respite to be found with `enum`s, either. We could try define:
|
||||
|
||||
```C
|
||||
enum bool { TRUE, FALSE };
|
||||
```
|
||||
|
||||
Unfortunately, all this does is define `bool` to be a numeric
|
||||
type that can hold at least 2 distinct values, and define
|
||||
numeric constants `TRUE` and `FALSE`. So in fact, you can
|
||||
_still_ write the following code:
|
||||
|
||||
```C
|
||||
enum bool b1 = TRUE;
|
||||
enum bool b2 = FALSE;
|
||||
enum bool b3 = 15;
|
||||
```
|
||||
|
||||
And so, no matter how hard you try, your 'boolean'
|
||||
variable can have many, many values!
|
||||
|
||||
### Conclusion
|
||||
I think that 'how many values does a boolean have' is a strange
|
||||
question. Its purpose can be one of two things:
|
||||
|
||||
* The interviewer expected a long-form response such as this one.
|
||||
This is a weird expectation for a software engineering candidate -
|
||||
how does knowing about \\(\\bot\\), `undefined`, or `null` help in
|
||||
creating software, especially if this information is irrelevant
|
||||
to the company's language of choice?
|
||||
* The interviewer expected the simple answer. In that case,
|
||||
my previous observation applies: what software engineering
|
||||
candidate has _not_ seen a boolean in their time programming?
|
||||
Surely candidates are better screened before they are offered
|
||||
an interview?
|
||||
|
||||
Despite the question's weirdness, I think that the resulting
|
||||
investigation of the matter -- outside of the interview setting --
|
||||
is useful, and perhaps, in a way, enlightening. It may help
|
||||
one understand the design choices made in _their_ language of choice,
|
||||
and how those choices shape the code that they write.
|
||||
|
||||
That's all I have! I hope that you found it interesting.
|
||||
BIN
content/blog/dell_is_horrible/brokenkey.jpg
Normal file
|
After Width: | Height: | Size: 94 KiB |
BIN
content/blog/dell_is_horrible/brokenlcd.jpg
Normal file
|
After Width: | Height: | Size: 476 KiB |
BIN
content/blog/dell_is_horrible/dm_1.png
Normal file
|
After Width: | Height: | Size: 158 KiB |
BIN
content/blog/dell_is_horrible/dm_2.png
Normal file
|
After Width: | Height: | Size: 204 KiB |
BIN
content/blog/dell_is_horrible/dm_3.png
Normal file
|
After Width: | Height: | Size: 81 KiB |
BIN
content/blog/dell_is_horrible/dm_4.png
Normal file
|
After Width: | Height: | Size: 94 KiB |
BIN
content/blog/dell_is_horrible/dm_5.png
Normal file
|
After Width: | Height: | Size: 102 KiB |
381
content/blog/dell_is_horrible/index.md
Normal file
@@ -0,0 +1,381 @@
|
||||
---
|
||||
title: DELL Is A Horrible Company And You Should Avoid Them At All Costs
|
||||
date: 2020-07-23T13:40:05-07:00
|
||||
tags: ["Electronics"]
|
||||
---
|
||||
|
||||
I really do not want this to be a consumer electronics blog. Such things
|
||||
aren't interesting to me, and nor do I have much knowledge
|
||||
about them. However, sometimes, ripples from these areas make their way
|
||||
into my life, and this is one such instance. Let me tell you
|
||||
{{< sidenote "right" "source-note" "a story" >}}
|
||||
I originally wrote about this in
|
||||
<a href="https://www.dell.com/community/XPS/Ridiculously-Bad-Support-Experience/td-p/7554383">a thread on DELL's support website</a>. Some of this post is
|
||||
going to be adapted from the support website, but some things have happened
|
||||
since. You will probably notice the change between the terse language I used
|
||||
in the original post and the fresh text that I'm writing now.
|
||||
{{< /sidenote >}} of
|
||||
my experience with DELL and their XPS 2-in-1 laptop, which has gone on since
|
||||
around January of 2020, and is still going at the time of writing, in July
|
||||
2020, half a year later.
|
||||
|
||||
I was, until recently, an undergraduate student in Computer Science. I will
|
||||
soon be starting my Masters in Computer Science, too. I say this to make one
|
||||
thing clear: I need a computer. Not only is it a necessity for my major,
|
||||
but the majority of my hobbies -- including this blog -- are digital, too.
|
||||
Since my university is a couple of hours from my home, I travel back and forth
|
||||
a lot. I also have a cozy little spot in the
|
||||
{{< sidenote "right" "offices-note" "graduate student offices" >}}
|
||||
They're a bunch of cubicles in a keycard-protected room, really. Nothing fancy.
|
||||
{{< /sidenote >}}at my university, but travel by bus, so I find myself spending
|
||||
roughly equal portions of my work time at home and 'elsewhere'. A laptop
|
||||
as my primary machine, I thought, made sense. But it had to be a decent one.
|
||||
Persuaded by one of my instructors, who stressed the importance of vision and
|
||||
a decent screen, I settled on a DELL XPS, which at the time came with a 4k
|
||||
display.
|
||||
|
||||
As is commonplace, things went great at first. The screen _was_ really nice,
|
||||
all of my code compiled swiftly, and even the games I occasionally played ran
|
||||
at a solid 60fps. I was happy with my purchase.
|
||||
|
||||
There was one hiccup before things went really downhill, a sort of
|
||||
foreshadowing of things to come. My trackpad didn't work at peculiar times.
|
||||
|
||||
### Prologue: Trackpad Hiccups
|
||||
While working, booted into Linux, I noticed that my trackpad was having some
|
||||
trouble. It was stuttering, and occasionally wouldn't work at all for seconds
|
||||
at a time. I assumed that this was a problem with the trackpad drivers on
|
||||
Linux, or perhaps the whole system was freezing up. I rebooted, and the
|
||||
problem went away.
|
||||
|
||||
Until it came back.
|
||||
|
||||
A few days later, my trackpad was freezing virtually every minute.
|
||||
It was strange, but fortunately, I'm used to a keyboard-based workflow, and
|
||||
the malfunctions did not affect me too much. It was just a little troubling.
|
||||
What soon made it more troubling, was that I noticed this exact same issue
|
||||
occurring on Windows. To me, this meant one dreadful thing: it was a hardware
|
||||
issue.
|
||||
|
||||
I poked and prodded for a little bit, and finally discovered the cause:
|
||||
whenever I put my hand on the left palmrest, the trackpad would reliably stop
|
||||
working. Knowing what the issue was, I called DELL. I spoke to a guy on the
|
||||
other end, who had me run through diagnostics, driver updates, and BIOS
|
||||
settings (I imagined this was procedure, so I didn't mind doing the extra
|
||||
work to make the other guy's job easier). Finally, he scheduled a repair
|
||||
appointment. A technician came into my house, took off the laptop cover,
|
||||
and said something along the lines of:
|
||||
|
||||
> Now look. They gave me a whole new motherboard and case to replace yours,
|
||||
but in my personal opinion, this is a bad idea. Things are bound to break
|
||||
when you do this. See how the replacement case has an insulating piece
|
||||
of fabric under the left palmrest, and yours doesn't? Why don't we rip
|
||||
the fabric off the replacement case, and tape it in place on your machine,
|
||||
without any reassembly?
|
||||
|
||||
This man was wiser than any of the other DELL technicians, I now understand.
|
||||
The repair went without a hitch. He grilled me for going to college instead of
|
||||
just picking up a trade, which was cheaper and offered more job security.
|
||||
In the end, I felt a little weird about having a piece of fabric duct taped
|
||||
inside my computer, but the trackpad had no more issues ever since. All was
|
||||
well.
|
||||
|
||||
### Service Request 1: Broken D Key
|
||||
All was well, that is, until the middle of winter term. I was typing up an
|
||||
assignment for a university class. I was working as usual, when I suddenly
|
||||
noticed that the "d" key stopped working - it had to be pressed rather weird
|
||||
to register on the computer. I looked down, and discovered that the key had
|
||||
snapped in half. The top part of the key fell off shortly thereafter.
|
||||
|
||||
{{< figure src="brokenkey.jpg" caption="The broken D key shortly after the above events." >}}
|
||||
|
||||
At that point, I was more surprised than anything. I hadn't heard of something
|
||||
like this ever happening, especially under circumstances as normal as typing.
|
||||
Regardless, I contacted support, and set up a repair appointment. Things only
|
||||
went downhill from there.
|
||||
|
||||
Again, the appointment was scheduled, and only a few days later, another
|
||||
technician arrived at my house. The only way to repair the key, he said,
|
||||
was to replace the whole keyboard. They keyboard happens to be located
|
||||
underneath all the other hardware, and so, the entire laptop had to be
|
||||
disassembled and reassembled from scratch. He worked for about an hour, and
|
||||
eventually, he put the machine together. The words of the previous
|
||||
technician, who wanted to avoid doing exactly what had just been done, echoed
|
||||
in my head:
|
||||
|
||||
> Things are bound to break when you do this.
|
||||
|
||||
I asked him to test it, just to make sure everything works. Sure enough,
|
||||
not everything did work: the machine no longer had sound!
|
||||
|
||||
### Service Request 2: No sound
|
||||
During diagnostics, the laptop did not emit the "beep" it usually does. This
|
||||
was the first sign. Booting into Windows, the sound icon was crossed out in
|
||||
red, and no sound was present. Booting into Linux led to similar results.
|
||||
The microphone on the machine did not seem to work either. The service
|
||||
technician said that he didn't have the parts to repair it, told me he'd call
|
||||
it in, and left. Soon after, I got an email asking for times I'm available to
|
||||
call: I said "any time except for 1-4 pacific time". DELL support proceeded
|
||||
to call me at 3pm pacific time, when I had no service. Unable to reach me,
|
||||
they promptly notified me that they are archiving my service request.
|
||||
|
||||
This all occurred near finals week at my university, so I had to put the issue
|
||||
on hold. I had to maintain my grades, and I had to grade heaps of assignments
|
||||
from other students. Though the lack of sound was annoying, it wasn't as
|
||||
pressing as preparing for exams, so it was during spring break that I finally
|
||||
called again, and scheduled the service appointment. By then,
|
||||
{{< sidenote "right" "pandemic-note" "the pandemic was in full swing," >}}
|
||||
Just for posterity, in 2020, there had been an outbreak of COVID-19,
|
||||
a Coronavirus. Many states in the U.S., including my own, issued
|
||||
the orders for lockdown and social distancing, which meant the closing
|
||||
of schools, restaurants, and, apparently, the cessation of in-person
|
||||
repairs.
|
||||
{{< /sidenote >}}and DELL told me they'd mail me a box to put my laptop in, and
|
||||
I'd have to mail it off to their service center. Sure, I thought, that's
|
||||
fine. If it's at the service center, they won't ever "not have the required
|
||||
parts". I told the tech support person my address, he read it back to me, and
|
||||
so it was settled.
|
||||
|
||||
Until, that is, the box arrived at the wrong address.
|
||||
|
||||
I had received the machine as a gift from my family, who purchased the
|
||||
computer to arrive at their address. The box arrived at that address too,
|
||||
despite my explicit instructions to have it deliver to my current residence.
|
||||
Since my family and I live 2 hours apart, it took 4 total hours to get the box
|
||||
to me (a drive that couldn't be made right away!), and by the time I had it,
|
||||
DELL was already threatening me again with closing the service request.
|
||||
Eventually, I was able to mail the machine off, and about 5 business days
|
||||
later (business days during which I did not have a working machine, which is
|
||||
very necessary for my school and job) I received it back. I was excited to
|
||||
have the machine back, but that didn't last very long. As I was using the
|
||||
computer with Wolfram Mathematica (a rather heavy piece of software running
|
||||
under Linux), I noticed that it was discharging even while plugged in. I
|
||||
booted into Windows, and was greeted with a warning, something along the
|
||||
lines of: "you are using a slow charger. Please use the official adapter".
|
||||
But I was using the official adapter! I also tried to plug my mouse into the
|
||||
relevant USB-C port, only to discover that it did not work. I had to make
|
||||
another service requests.
|
||||
|
||||
### Service Request 3: Broken Charging Port
|
||||
This time, I made sure to tell the person on the other end of the support
|
||||
call to please send it to my address. I asked if there was anything I can do,
|
||||
or anyone I can contact, and was told "no, just mail the computer in again."
|
||||
I obliged. The box arrived at the right address this time, so I was able to
|
||||
ship it off.
|
||||
|
||||
In the "describe your issue" field on the provided form, I begged the
|
||||
technicians to send me a working machine. "Please", I wrote "Last time I got
|
||||
a machine back from support, it was still broken. I really need it for school
|
||||
and work!". 5 business days later, I received the machine back. I plugged it
|
||||
in to make sure it worked, only to find out . . . that the very same charging
|
||||
port that I requested be repaired, is still broken! It would've been funny,
|
||||
if it wasn't infuriating. How is it possible for me to receive a machine from
|
||||
repairs, without the thing I asked to repair being as much as improved?!
|
||||
|
||||
Worse, a day after I received the machine back (I was able to keep using it
|
||||
thanks to it having two USB-C ports capable of charging), the LCD suddenly
|
||||
flashed, and started flickering. Thinking it was a software glitch, I
|
||||
restarted the machine, only to discover the same flickering during the boot
|
||||
animation and menu. Not only was the charging port not repaired, but now my
|
||||
LCD was broken! (in the below picture, the screen is meant to be blue, but
|
||||
the bottom part of the display is purple and flickering).
|
||||
|
||||
{{< figure src="brokenlcd.jpg" caption="The broken LCD." >}}
|
||||
|
||||
### Service Request 4: Broken LCD
|
||||
I called in to support again, and they once again told me to ship the machine
|
||||
off. What's worse, they accused me of breaking the port myself, and told me
|
||||
this was no longer covered under basic warranty. I had to explain all over
|
||||
again that the port worked fine before the fateful day the D-key snapped. They
|
||||
told me they'd "look into it". Eventually, I received a box in the mail. I
|
||||
wasn't told I would be receiving a box, but that wasn't a big deal. I mailed
|
||||
off the machine.
|
||||
|
||||
The UPS shipping was always the most streamlined part of the process. A day
|
||||
later, I was told my machine was received intact. Another day, and I was
|
||||
informed that the technicians are starting to work on it. And then,
|
||||
a few hours later:
|
||||
|
||||
> __Current Status:__
|
||||
> The part(s) needed to repair your system are not currently in stock.
|
||||
> __What's Next:__
|
||||
> In most cases the parts are available is less than five days.
|
||||
|
||||
A few days is no big deal, and it made sense that DELL wouldn't just
|
||||
have screens lying around. So I waited. And waited. And waited. Two weeks
|
||||
later, I got a little tired of waiting, and called the repair center.
|
||||
An automated message told me:
|
||||
|
||||
> We're currently experiencing heavy call volumes. Please try again later. Goodbye.
|
||||
|
||||
And the call was dropped. This happened every time I tried to call, no matter
|
||||
the hour. The original status update -- the one that notified me about the
|
||||
part shortage -- came on May 8th, but the machine finally arrived to me
|
||||
(without prior warning) on June 2nd, almost a month later.
|
||||
|
||||
The charging port worked. Sound
|
||||
worked. The screen wasn't flickering. I was happy for the brief moments that
|
||||
my computer was loading. As soon as I started vim, though, I noticed something
|
||||
was off: the fonts looked more pixelated. The DPI settings I'd painstakingly
|
||||
tweaked were wrong. Now that I thought about it, even the GRUB menu was
|
||||
larger. My suspicion growing, I booted into Windows, and looked at the display
|
||||
settings. Noticeably fewer resolutions were listed in the drop-down menu;
|
||||
worse, the highest resolution was 1080p. After almost a month of waiting,
|
||||
DELL replaced my 4k laptop display with a 1080p one.
|
||||
|
||||
### System Replacement: Worse LCD Screen
|
||||
|
||||
I admit, I was angry. At the same time, the absurdity of it all was also
|
||||
unbearable. Was this constant loop of hardware damage, the endless number of
|
||||
support calls filled with hoarse jazz music, part of some kind of Kafkaesque
|
||||
dream? I didn't know. I was at the end of my wits as to what to do. As a last
|
||||
resort, I made [a tweet](https://twitter.com/DanilaTheWalrus/status/1268056637383692289)
|
||||
from my almost-abandoned account. DELL Support's Twitter
|
||||
account [quickly responded](https://twitter.com/DellCares/status/1268064691416334344), eager as always to destroy any semblance of
|
||||
transparency by switching to private messages. I let them know my thoughts on the matter. I wanted a new machine.
|
||||
|
||||
{{< figure src="dm_1.png" caption="The first real exchange between me and DELL support." >}}
|
||||
|
||||
Of course we can proceed further. I wanted to know what kind of machine I was getting,
|
||||
though. As long as it was the same model that I originally bought,
|
||||
{{< sidenote "right" "replacement-note" "it would be better than what I have." >}}
|
||||
At least in principle, it would be. Perhaps the wear and tear on the replacement
|
||||
parts would be greater, but at least I would have, presumably, a machine
|
||||
in good condition that had the 4k screen that made me buy it in the first place.
|
||||
{{< /sidenote >}}
|
||||
Despite this, I knew that the machine I was getting was likely refurbished.
|
||||
This _had_ to mean that some of the parts would come from other, used, machines.
|
||||
This irked me, because, well, I payed for a new machine.
|
||||
|
||||
{{< figure src="dm_2.png" caption="Ah, the classic use of canned responses." >}}
|
||||
|
||||
Their use of the canned response, and their unwillingness to answer this simple
|
||||
question, was transparent. Indeed, the machine would be made of used
|
||||
parts. I still wanted to proceed. DELL requested that I sent an image of
|
||||
my machine which included its service tag, together with a piece of
|
||||
paper which included my name and email address. I obliged, and quickly got a response:
|
||||
|
||||
{{< figure src="dm_3.png" caption="If it was me who was silent for 4 days, my request would've long been cancelled. " >}}
|
||||
|
||||
Thanks, Kalpana. You will never hear this name again, not in this post.
|
||||
Only one or two messages from DELL support are ever from the same person.
|
||||
About a week later, I get the following beauty:
|
||||
|
||||
{{< figure src="dm_4.png" caption="Excuse me? What's going on?" >}}
|
||||
|
||||
My initial request was cancelled? Why wasn't I told? What was the reason?
|
||||
What the heck was going on at DELL Support? Should I be worried?
|
||||
My question of "Why" was answered with the apt response of "Yes",
|
||||
and a message meant to pacify me. While this was going on, I ordered
|
||||
a
|
||||
{{< sidenote "right" "pinebook-note" "Pinebook Pro." >}}
|
||||
The Pinebook – a $200 machine – has, thus far, worked more reliably than any DELL product
|
||||
I've had the misfortune of owning.
|
||||
{{< /sidenote >}} It was not a replacement for the DELL machine, but rather
|
||||
the first step towards migrating my setup to a stationary computer,
|
||||
and a small, lightweight SSH device. At this point,
|
||||
there was no more faith in DELL left in my mind.
|
||||
|
||||
Soon, DELL required my attention, only to tell me that they put in
|
||||
a request to see that status of my request. How bureaucratic. Two
|
||||
more names -- Kareem and JKC -- flickered through the chats,
|
||||
also never to be seen again.
|
||||
|
||||
{{< figure src="dm_5.png" caption="Not much of a conversation, really." >}}
|
||||
|
||||
Finally, on July 9th (a month and six days after my first real message to DELL
|
||||
support), I was notified by my roommates that FedEx tried to deliver a package
|
||||
to our house, but gave up when no one came to sign for it. On one hand, this
|
||||
is great: FedEx didn't just leave my laptop on the porch. On the other hand,
|
||||
though, this was the first time I heard about receiving the machine. I got
|
||||
to the house the next day, unpacked the new computer, and tested all the things
|
||||
that had, at one point, failed. Everything seemed to work. I transfered all my
|
||||
files, wiped the old computer clean, and mailed it off. I also spent some
|
||||
time dealing with the fallout of DELL PremierColor starting on its own,
|
||||
and permanently altering the color profile of my display. I don't have the
|
||||
special, physical calibration device, and therefore still suspect that my
|
||||
screen is somewhat green.
|
||||
|
||||
Today, I discovered that the microphone of the replacement machine didn't work.
|
||||
|
||||
### Am I The Problem?
|
||||
When the mysterious FedEx package arrived at my door on July 9th, I did some
|
||||
digging to verify my suspicion that it was from DELL. I discovered their
|
||||
HQ in Lebanon, TN. This gave me an opportunity to
|
||||
{{< sidenote "right" "reviews-note" "see" >}}
|
||||
See, of course, modulo whatever bias arises when only those who feel strongly leave reviews.
|
||||
{{< /sidenote >}} whether or not I was alone in this situation. I was genuinely
|
||||
worried that I was suffering from the technical variant of
|
||||
[Munchausen Syndrome](https://www.webmd.com/mental-health/munchausen-syndrome#1),
|
||||
and that I was compulsively breaking my electronics. These worries were
|
||||
dispelled by the reviews on Google:
|
||||
|
||||
{{< figure src="reviews_1.png" caption="Most of the reviews are pretty terse, but the ratings convey the general idea." >}}
|
||||
|
||||
There were even some that were shockingly similar in terms of the apparent
|
||||
incompetence of the DELL technicians:
|
||||
|
||||
{{< figure src="reviews_2.png" caption="Now, now, Maggie, I wouldn't go as far as recommending Apple." >}}
|
||||
|
||||
So, this is not uncommon. This is how DELL deals with customers now. It's
|
||||
awfully tiring, really; I've been in and out of repairs continuously for
|
||||
almost half a year, now. That's 2.5% of my life at the time of writing,
|
||||
all non-stop since the D-key. And these people probably have spent considerable
|
||||
amounts of time, too.
|
||||
|
||||
### It's About the Principle
|
||||
The microphone on my machine is rather inconsequential to me. I can, and regularly do,
|
||||
teleconference from my phone (a habit that I developed thanks to DELL, since
|
||||
my computer was so often unavailable). I don't need to dictate anything. Most
|
||||
of my communication is via chat.
|
||||
|
||||
Really, compared to the other issues (keyboard, sound, charging, USB ports, the broken or low-resolution screen)
|
||||
the microphone is a benign problem. As I have now learned, things could be worse.
|
||||
|
||||
But why should the thought, _"It could be worse"_, even cross my mind
|
||||
when dealing with such a matter? Virtually every issue that has
|
||||
occurred with my computer thus far could -- should! -- have been diagnosed
|
||||
at the repair center. The 'slow charger' warning shows up in BIOS,
|
||||
so just turning the computer on while plugged in should make it obvious something
|
||||
is wrong; doubly so when the very reason that the laptop was in repairs
|
||||
in the first place was because of the faulty charger. I refuse to believe
|
||||
that screens with different resolutions have the same part identifier,
|
||||
either. How have the standards of service from DELL fallen so low?
|
||||
How come this absurd scenario plays out not just for me, but
|
||||
for others as well? It would be comforting, in a way, to think
|
||||
that I was just the 'exceptional case'. But apparently, I'm not.
|
||||
This is standard practice.
|
||||
|
||||
### Tl;DR
|
||||
Here are he problems I've had with DELL:
|
||||
|
||||
* The machine shipped, apparently, with a missing piece of insulation.
|
||||
* The "D" key on the keyboard snapped after only a few months of use.
|
||||
* While repairing the "D" key, the DELL technician broke the computer's sound and microphone.
|
||||
* While repairing the sound and microphone, the DELL technicians broke a charging port.
|
||||
* The DELL technicians failed to repair the charging port, mailing me back a machine
|
||||
exhibiting the same issues, in addition to a broken LCD screen.
|
||||
* The repair of the LCD screen took almost a month, and concluded
|
||||
with me receiving a worse quality screen than I originally had.
|
||||
* The system replacement that followed the botched LCD repair took
|
||||
over a month to go through.
|
||||
* The replaced system was made partially of used parts, which
|
||||
DELL refused to admit.
|
||||
* The microphone on the replacement system was broken.
|
||||
|
||||
### Closing Thoughts
|
||||
I will not be sending my system in again. It doesn't make sense to do so -
|
||||
after mailing my system in for repairs three times, I've measured empirically that
|
||||
the chance of failure is 100%. Every service request is a lottery, dutifully
|
||||
giving out a random prize of another broken part. I no longer wish to play;
|
||||
as any person who gambles should, I will quit while I'm ahead, and cut my losses.
|
||||
However, I hope for this story, which may be unusual in its level of detail,
|
||||
but not its content, to be seen by seen by someone. I hope to prevent
|
||||
someone out there from feeling the frustration, and anger, and peculiar amusement
|
||||
that I felt during this process. I hope for someone else to purchase a computer
|
||||
with money, and not with their sanity. A guy can hope.
|
||||
|
||||
If you're reading this, please take this as a warning. __DELL is a horrible
|
||||
company. They have the lowest standards of customer support of any
|
||||
U.S. company that I've encountered. Their technicians are largely incompetent.
|
||||
Their quality assurance is non-existent. Stay away from them.__
|
||||
BIN
content/blog/dell_is_horrible/reviews_1.png
Normal file
|
After Width: | Height: | Size: 180 KiB |
BIN
content/blog/dell_is_horrible/reviews_2.png
Normal file
|
After Width: | Height: | Size: 227 KiB |
@@ -1,95 +0,0 @@
|
||||
---
|
||||
title: "Clairvoyance for Good: Using Lazy Evaluation in Haskell"
|
||||
date: 2020-05-03T20:05:29-07:00
|
||||
tags: ["Haskell"]
|
||||
draft: true
|
||||
---
|
||||
|
||||
While tackling a project for work, I ran across a rather unpleasant problem.
|
||||
I don't think it's valuable to go into the specifics here (it's rather
|
||||
large and convoluted); however, the outcome of this experience led me to
|
||||
discover a very interesting technique for lazy functional languages,
|
||||
and I want to share what I learned.
|
||||
|
||||
### Time Traveling
|
||||
Some time ago, I read [this post](https://kcsongor.github.io/time-travel-in-haskell-for-dummies/) by Csongor Kiss about time traveling in Haskell. It's
|
||||
really cool, and makes a lot of sense if you have wrapped your head around
|
||||
lazy evaluation. I'm going to present my take on it here, but please check out
|
||||
Csongor's original post if you are interested.
|
||||
|
||||
Say that you have a list of integers, like `[3,2,6]`. Next, suppose that
|
||||
you want to find the maximum value in the list. You can implement such
|
||||
behavior quite simply with pattern matching:
|
||||
|
||||
```Haskell
|
||||
myMax :: [Int] -> Int
|
||||
myMax [] = error "Being total sucks"
|
||||
myMax (x:xs) = max x $ myMax xs
|
||||
```
|
||||
|
||||
You could even get fancy with a `fold`:
|
||||
|
||||
```Haskell
|
||||
myMax :: [Int] -> Int
|
||||
myMax = foldr1 max
|
||||
```
|
||||
|
||||
All is well, and this is rather elementary Haskell. But now let's look at
|
||||
something that Csongor calls the `repMax` problem:
|
||||
|
||||
> Imagine you had a list, and you wanted to replace all the elements of the
|
||||
> list with the largest element, by only passing the list once.
|
||||
|
||||
How can we possibly do this in one pass? First, we need to find the maximum
|
||||
element, and only then can we have something to replace the other numbers
|
||||
with! It turns out, though, that we can just expect to have the future
|
||||
value, and all will be well. Csongor provides the following example:
|
||||
|
||||
```Haskell {linenos=table}
|
||||
repMax :: [Int] -> Int -> (Int, [Int])
|
||||
repMax [] rep = (rep, [])
|
||||
repMax [x] rep = (x, [rep])
|
||||
repMax (l : ls) rep = (m', rep : ls')
|
||||
where (m, ls') = repMax ls rep
|
||||
m' = max m l
|
||||
|
||||
doRepMax :: [Int] -> [Int]
|
||||
doRepMax xs = xs'
|
||||
where (largest, xs') = repMax xs largest
|
||||
```
|
||||
|
||||
In the above snippet, `repMax` expects to receive the maximum value of
|
||||
its input list. At the same time, it also computes that maximum value,
|
||||
returning it and the newly created list. `doRepMax` is where the magic happens:
|
||||
the `where` clauses receives the maximum number from `repMax`, while at the
|
||||
same time using that maximum number to call `repMax`!
|
||||
|
||||
This works because Haskell's evaluation model is, effectively,
|
||||
[lazy graph reduction](https://en.wikipedia.org/wiki/Graph_reduction). That is,
|
||||
you can think of Haskell as manipulating your code as
|
||||
{{< sidenote "right" "tree-note" "a syntax tree," >}}
|
||||
Why is it called graph reduction, you may be wondering, if the runtime is
|
||||
manipulating syntax trees? To save on work, if a program refers to the
|
||||
same value twice, Haskell has both of those references point to the
|
||||
exact same graph. This violates the tree's property of having only one path
|
||||
from the root to any node, and makes our program a graph. Graphs that
|
||||
refer to themselves also violate the properties of a tree.
|
||||
{{< /sidenote >}} performing
|
||||
substitutions and simplifications as necessary until it reaches a final answer.
|
||||
What the lazy part means is that parts of the syntax tree that are not yet
|
||||
needed to compute the final answer can exist, unsimplied, in the tree. This is
|
||||
what allows us to write the code above: the graph of `repMax xs largest`
|
||||
effectively refers to itself. While traversing the list, it places references
|
||||
to itself in place of each of the elements, and thanks to laziness, these
|
||||
references are not evaluated.
|
||||
|
||||
Let's try a more complicated example. How about instead of creating a new list,
|
||||
we return a `Map` containing the number of times each number occured, but only
|
||||
when those numbers were a factor of the maximum numbers. Our expected output
|
||||
will be:
|
||||
|
||||
```
|
||||
>>> countMaxFactors [1,3,3,9]
|
||||
|
||||
fromList [(1, 1), (3, 2), (9, 1)]
|
||||
```
|
||||
BIN
content/blog/haskell_lazy_evaluation/fixpoint_1.png
Normal file
|
After Width: | Height: | Size: 27 KiB |
BIN
content/blog/haskell_lazy_evaluation/fixpoint_2.png
Normal file
|
After Width: | Height: | Size: 48 KiB |
575
content/blog/haskell_lazy_evaluation/index.md
Normal file
@@ -0,0 +1,575 @@
|
||||
---
|
||||
title: "Time Traveling In Haskell: How It Works And How To Use It"
|
||||
date: 2020-07-30T00:58:10-07:00
|
||||
tags: ["Haskell"]
|
||||
---
|
||||
|
||||
I recently got to use a very curious Haskell technique
|
||||
{{< sidenote "right" "production-note" "in production:" >}}
|
||||
As production as research code gets, anyway!
|
||||
{{< /sidenote >}} time traveling. I say this with
|
||||
the utmost seriousness. This technique worked like
|
||||
magic for the problem I was trying to solve, and so
|
||||
I thought I'd share what I learned. In addition
|
||||
to the technique and its workings, I will also explain how
|
||||
time traveling can be misused, yielding computations that
|
||||
never terminate.
|
||||
|
||||
### Time Traveling
|
||||
Some time ago, I read [this post](https://kcsongor.github.io/time-travel-in-haskell-for-dummies/) by Csongor Kiss about time traveling in Haskell. It's
|
||||
really cool, and makes a lot of sense if you have wrapped your head around
|
||||
lazy evaluation. I'm going to present my take on it here, but please check out
|
||||
Csongor's original post if you are interested.
|
||||
|
||||
Say that you have a list of integers, like `[3,2,6]`. Next, suppose that
|
||||
you want to find the maximum value in the list. You can implement such
|
||||
behavior quite simply with pattern matching:
|
||||
|
||||
```Haskell
|
||||
myMax :: [Int] -> Int
|
||||
myMax [] = error "Being total sucks"
|
||||
myMax (x:xs) = max x $ myMax xs
|
||||
```
|
||||
|
||||
You could even get fancy with a `fold`:
|
||||
|
||||
```Haskell
|
||||
myMax :: [Int] -> Int
|
||||
myMax = foldr1 max
|
||||
```
|
||||
|
||||
All is well, and this is rather elementary Haskell. But now let's look at
|
||||
something that Csongor calls the `repMax` problem:
|
||||
|
||||
> Imagine you had a list, and you wanted to replace all the elements of the
|
||||
> list with the largest element, by only passing the list once.
|
||||
|
||||
How can we possibly do this in one pass? First, we need to find the maximum
|
||||
element, and only then can we have something to replace the other numbers
|
||||
with! It turns out, though, that we can just expect to have the future
|
||||
value, and all will be well. Csongor provides the following example:
|
||||
|
||||
```Haskell
|
||||
repMax :: [Int] -> Int -> (Int, [Int])
|
||||
repMax [] rep = (rep, [])
|
||||
repMax [x] rep = (x, [rep])
|
||||
repMax (l : ls) rep = (m', rep : ls')
|
||||
where (m, ls') = repMax ls rep
|
||||
m' = max m l
|
||||
```
|
||||
In this example, `repMax` takes the list of integers,
|
||||
each of which it must replace with their maximum element.
|
||||
It also takes __as an argument__ the maximum element,
|
||||
as if it had already been computed. It does, however,
|
||||
still compute the intermediate maximum element,
|
||||
in the form of `m'`. Otherwise, where would the future
|
||||
value even come from?
|
||||
|
||||
Thus far, nothing too magical has happened. It's a little
|
||||
strange to expect the result of the computation to be
|
||||
given to us; it just looks like wishful
|
||||
thinking. The real magic happens in Csongor's `doRepMax`
|
||||
function:
|
||||
|
||||
```Haskell
|
||||
doRepMax :: [Int] -> [Int]
|
||||
doRepMax xs = xs'
|
||||
where (largest, xs') = repMax xs largest
|
||||
```
|
||||
|
||||
Look, in particular, on the line with the `where` clause.
|
||||
We see that `repMax` returns the maximum element of the
|
||||
list, `largest`, and the resulting list `xs'` consisting
|
||||
only of `largest` repeated as many times as `xs` had elements.
|
||||
But what's curious is the call to `repMax` itself. It takes
|
||||
as input `xs`, the list we're supposed to process... and
|
||||
`largest`, the value that _it itself returns_.
|
||||
|
||||
This works because Haskell's evaluation model is, effectively,
|
||||
[lazy graph reduction](https://en.wikipedia.org/wiki/Graph_reduction). That is,
|
||||
you can think of Haskell as manipulating your code as
|
||||
{{< sidenote "right" "tree-note" "a syntax tree," >}}
|
||||
Why is it called graph reduction, you may be wondering, if the runtime is
|
||||
manipulating syntax trees? To save on work, if a program refers to the
|
||||
same value twice, Haskell has both of those references point to the
|
||||
exact same graph. This violates the tree's property of having only one path
|
||||
from the root to any node, and makes our program a DAG (at least). Graph nodes that
|
||||
refer to themselves (which are also possible in the model) also violate the properties of a
|
||||
a DAG, and thus, in general, we are working with graphs.
|
||||
{{< /sidenote >}} performing
|
||||
substitutions and simplifications as necessary until it reaches a final answer.
|
||||
What the lazy part means is that parts of the syntax tree that are not yet
|
||||
needed to compute the final answer can exist, unsimplified, in the tree.
|
||||
Why don't we draw a few graphs to get familiar with the idea?
|
||||
|
||||
### Visualizing Graphs and Their Reduction
|
||||
__A word of caution__: the steps presented below may significantly differ
|
||||
from the actual graph reduction algorithms used by modern compilers.
|
||||
In particular, this section draws a lot of ideas from Simon Peyton Jones' book,
|
||||
[_Implementing functional languages: a tutorial_](https://www.microsoft.com/en-us/research/publication/implementing-functional-languages-a-tutorial/).
|
||||
However, modern functional compilers (i.e. GHC) use a much more
|
||||
complicated abstract machine for evaluating graph-based code,
|
||||
based on -- from what I know -- the [spineless tagless G-machine](https://www.microsoft.com/en-us/research/wp-content/uploads/1992/04/spineless-tagless-gmachine.pdf).
|
||||
In short, this section, in order to build intuition, walks through how a functional program
|
||||
evaluated using graph reduction _may_ behave; the actual details
|
||||
depend on the compiler.
|
||||
|
||||
Let's start with something that doesn't have anything fancy. We can
|
||||
take a look at the graph of the expression:
|
||||
|
||||
```Haskell
|
||||
length [1]
|
||||
```
|
||||
|
||||
Stripping away Haskell's syntax sugar for lists, we can write this expression as:
|
||||
|
||||
```Haskell
|
||||
length (1:[])
|
||||
```
|
||||
|
||||
Then, recalling that `(:)`, or 'cons', is just a binary function, we rewrite:
|
||||
|
||||
```Haskell
|
||||
length ((:) 1 [])
|
||||
```
|
||||
|
||||
We're now ready to draw the graph; in this case, it's pretty much identical
|
||||
to the syntax tree of the last form of our expression:
|
||||
|
||||
{{< figure src="length_1.png" caption="The initial graph of `length [1]`." class="small" >}}
|
||||
|
||||
In this image, the `@` nodes represent function application. The
|
||||
root node is an application of the function `length` to the graph that
|
||||
represents the list `[1]`. The list itself is represented using two
|
||||
application nodes: `(:)` takes two arguments, the head and tail of the
|
||||
list, and function applications in Haskell are
|
||||
[curried](https://en.wikipedia.org/wiki/Currying). Eventually,
|
||||
in the process of evaluation, the body of `length` will be reached,
|
||||
and leave us with the following graph:
|
||||
|
||||
{{< figure src="length_2.png" caption="The graph of `length [1]` after the body of `length` is expanded." class="small" >}}
|
||||
|
||||
Conceptually, we only took one reduction step, and thus, we haven't yet gotten
|
||||
to evaluating the recursive call to `length`. Since `(+)`
|
||||
is also a binary function, `1+length xs` is represented in this
|
||||
new graph as two applications of `(+)`, first to `1`, and then
|
||||
to `length []`.
|
||||
|
||||
But what is that box at the root? This box _used to be_ the root of the
|
||||
first graph, which was an application node. However, it is now a
|
||||
an _indirection_. Conceptually, reducing
|
||||
this indirection amounts to reducing the graph
|
||||
it points to. But why have we {{< sidenote "right" "altered-note" "altered the graph" >}}
|
||||
This is a key aspect of implementing functional languages.
|
||||
The language itself may be pure, while the runtime
|
||||
can be, and usually is, impure and stateful. After all,
|
||||
computers are impure and stateful, too!
|
||||
{{< /sidenote >}} in this manner? Because Haskell is a pure language,
|
||||
of course! If we know that a particular graph reduces to some value,
|
||||
there's no reason to reduce it again. However, as we will
|
||||
soon see, it may be _used_ again, so we want to preserve its value.
|
||||
Thus, when we're done reducing a graph, we replace its root node with
|
||||
an indirection that points to its result.
|
||||
|
||||
When can a graph be used more than once? Well, how about this:
|
||||
|
||||
```Haskell
|
||||
let x = square 5 in x + x
|
||||
```
|
||||
|
||||
Here, the initial graph looks as follows:
|
||||
|
||||
{{< figure src="square_1.png" caption="The initial graph of `let x = square 5 in x + x`." class="small" >}}
|
||||
|
||||
As you can see, this _is_ a graph, but not a tree! Since both
|
||||
variables `x` refer to the same expression, `square 5`, they
|
||||
are represented by the same subgraph. Then, when we evaluate `square 5`
|
||||
for the first time, and replace its root node with an indirection,
|
||||
we end up with the following:
|
||||
|
||||
{{< figure src="square_2.png" caption="The graph of `let x = square 5 in x + x` after `square 5` is reduced." class="small" >}}
|
||||
|
||||
There are two `25`s in the graph, and no more `square`s! We only
|
||||
had to evaluate `square 5` exactly once, even though `(+)`
|
||||
will use it twice (once for the left argument, and once for the right).
|
||||
|
||||
Our graphs can also include cycles.
|
||||
A simple, perhaps _the most_ simple example of this in practice is Haskell's
|
||||
`fix` function. It computes a function's fixed point,
|
||||
{{< sidenote "right" "fixpoint-note" "and can be used to write recursive functions." >}}
|
||||
In fact, in the lambda calculus, <code>fix</code> is pretty much <em>the only</em>
|
||||
way to write recursive functions. In the untyped lambda calculus, it can
|
||||
be written as: $$\lambda f . (\lambda x . f (x \ x)) \ (\lambda x . f (x \ x))$$
|
||||
In the simply typed lambda calculus, it cannot be written in any way, and
|
||||
needs to be added as an extension, typically written as \(\textbf{fix}\).
|
||||
{{< /sidenote >}}
|
||||
It's implemented as follows:
|
||||
|
||||
```Haskell
|
||||
fix f = let x = f x in x
|
||||
```
|
||||
|
||||
See how the definition of `x` refers to itself? This is what
|
||||
it looks like in graph form:
|
||||
|
||||
{{< figure src="fixpoint_1.png" caption="The initial graph of `let x = f x in x`." class="tiny" >}}
|
||||
|
||||
I think it's useful to take a look at how this graph is processed. Let's
|
||||
pick `f = (1:)`. That is, `f` is a function that takes a list,
|
||||
and prepends `1` to it. Then, after constructing the graph of `f x`,
|
||||
we end up with the following:
|
||||
|
||||
{{< figure src="fixpoint_2.png" caption="The graph of `fix (1:)` after it's been reduced." class="small" >}}
|
||||
|
||||
We see the body of `f`, which is the application of `(:)` first to the
|
||||
constant `1`, and then to `f`'s argument (`x`, in this case). As
|
||||
before, once we evaluated `f x`, we replaced the application with
|
||||
an indirection; in the image, this indirection is the top box. But the
|
||||
argument, `x`, is itself an indirection which points to the root of `f x`,
|
||||
thereby creating a cycle in our graph. Traversing this graph looks like
|
||||
traversing an infinite list of `1`s.
|
||||
|
||||
Almost there! A node can refer to itself, and, when evaluated, it
|
||||
is replaced with its own value. Thus, a node can effectively reference
|
||||
its own value! The last piece of the puzzle is how a node can access
|
||||
_parts_ of its own value: recall that `doRepMax` calls `repMax`
|
||||
with only `largest`, while `repMax` returns `(largest, xs')`.
|
||||
I have to admit, I don't know the internals of GHC, but I suspect
|
||||
this is done by translating the code into something like:
|
||||
|
||||
```Haskell
|
||||
doRepMax :: [Int] -> [Int]
|
||||
doRepMax xs = snd t
|
||||
where t = repMax xs (fst t)
|
||||
```
|
||||
|
||||
#### Detailed Example: Reducing `doRepMax`
|
||||
|
||||
If the above examples haven't elucidated how `doRepMax` works,
|
||||
stick around in this section and we will go through it step-by-step.
|
||||
This is a rather long and detailed example, so feel free to skip
|
||||
this section to read more about actually using time traveling.
|
||||
|
||||
If you're sticking around, why don't we watch how the graph of `doRepMax [1, 2]` unfolds.
|
||||
This example will be more complex than the ones we've seen
|
||||
so far; to avoid overwhelming ourselves with notation,
|
||||
let's adopt a different convention of writing functions. Instead
|
||||
of using application nodes `@`, let's draw an application of a
|
||||
function `f` to arguments `x1` through `xn` as a subgraph with root `f`
|
||||
and children `x`s. The below figure demonstrates what I mean:
|
||||
|
||||
{{< figure src="notation.png" caption="The new visual notation used in this section." class="small" >}}
|
||||
|
||||
Now, let's write the initial graph for `doRepMax [1,2]`:
|
||||
|
||||
{{< figure src="repmax_1.png" caption="The initial graph of `doRepMax [1,2]`." class="small" >}}
|
||||
|
||||
Other than our new notation, there's nothing too surprising here.
|
||||
The first step of our hypothetical reduction would replace the application of `doRepMax` with its
|
||||
body, and create our graph's first cycle. At a high level, all we want is the second element of the tuple
|
||||
returned by `repMax`, which contains the output list. To get
|
||||
the tuple, we apply `repMax` to the list `[1,2]` and the first element
|
||||
of its result. The list `[1,2]` itself
|
||||
consists of two uses of the `(:)` function.
|
||||
|
||||
{{< figure src="repmax_2.png" caption="The first step of reducing `doRepMax [1,2]`." class="small" >}}
|
||||
|
||||
Next, we would also expand the body of `repMax`. In
|
||||
the following diagram, to avoid drawing a noisy amount of
|
||||
crossing lines, I marked the application of `fst` with
|
||||
a star, and replaced the two edges to `fst` with
|
||||
edges to similar looking stars. This is merely
|
||||
a visual trick; an edge leading to a little star is
|
||||
actually an edge leading to `fst`. Take a look:
|
||||
|
||||
{{< figure src="repmax_3.png" caption="The second step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
Since `(,)` is a constructor, let's say that it doesn't
|
||||
need to be evaluated, and that its
|
||||
{{< sidenote "right" "normal-note" "graph cannot be reduced further" >}}
|
||||
A graph that can't be reduced further is said to be in <em>normal form</em>,
|
||||
by the way.
|
||||
{{< /sidenote >}} (in practice, other things like
|
||||
packing may occur here, but they are irrelevant to us).
|
||||
If `(,)` can't be reduced, we can move on to evaluating `snd`. Given a pair, `snd`
|
||||
simply returns the second element, which in our
|
||||
case is the subgraph starting at `(:)`. We
|
||||
thus replace the application of `snd` with an
|
||||
indirection to this subgraph. This leaves us
|
||||
with the following:
|
||||
|
||||
{{< figure src="repmax_4.png" caption="The third step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
Since it's becoming hard to keep track of what part of the graph
|
||||
is actually being evaluated, I marked the former root of `doRepMax [1,2]` with
|
||||
a blue star. If our original expression occured at the top level,
|
||||
the graph reduction would probably stop here. After all,
|
||||
we're evaluating our graphs using call-by-need, and there
|
||||
doesn't seem to be a need for knowing the what the arguments of `(:)` are.
|
||||
However, stopping at `(:)` wouldn't be very interesting,
|
||||
and we wouldn't learn much from doing so. So instead, let's assume
|
||||
that _something_ is trying to read the elements of our list;
|
||||
perhaps we are trying to print this list to the screen in GHCi.
|
||||
|
||||
In this case, our mysterious external force starts unpacking and
|
||||
inspecting the arguments to `(:)`. The first argument to `(:)` is
|
||||
the list's head, which is the subgraph starting with the starred application
|
||||
of `fst`. We evaluate it in a similar manner to `snd`. That is,
|
||||
we replace this `fst` with an indirection to the first element
|
||||
of the argument tuple, which happens to be the subgraph starting with `max`:
|
||||
|
||||
{{< figure src="repmax_5.png" caption="The fourth step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
Phew! Next, we need to evaluate the body of `max`. Let's make one more
|
||||
simplification here: rather than substitututing `max` for its body
|
||||
here, let's just reason about what evaluating `max` would entail.
|
||||
We would need to evaluate its two arguments, compare them,
|
||||
and return the larger one. The argument `1` can't be reduced
|
||||
any more (it's just a number!), but the second argument,
|
||||
a call to `fst`, needs to be processed. To do so, we need to
|
||||
evaluate the call to `repMax`. We thus replace `repMax`
|
||||
with its body:
|
||||
|
||||
{{< figure src="repmax_6.png" caption="The fifth step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
We've reached one of the base cases here, and there
|
||||
are no more calls to `max` or `repMax`. The whole reason
|
||||
we're here is to evaluate the call to `fst` that's one
|
||||
of the arguments to `max`. Given this graph, doing so is easy.
|
||||
We can clearly see that `2` is the first element of the tuple
|
||||
returned by `repMax [2]`. We thus replace `fst` with
|
||||
an indirection to this node:
|
||||
|
||||
{{< figure src="repmax_7.png" caption="The sixth step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
This concludes our task of evaluating the arguments to `max`.
|
||||
Comparing them, we see that `2` is greater than `1`, and thus,
|
||||
we replace `max` with an indirection to `2`:
|
||||
|
||||
{{< figure src="repmax_8.png" caption="The seventh step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
The node that we starred in our graph is now an indirection (the
|
||||
one that used to be the call to `fst`) which points to
|
||||
another indirection (formerly the call to `max`), which
|
||||
points to `2`. Thus, any edge pointing to a star now
|
||||
points to the value 2.
|
||||
|
||||
By finding the value of the starred node, we have found the first
|
||||
argument of `(:)`, and returned it to our mysterious external force.
|
||||
If we were printing to GHCi, the number `2` would appear on the screen
|
||||
right about now. The force then moves on to the second argument of `(:)`,
|
||||
which is the call to `snd`. This `snd` is applied to an instance of `(,)`, which
|
||||
can't be reduced any further. Thus, all we have to do is take the second
|
||||
element of the tuple, and replace `snd` with an indirection to it:
|
||||
|
||||
{{< figure src="repmax_9.png" caption="The eighth step of reducing `doRepMax [1,2]`." class="medium" >}}
|
||||
|
||||
The second element of the tuple was a call to `(:)`, and that's what the mysterious
|
||||
force is processing now. Just like it did before, it starts by looking at the
|
||||
first argument of this list, which is the list's head. This argument is a reference to
|
||||
the starred node, which, as we've established, eventually points to `2`.
|
||||
Another `2` pops up on the console.
|
||||
|
||||
Finally, the mysterious force reaches the second argument of the `(:)`,
|
||||
which is the empty list. The empty list also cannot be evaluated any
|
||||
further, so that's what the mysterious force receives. Just like that,
|
||||
there's nothing left to print to the console. The mysterious force ceases.
|
||||
After removing the unused nodes, we are left with the following graph:
|
||||
|
||||
{{< figure src="repmax_10.png" caption="The result of reducing `doRepMax [1,2]`." class="small" >}}
|
||||
|
||||
As we would have expected, two `2`s were printed to the console, and our
|
||||
final graph represents the list `[2,2]`.
|
||||
|
||||
### Using Time Traveling
|
||||
Is time tarveling even useful? I would argue yes, especially
|
||||
in cases where Haskell's purity can make certain things
|
||||
difficult.
|
||||
|
||||
As a first example, Csongor provides an assembler that works
|
||||
in a single pass. The challenge in this case is to resolve
|
||||
jumps to code segments occuring _after_ the jump itself;
|
||||
in essence, the address of the target code segment needs to be
|
||||
known before the segment itself is processed. Csongor's
|
||||
code uses the [Tardis monad](https://hackage.haskell.org/package/tardis-0.4.1.0/docs/Control-Monad-Tardis.html),
|
||||
which combines regular state, to which you can write and then
|
||||
later read from, and future state, from which you can
|
||||
read values before your write them. Check out
|
||||
[his complete example](https://kcsongor.github.io/time-travel-in-haskell-for-dummies/#a-single-pass-assembler-an-example) here.
|
||||
|
||||
Alternatively, here's an example from my research, which my
|
||||
coworker and coauthor Kai helped me formulate. I'll be fairly
|
||||
vague, since all of this is still in progress. The gist is that
|
||||
we have some kind of data structure (say, a list or a tree),
|
||||
and we want to associate with each element in this data
|
||||
structure a 'score' of how useful it is. There are many possible
|
||||
heuristics of picking 'scores'; a very simple one is
|
||||
to make it inversely propertional to the number of times
|
||||
an element occurs. To be more concrete, suppose
|
||||
we have some element type `Element`:
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 5 6 >}}
|
||||
|
||||
Suppose also that our data structure is a binary tree:
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 14 16 >}}
|
||||
|
||||
We then want to transform an input `ElementTree`, such as:
|
||||
|
||||
```Haskell
|
||||
Node A (Node A Empty Empty) Empty
|
||||
```
|
||||
|
||||
Into a scored tree, like:
|
||||
|
||||
```Haskell
|
||||
Node (A,0.5) (Node (A,0.5) Empty Empty) Empty
|
||||
```
|
||||
|
||||
Since `A` occured twice, its score is `1/2 = 0.5`.
|
||||
|
||||
Let's define some utility functions before we get to the
|
||||
meat of the implementation:
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 8 12 >}}
|
||||
|
||||
The `addElement` function simply increments the counter for a particular
|
||||
element in the map, adding the number `1` if it doesn't exist. The `getScore`
|
||||
function computes the score of a particular element, defaulting to `1.0` if
|
||||
it's not found in the map.
|
||||
|
||||
Just as before -- noticing that passing around the future values is getting awfully
|
||||
bothersome -- we write our scoring function as though we have
|
||||
a 'future value'.
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 18 24 >}}
|
||||
|
||||
The actual `doAssignScores` function is pretty much identical to
|
||||
`doRepMax`:
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/ValueScore.hs" 26 28 >}}
|
||||
|
||||
There's quite a bit of repetition here, especially in the handling
|
||||
of future values - all of our functions now accept an extra
|
||||
future argument, and return a work-in-progress future value.
|
||||
This is what the `Tardis` monad, and its corresponding
|
||||
`TardisT` monad transformer, aim to address. Just like the
|
||||
`State` monad helps us avoid writing plumbing code for
|
||||
forward-traveling values, `Tardis` helps us do the same
|
||||
for backward-traveling ones.
|
||||
|
||||
#### Cycles in Monadic Bind
|
||||
|
||||
We've seen that we're able to write code like the following:
|
||||
|
||||
```Haskell
|
||||
(a, b) = f a c
|
||||
```
|
||||
|
||||
That is, we were able to write function calls that referenced
|
||||
their own return values. What if we try doing this inside
|
||||
a `do` block? Say, for example, we want to sprinkle some time
|
||||
traveling into our program, but don't want to add a whole new
|
||||
transformer into our monad stack. We could write code as follows:
|
||||
|
||||
```Haskell
|
||||
do
|
||||
(a, b) <- f a c
|
||||
return b
|
||||
```
|
||||
|
||||
Unfortunately, this doesn't work. However, it's entirely
|
||||
possible to enable this using the `RecursiveDo` language
|
||||
extension:
|
||||
|
||||
```Haskell
|
||||
{-# LANGUAGE RecursiveDo #-}
|
||||
```
|
||||
|
||||
Then, we can write the above as follows:
|
||||
|
||||
```Haskell
|
||||
do
|
||||
rec (a, b) <- f a c
|
||||
return b
|
||||
```
|
||||
|
||||
This power, however, comes at a price. It's not as straightforward
|
||||
to build graphs from recursive monadic computations; in fact,
|
||||
it's not possible in general. The translation of the above
|
||||
code uses `MonadFix`. A monad that satisfies `MonadFix` has
|
||||
an operation `mfix`, which is the monadic version of the `fix`
|
||||
function we saw earlier:
|
||||
|
||||
```Haskell
|
||||
mfix :: Monad m => (a -> m a) -> m a
|
||||
-- Regular fix, for comparison
|
||||
fix :: (a -> a) -> a
|
||||
```
|
||||
|
||||
To really understand how the translation works, check out the
|
||||
[paper on recursive do notation](http://leventerkok.github.io/papers/recdo.pdf).
|
||||
|
||||
### Beware The Strictness
|
||||
Though Csongor points out other problems with the
|
||||
time traveling approach, I think he doesn't mention
|
||||
an important idea: you have to be _very_ careful about introducing
|
||||
strictness into your programs when running time-traveling code.
|
||||
For example, suppose we wanted to write a function,
|
||||
`takeUntilMax`, which would return the input list,
|
||||
cut off after the first occurence of the maximum number.
|
||||
Following the same strategy, we come up with:
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/TakeMax.hs" 1 12 >}}
|
||||
|
||||
In short, if we encounter our maximum number, we just return
|
||||
a list of that maximum number, since we do not want to recurse
|
||||
further. On the other hand, if we encounter a number that's
|
||||
_not_ the maximum, we continue our recursion.
|
||||
|
||||
Unfortunately, this doesn't work; our program never terminates.
|
||||
You may be thinking:
|
||||
|
||||
> Well, obviously this doesn't work! We didn't actually
|
||||
compute the maximum number properly, since we stopped
|
||||
recursing too early. We need to traverse the whole list,
|
||||
and not just the part before the maximum number.
|
||||
|
||||
To address this, we can reformulate our `takeUntilMax`
|
||||
function as follows:
|
||||
|
||||
{{< codelines "Haskell" "time-traveling/TakeMax.hs" 14 21 >}}
|
||||
|
||||
Now we definitely compute the maximum correctly! Alas,
|
||||
this doesn't work either. The issue lies on lines 5 and 18,
|
||||
more specifically in the comparison `x == m`. Here, we
|
||||
are trying to base the decision of what branch to take
|
||||
on a future value. This is simply impossible; to compute
|
||||
the value, we need to know the value!
|
||||
|
||||
This is no 'silly mistake', either! In complicated programs
|
||||
that use time traveling, strictness lurks behind every corner.
|
||||
In my research work, I was at one point inserting a data structure into
|
||||
a set; however, deep in the structure was a data type containing
|
||||
a 'future' value, and using the default `Eq` instance!
|
||||
Adding the data structure to a set ended up invoking `(==)` (or perhaps
|
||||
some function from the `Ord` typeclass),
|
||||
which, in turn, tried to compare the lazily evaluated values.
|
||||
My code therefore didn't terminate, much like `takeUntilMax`.
|
||||
|
||||
Debugging time traveling code is, in general,
|
||||
a pain. This is especially true since future values don't look any different
|
||||
from regular values. You can see it in the type signatures
|
||||
of `repMax` and `takeUntilMax`: the maximum number is just an `Int`!
|
||||
And yet, trying to see what its value is will kill the entire program.
|
||||
As always, remember Brian W. Kernighan's wise words:
|
||||
|
||||
> Debugging is twice as hard as writing the code in the first place.
|
||||
Therefore, if you write the code as cleverly as possible, you are,
|
||||
by definition, not smart enough to debug it.
|
||||
|
||||
### Conclusion
|
||||
This is about it! In a way, time traveling can make code performing
|
||||
certain operations more expressive. Furthermore, even if it's not groundbreaking,
|
||||
thinking about time traveling is a good exercise to get familiar
|
||||
with lazy evaluation in general. I hope you found this useful!
|
||||
BIN
content/blog/haskell_lazy_evaluation/length_1.png
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
content/blog/haskell_lazy_evaluation/length_2.png
Normal file
|
After Width: | Height: | Size: 72 KiB |
BIN
content/blog/haskell_lazy_evaluation/notation.png
Normal file
|
After Width: | Height: | Size: 99 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_1.png
Normal file
|
After Width: | Height: | Size: 44 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_10.png
Normal file
|
After Width: | Height: | Size: 53 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_2.png
Normal file
|
After Width: | Height: | Size: 70 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_3.png
Normal file
|
After Width: | Height: | Size: 130 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_4.png
Normal file
|
After Width: | Height: | Size: 123 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_5.png
Normal file
|
After Width: | Height: | Size: 118 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_6.png
Normal file
|
After Width: | Height: | Size: 122 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_7.png
Normal file
|
After Width: | Height: | Size: 132 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_8.png
Normal file
|
After Width: | Height: | Size: 125 KiB |
BIN
content/blog/haskell_lazy_evaluation/repmax_9.png
Normal file
|
After Width: | Height: | Size: 102 KiB |