Compare commits
48 Commits
a69f9f633e
...
sidenotes
| Author | SHA1 | Date | |
|---|---|---|---|
| f75a47e273 | |||
| 9eae560cae | |||
| b0529a9124 | |||
| 3df9c57482 | |||
| cb5163e1d9 | |||
| c309ac4c14 | |||
| 58c9d5f982 | |||
| dc9a68ad10 | |||
| db16dbda18 | |||
| 172630c2ee | |||
| 6dc7734c70 | |||
| 19a1ffbc98 | |||
| 2cce2859bb | |||
| 654239e29f | |||
| 50fbe3e196 | |||
| 1a8a1c3052 | |||
| 2994f8983d | |||
| 64227f2873 | |||
| 9aef499deb | |||
| c79b5a4120 | |||
| 81ee50d0d4 | |||
| 43b140285f | |||
| adb894869e | |||
| 1f6032a30e | |||
| 9531f4d8e3 | |||
| 37097d3a40 | |||
| 3aa468c2f6 | |||
| c704187012 | |||
| a834fd578e | |||
| 4b5e2f4454 | |||
| 7812b1064b | |||
| 65b9f385cf | |||
| ed88d54aa6 | |||
| d1b515ec5b | |||
| 1ffc43af98 | |||
| b27dc19e57 | |||
| df0b819b0e | |||
| 21f90d85c5 | |||
| 18e3f2af55 | |||
| 3901c9b115 | |||
| d9486d08ae | |||
| d90993a93c | |||
| 7e9bd95846 | |||
| d3d73e0e9c | |||
| d9c151d774 | |||
| 64f4abb8d6 | |||
| bcaa67cc7a | |||
| 8c0a6c834e |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
**/build/*
|
||||||
@@ -1,14 +1,13 @@
|
|||||||
$basic-border: 1px solid #bfbfbf;
|
@import "style.scss";
|
||||||
|
|
||||||
.gmachine-instruction {
|
.gmachine-instruction {
|
||||||
display: flex;
|
display: flex;
|
||||||
border: $basic-border;
|
@include bordered-block;
|
||||||
border-radius: 2px;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.gmachine-instruction-name {
|
.gmachine-instruction-name {
|
||||||
padding: 10px;
|
padding: 10px;
|
||||||
border-right: $basic-border;
|
border-right: $standard-border;
|
||||||
flex-grow: 1;
|
flex-grow: 1;
|
||||||
flex-basis: 20%;
|
flex-basis: 20%;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
@@ -20,7 +19,7 @@ $basic-border: 1px solid #bfbfbf;
|
|||||||
}
|
}
|
||||||
|
|
||||||
.gmachine-inner {
|
.gmachine-inner {
|
||||||
border-bottom: $basic-border;
|
border-bottom: $standard-border;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
|
|
||||||
&:last-child {
|
&:last-child {
|
||||||
|
|||||||
@@ -97,10 +97,6 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||||||
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||||
type_ptr branch_type = mgr.new_type();
|
type_ptr branch_type = mgr.new_type();
|
||||||
|
|
||||||
if(!dynamic_cast<type_base*>(case_type.get())) {
|
|
||||||
throw type_error("attempting case analysis of non-data type");
|
|
||||||
}
|
|
||||||
|
|
||||||
for(auto& branch : branches) {
|
for(auto& branch : branches) {
|
||||||
type_env new_env = env.scope();
|
type_env new_env = env.scope();
|
||||||
branch->pat->match(case_type, mgr, new_env);
|
branch->pat->match(case_type, mgr, new_env);
|
||||||
@@ -108,6 +104,11 @@ type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
|||||||
mgr.unify(branch_type, curr_branch_type);
|
mgr.unify(branch_type, curr_branch_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case_type = mgr.resolve(case_type, var);
|
||||||
|
if(!dynamic_cast<type_base*>(case_type.get())) {
|
||||||
|
throw type_error("attempting case analysis of non-data type");
|
||||||
|
}
|
||||||
|
|
||||||
return branch_type;
|
return branch_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
28
code/compiler/06/CMakeLists.txt
Normal file
28
code/compiler/06/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Build script for the `compiler` executable: generates the parser and scanner
# sources with Bison/Flex and compiles them together with the hand-written files.
cmake_minimum_required(VERSION 3.1)
project(compiler)

# Both tools are needed by the bison_target()/flex_target() calls below, so a
# missing tool is reported here instead of surfacing later in the build.
find_package(BISON REQUIRED)
find_package(FLEX REQUIRED)
bison_target(parser
    ${CMAKE_CURRENT_SOURCE_DIR}/parser.y
    ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
    COMPILE_FLAGS "-d")
flex_target(scanner
    ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
    ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
# The scanner depends on the header generated by the parser target.
add_flex_bison_dependency(scanner parser)

add_executable(compiler
    ast.cpp ast.hpp definition.cpp
    type_env.cpp type_env.hpp
    env.cpp env.hpp
    type.cpp type.hpp
    error.cpp error.hpp
    binop.cpp binop.hpp
    instruction.cpp instruction.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
)
# Hand-written headers live in the source dir; generated ones in the binary dir.
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
262
code/compiler/06/ast.cpp
Normal file
262
code/compiler/06/ast.cpp
Normal file
@@ -0,0 +1,262 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <ostream>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Writes `n` indentation units (one space each) to `to`.
// A zero or negative `n` writes nothing.
static void print_indent(int n, std::ostream& to) {
    // Counting upwards (rather than `while(n--)`) guarantees termination even
    // when a caller passes a negative indent.
    for(int i = 0; i < n; ++i) to << " ";
}
|
||||||
|
|
||||||
|
// Runs the node-specific typecheck() and remembers its result in node_type
// before returning it.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    return node_type = typecheck(mgr, env);
}
|
||||||
|
|
||||||
|
void ast::resolve_common(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
type_ptr resolved_type = mgr.resolve(node_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
|
||||||
|
resolve(mgr);
|
||||||
|
node_type = std::move(resolved_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the integer literal on its own line at the given indent.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value << std::endl;
}
|
||||||
|
|
||||||
|
// An integer literal always has the base type "Int"; `mgr` and `env` are unused.
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr int_type(new type_base("Int"));
    return int_type;
}
|
||||||
|
|
||||||
|
void ast_int::resolve(const type_mgr& mgr) const {
    // Intentionally empty: an integer literal has no child nodes to resolve.
}
|
||||||
|
|
||||||
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the lowercase identifier on its own line at the given indent.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id << std::endl;
}
|
||||||
|
|
||||||
|
// The type of an identifier is whatever the type environment has bound to it.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    return bound_type;
}
|
||||||
|
|
||||||
|
void ast_lid::resolve(const type_mgr& mgr) const {
    // Intentionally empty: an identifier has no child nodes to resolve.
}
|
||||||
|
|
||||||
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
env->has_variable(id) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||||
|
(instruction*) new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the uppercase identifier on its own line at the given indent.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id << std::endl;
}
|
||||||
|
|
||||||
|
// The type of an uppercase identifier is looked up in the type environment.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    return bound_type;
}
|
||||||
|
|
||||||
|
void ast_uid::resolve(const type_mgr& mgr) const {
    // Intentionally empty: an identifier has no child nodes to resolve.
}
|
||||||
|
|
||||||
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "BINOP: " << op_name(op) << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks both operands, looks the operator up by name in `env`, and
// unifies the operator's type with `left -> right -> result`, where `result`
// is a fresh type from `mgr`. Returns that result type.
// Throws type_error if the operator is not bound in `env`.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr lhs_type = left->typecheck_common(mgr, env);
    type_ptr rhs_type = right->typecheck_common(mgr, env);
    type_ptr op_type = env.lookup(op_name(op));
    if(!op_type) {
        throw type_error(std::string("unknown binary operator ") + op_name(op));
    }

    type_ptr result_type = mgr.new_type();
    type_ptr takes_rhs = type_ptr(new type_arr(rhs_type, result_type));
    type_ptr takes_both = type_ptr(new type_arr(lhs_type, takes_rhs));

    mgr.unify(takes_both, op_type);
    return result_type;
}
|
||||||
|
|
||||||
|
void ast_binop::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "APP:" << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks function and argument, then unifies the function's type with
// `argument -> result`, where `result` is a fresh type from `mgr`.
// Returns that result type.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr function_type = left->typecheck_common(mgr, env);
    type_ptr argument_type = right->typecheck_common(mgr, env);

    type_ptr result_type = mgr.new_type();
    type_ptr expected = type_ptr(new type_arr(argument_type, result_type));
    mgr.unify(expected, function_type);
    return result_type;
}
|
||||||
|
|
||||||
|
void ast_app::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "CASE: " << std::endl;
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
print_indent(indent + 1, to);
|
||||||
|
branch->pat->print(to);
|
||||||
|
to << std::endl;
|
||||||
|
branch->expr->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks the scrutinee and every branch. Each branch's pattern is matched
// against the scrutinee type in a fresh scope and each branch body's type is
// unified with one shared result type, which is returned.
// Throws type_error if, after all branches, the scrutinee type does not
// resolve to a type_data.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* unbound;
    type_ptr scrutinee_type = mgr.resolve(of->typecheck_common(mgr, env), unbound);
    type_ptr result_type = mgr.new_type();

    for(const auto& alternative : branches) {
        type_env branch_env = env.scope();
        alternative->pat->match(scrutinee_type, mgr, branch_env);
        type_ptr body_type = alternative->expr->typecheck_common(mgr, branch_env);
        mgr.unify(result_type, body_type);
    }

    // Resolve again: matching the patterns may have pinned the scrutinee type.
    scrutinee_type = mgr.resolve(scrutinee_type, unbound);
    const bool is_data = dynamic_cast<type_data*>(scrutinee_type.get()) != nullptr;
    if(!is_data) {
        throw type_error("attempting case analysis of non-data type");
    }

    return result_type;
}
|
||||||
|
|
||||||
|
void ast_case::resolve(const type_mgr& mgr) const {
|
||||||
|
of->resolve_common(mgr);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
branch->expr->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_data* type = dynamic_cast<type_data*>(of->node_type.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split()));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes just the variable's name, with no trailing newline.
void pattern_var::print(std::ostream& to) const {
    const std::string& bound_name = var;
    to << bound_name;
}
|
||||||
|
|
||||||
|
// A variable pattern matches any type: it only binds the variable's name to
// `t` in `env`. `mgr` is unused.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    const std::string& bound_name = var;
    env.bind(bound_name, t);
}
|
||||||
|
|
||||||
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
to << constr;
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matches a constructor pattern against type `t`.
//
// Looks the constructor up in `env`, then walks its function type one arrow
// per pattern parameter, binding each parameter name to the arrow's left side.
// Whatever type remains after the parameters is unified with `t`.
//
// Throws type_error if the constructor is unknown or if the pattern has more
// parameters than the constructor's type has arrows.
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr constructor_type = env.lookup(constr);
    if(!constructor_type) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    // Range-for avoids comparing a signed index against params.size().
    for(const auto& param : params) {
        type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
        if(!arr) throw type_error("too many parameters in constructor pattern");

        env.bind(param, arr->left);
        constructor_type = arr->right;
    }

    mgr.unify(t, constructor_type);
}
|
||||||
197
code/compiler/06/ast.hpp
Normal file
197
code/compiler/06/ast.hpp
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
struct ast {
|
||||||
|
type_ptr node_type;
|
||||||
|
|
||||||
|
virtual ~ast() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||||
|
virtual void compile(const env_ptr& env,
|
||||||
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
|
|
||||||
|
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||||
|
void resolve_common(const type_mgr& mgr);
|
||||||
|
};
|
||||||
|
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
|
virtual void print(std::ostream& to) const = 0;
|
||||||
|
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
|
|
||||||
|
struct branch {
|
||||||
|
pattern_ptr pat;
|
||||||
|
ast_ptr expr;
|
||||||
|
|
||||||
|
branch(pattern_ptr p, ast_ptr a)
|
||||||
|
: pat(std::move(p)), expr(std::move(a)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using branch_ptr = std::unique_ptr<branch>;
|
||||||
|
|
||||||
|
// A data constructor as written in a `data` definition: its name, the names
// of its parameter types, and the numeric tag assigned to it during
// type checking.
struct constructor {
    std::string name;
    std::vector<std::string> types;
    // Default-initialized so the tag never holds an indeterminate value before
    // definition_data::typecheck_first() assigns the real one.
    int8_t tag = 0;

    constructor(std::string n, std::vector<std::string> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};

using constructor_ptr = std::unique_ptr<constructor>;
|
||||||
|
|
||||||
|
struct definition {
|
||||||
|
virtual ~definition() = default;
|
||||||
|
|
||||||
|
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||||
|
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) = 0;
|
||||||
|
virtual void compile() = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using definition_ptr = std::unique_ptr<definition>;
|
||||||
|
|
||||||
|
struct ast_int : public ast {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
explicit ast_int(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_lid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_uid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_uid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_binop : public ast {
|
||||||
|
binop op;
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_binop(binop o, ast_ptr l, ast_ptr r)
|
||||||
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_app : public ast {
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_app(ast_ptr l, ast_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_case : public ast {
|
||||||
|
ast_ptr of;
|
||||||
|
std::vector<branch_ptr> branches;
|
||||||
|
|
||||||
|
ast_case(ast_ptr o, std::vector<branch_ptr> b)
|
||||||
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_var : public pattern {
|
||||||
|
std::string var;
|
||||||
|
|
||||||
|
pattern_var(std::string v)
|
||||||
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_constr : public pattern {
|
||||||
|
std::string constr;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
|
||||||
|
pattern_constr(std::string c, std::vector<std::string> p)
|
||||||
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr&, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_defn : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
type_ptr return_type;
|
||||||
|
std::vector<type_ptr> param_types;
|
||||||
|
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
|
||||||
|
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
||||||
|
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_data : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
|
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||||
|
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
};
|
||||||
21
code/compiler/06/binop.cpp
Normal file
21
code/compiler/06/binop.cpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
// Returns the source-level symbol for `op`, or "??" for a value outside the
// enumeration.
std::string op_name(binop op) {
    if(op == PLUS) return "+";
    if(op == MINUS) return "-";
    if(op == TIMES) return "*";
    if(op == DIVIDE) return "/";
    return "??";
}
|
||||||
|
|
||||||
|
// Returns the name of the global that implements `op` ("plus", "minus", ...),
// or "??" for a value outside the enumeration.
std::string op_action(binop op) {
    if(op == PLUS) return "plus";
    if(op == MINUS) return "minus";
    if(op == TIMES) return "times";
    if(op == DIVIDE) return "divide";
    return "??";
}
|
||||||
12
code/compiler/06/binop.hpp
Normal file
12
code/compiler/06/binop.hpp
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// The binary operators of the language. The enumerator values are spelled out
// and equal the implicit ones (0..3).
enum binop {
    PLUS = 0,
    MINUS = 1,
    TIMES = 2,
    DIVIDE = 3
};

// Symbol of the operator as written in source ("+", "-", "*", "/").
std::string op_name(binop op);
// Name of the global implementing the operator ("plus", "minus", ...).
std::string op_action(binop op);
|
||||||
83
code/compiler/06/definition.cpp
Normal file
83
code/compiler/06/definition.cpp
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// First pass: creates a fresh return type and one fresh type per parameter,
// builds the function type `p1 -> ... -> pn -> return` and binds it to the
// function's name in `env`.
// Parameters are visited last-to-first, so param_types ends up in reverse
// parameter order.
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();
    type_ptr function_type = return_type;

    auto param = params.rbegin();
    while(param != params.rend()) {
        type_ptr fresh = mgr.new_type();
        function_type = type_ptr(new type_arr(fresh, function_type));
        param_types.push_back(fresh);
        param++;
    }

    env.bind(name, function_type);
}
|
||||||
|
|
||||||
|
// Second pass: binds each parameter name to its type in a new scope,
// type-checks the body there and unifies the body's type with return_type.
// param_types is walked backwards to pair it up with params.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env body_env = env.scope();

    auto type_it = param_types.rbegin();
    for(auto name_it = params.begin();
            name_it != params.end() && type_it != param_types.rend();
            name_it++, type_it++) {
        body_env.bind(*name_it, *type_it);
    }

    type_ptr inferred = body->typecheck_common(mgr, body_env);
    mgr.unify(return_type, inferred);
}
|
||||||
|
|
||||||
|
void definition_defn::resolve(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
body->resolve_common(mgr);
|
||||||
|
|
||||||
|
return_type = mgr.resolve(return_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
for(auto& param_type : param_types) {
|
||||||
|
param_type = mgr.resolve(param_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates the type_data for this definition, numbers the constructors with
// consecutive tags starting at 0, and binds each constructor's name in `env`
// to the function type `field1 -> ... -> fieldN -> this type`.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* data_type = new type_data(name);
    type_ptr result_type = type_ptr(data_type);
    int tag_counter = 0;

    for(auto& ctor : constructors) {
        ctor->tag = tag_counter;
        data_type->constructors[ctor->name] = { tag_counter++ };

        // Build the arrow type from the last field backwards.
        type_ptr ctor_type = result_type;
        auto field = ctor->types.rbegin();
        while(field != ctor->types.rend()) {
            type_ptr field_type = type_ptr(new type_base(*field));
            ctor_type = type_ptr(new type_arr(field_type, ctor_type));
            field++;
        }

        env.bind(ctor->name, ctor_type);
    }
}
|
||||||
|
|
||||||
|
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
    // Intentionally empty: a data definition does nothing in the second pass.
}
|
||||||
|
|
||||||
|
void definition_data::resolve(const type_mgr& mgr) {
    // Intentionally empty: a data definition has nothing to resolve.
}
|
||||||
|
|
||||||
|
void definition_data::compile() {
    // Intentionally empty: no instructions are generated here.
}
|
||||||
23
code/compiler/06/env.cpp
Normal file
23
code/compiler/06/env.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
int env_var::get_offset(const std::string& name) const {
|
||||||
|
if(name == this->name) return 0;
|
||||||
|
if(parent) return parent->get_offset(name) + 1;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_var::has_variable(const std::string& name) const {
|
||||||
|
if(name == this->name) return true;
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int env_offset::get_offset(const std::string& name) const {
|
||||||
|
if(parent) return parent->get_offset(name) + offset;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_offset::has_variable(const std::string& name) const {
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
34
code/compiler/06/env.hpp
Normal file
34
code/compiler/06/env.hpp
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Interface of the compile-time environment: answers whether a name is known
// and at which offset it is found.
struct env {
    virtual ~env() = default;

    // Offset of `name` as seen from this environment entry.
    virtual int get_offset(const std::string& name) const = 0;
    // Whether `name` is known to this environment.
    virtual bool has_variable(const std::string& name) const = 0;
};

using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
|
struct env_var : public env {
|
||||||
|
std::string name;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_var(std::string& n, env_ptr p)
|
||||||
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct env_offset : public env {
|
||||||
|
int offset;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_offset(int o, env_ptr p)
|
||||||
|
: offset(o), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
5
code/compiler/06/error.cpp
Normal file
5
code/compiler/06/error.cpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Returns a fixed, generic message; the specific reason is kept in the
// `description` member and is not part of this string.
const char* type_error::what() const noexcept {
    // Spelling fixed: "occured" -> "occurred".
    return "an error occurred while checking the types of the program";
}
|
||||||
21
code/compiler/06/error.hpp
Normal file
21
code/compiler/06/error.hpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Exception thrown for errors found while type-checking; `description` holds
// the specific reason given at the throw site.
struct type_error : std::exception {
    std::string description;

    type_error(std::string d) : description(std::move(d)) {}

    const char* what() const noexcept override;
};
|
||||||
|
|
||||||
|
struct unification_error : public type_error {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
unification_error(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
|
type_error("failed to unify types") {}
|
||||||
|
};
|
||||||
2
code/compiler/06/examples/bad1.txt
Normal file
2
code/compiler/06/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
defn main = { 3 + True }
|
||||||
1
code/compiler/06/examples/bad2.txt
Normal file
1
code/compiler/06/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
defn main = { 1 2 3 4 5 }
|
||||||
8
code/compiler/06/examples/bad3.txt
Normal file
8
code/compiler/06/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x y z -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
2
code/compiler/06/examples/works1.txt
Normal file
2
code/compiler/06/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
defn main = { plus 320 6 }
|
||||||
|
defn plus x y = { x + y }
|
||||||
3
code/compiler/06/examples/works2.txt
Normal file
3
code/compiler/06/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
defn add x y = { x + y }
|
||||||
|
defn double x = { add x x }
|
||||||
|
defn main = { double 163 }
|
||||||
7
code/compiler/06/examples/works3.txt
Normal file
7
code/compiler/06/examples/works3.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
defn length l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> { 1 + length xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
83
code/compiler/06/instruction.cpp
Normal file
83
code/compiler/06/instruction.cpp
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
#include "instruction.hpp"
|
||||||
|
|
||||||
|
// Writes `n` indentation units (one space each) to `to`.
// A zero or negative `n` writes nothing.
static void print_indent(int n, std::ostream& to) {
    // Counting upwards (rather than `while(n--)`) guarantees termination even
    // when a caller passes a negative indent.
    for(int i = 0; i < n; ++i) to << " ";
}
|
||||||
|
|
||||||
|
// Prints "PushInt(<value>)" on its own line at the given indent.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(";
    to << value;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "PushGlobal(<name>)" on its own indented line.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(";
    to << name;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "Push(<offset>)" on its own indented line.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(";
    to << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "Pop(<count>)" on its own indented line.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(";
    to << count;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the operand-less instruction as "MkApp()" on its own indented line.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "Update(<offset>)" on its own indented line.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(";
    to << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "Pack(<tag>, <size>)" on its own indented line.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag;
    to << ", " << size;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the operand-less instruction as "Split()" on its own indented line.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Jump(" << std::endl;
|
||||||
|
for(auto& instruction_set : branches) {
|
||||||
|
for(auto& instruction : instruction_set) {
|
||||||
|
instruction->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
}
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the instruction as "Slide(<offset>)" on its own indented line.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(";
    to << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "BinOp(<action>)", where <action> is whatever
// op_action() returns for the stored operator.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(";
    to << op_action(op);
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the operand-less instruction as "Eval()" on its own indented line.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the instruction as "Alloc(<amount>)" on its own indented line.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(";
    to << amount;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Prints the operand-less instruction as "Unwind()" on its own indented line.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}
|
||||||
120
code/compiler/06/instruction.hpp
Normal file
120
code/compiler/06/instruction.hpp
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
// Abstract base of every instruction: the only shared capability is
// printing a textual form of itself.
struct instruction {
    // Writes the instruction to `to`; `indent` is the nesting depth the
    // implementation should indent its output by.
    virtual void print(int indent, std::ostream& to) const = 0;

    // Virtual so that derived instructions are destroyed correctly when
    // deleted through a pointer to this base.
    virtual ~instruction() = default;
};
|
||||||
|
|
||||||
|
using instruction_ptr = std::unique_ptr<instruction>;
|
||||||
|
|
||||||
|
struct instruction_pushint : public instruction {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
instruction_pushint(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pushglobal : public instruction {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
instruction_pushglobal(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_push : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_push(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pop : public instruction {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
instruction_pop(int c)
|
||||||
|
: count(c) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_mkapp : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_update : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_update(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pack : public instruction {
|
||||||
|
int tag;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_pack(int t, int s)
|
||||||
|
: tag(t), size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_split : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_slide : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_slide(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_binop : public instruction {
|
||||||
|
binop op;
|
||||||
|
|
||||||
|
instruction_binop(binop o)
|
||||||
|
: op(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_eval : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_alloc : public instruction {
|
||||||
|
int amount;
|
||||||
|
|
||||||
|
instruction_alloc(int a)
|
||||||
|
: amount(a) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_unwind : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
88
code/compiler/06/main.cpp
Normal file
88
code/compiler/06/main.cpp
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include "parser.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Error hook of the generated parser: reports `msg` on standard output.
// The message spelling was corrected ("occurred").
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occurred: " << msg << std::endl;
}
|
||||||
|
|
||||||
|
extern std::vector<definition_ptr> program;
|
||||||
|
|
||||||
|
void typecheck_program(
|
||||||
|
const std::vector<definition_ptr>& prog,
|
||||||
|
type_mgr& mgr, type_env& env) {
|
||||||
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
|
int_type,
|
||||||
|
type_ptr(new type_arr(int_type, int_type))));
|
||||||
|
|
||||||
|
env.bind("+", binop_type);
|
||||||
|
env.bind("-", binop_type);
|
||||||
|
env.bind("*", binop_type);
|
||||||
|
env.bind("/", binop_type);
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_first(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_second(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& pair : env.names) {
|
||||||
|
std::cout << pair.first << ": ";
|
||||||
|
pair.second->print(mgr, std::cout);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->resolve(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile_program(const std::vector<definition_ptr>& prog) {
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->compile();
|
||||||
|
|
||||||
|
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
|
||||||
|
if(!defn) continue;
|
||||||
|
for(auto& instruction : defn->instructions) {
|
||||||
|
instruction->print(0, std::cout);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
yy::parser parser;
|
||||||
|
type_mgr mgr;
|
||||||
|
type_env env;
|
||||||
|
|
||||||
|
parser.parse();
|
||||||
|
for(auto& definition : program) {
|
||||||
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
|
if(!def) continue;
|
||||||
|
|
||||||
|
std::cout << def->name;
|
||||||
|
for(auto& param : def->params) std::cout << " " << param;
|
||||||
|
std::cout << ":" << std::endl;
|
||||||
|
|
||||||
|
def->body->print(1, std::cout);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
typecheck_program(program, mgr, env);
|
||||||
|
compile_program(program);
|
||||||
|
} catch(unification_error& err) {
|
||||||
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
|
std::cout << " (1) \033[34m";
|
||||||
|
err.left->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
std::cout << " (2) \033[32m";
|
||||||
|
err.right->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
} catch(type_error& err) {
|
||||||
|
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
140
code/compiler/06/parser.y
Normal file
140
code/compiler/06/parser.y
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
%{
/* Prologue: declarations needed by the semantic actions below. */
#include <string>
#include <iostream>
#include "ast.hpp"
#include "parser.hpp"

/* Receives the parsed definitions once the start rule is reduced. */
std::vector<definition_ptr> program;
extern yy::parser::symbol_type yylex();

%}

/* Tokens, declared in the same order as before. */
%token PLUS TIMES MINUS DIVIDE
%token <int> INT
%token DEFN DATA CASE OF
%token OCURLY CCURLY OPAREN CPAREN
%token COMMA ARROW EQUAL
%token <std::string> LID UID

%language "c++"
%define api.value.type variant
%define api.token.constructor

/* Semantic value types of the non-terminals. */
%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor

%start program

%%

/* A program is a non-empty sequence of definitions. */
program
    : definitions
        { program = std::move($1); }
    ;

definitions
    : definitions definition
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    | definition
        { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
    ;

definition
    : defn
        { $$ = std::move($1); }
    | data
        { $$ = std::move($1); }
    ;

/* defn name params = { body } */
defn
    : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
        { $$ = definition_ptr(
            new definition_defn(std::move($2), std::move($3), std::move($6))); }
    ;

lowercaseParams
    : %empty
        { $$ = std::vector<std::string>(); }
    | lowercaseParams LID
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

uppercaseParams
    : %empty
        { $$ = std::vector<std::string>(); }
    | uppercaseParams UID
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

/* Left-recursive: + and - group to the left and bind looser than * and /. */
aAdd
    : aAdd PLUS aMul
        { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
    | aAdd MINUS aMul
        { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
    | aMul
        { $$ = std::move($1); }
    ;

aMul
    : aMul TIMES app
        { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
    | aMul DIVIDE app
        { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
    | app
        { $$ = std::move($1); }
    ;

/* Application by juxtaposition, grouping to the left. */
app
    : app appBase
        { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
    | appBase
        { $$ = std::move($1); }
    ;

appBase
    : INT
        { $$ = ast_ptr(new ast_int($1)); }
    | LID
        { $$ = ast_ptr(new ast_lid(std::move($1))); }
    | UID
        { $$ = ast_ptr(new ast_uid(std::move($1))); }
    | OPAREN aAdd CPAREN
        { $$ = std::move($2); }
    | case
        { $$ = std::move($1); }
    ;

/* case expr of { branches } */
case
    : CASE aAdd OF OCURLY branches CCURLY
        { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
    ;

branches
    : branches branch
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    | branch
        { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
    ;

/* pattern -> { expr } */
branch
    : pattern ARROW OCURLY aAdd CCURLY
        { $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
    ;

pattern
    : LID
        { $$ = pattern_ptr(new pattern_var(std::move($1))); }
    | UID lowercaseParams
        { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
    ;

/* data Name = { Constructor Params, ... } */
data
    : DATA UID EQUAL OCURLY constructors CCURLY
        { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
    ;

constructors
    : constructors COMMA constructor
        { $$ = std::move($1); $$.push_back(std::move($3)); }
    | constructor
        { $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
    ;

constructor
    : UID uppercaseParams
        { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
    ;
|
||||||
|
|
||||||
34
code/compiler/06/scanner.l
Normal file
34
code/compiler/06/scanner.l
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
%option noyywrap

%{
/*
 * Scanner for the compiler's input language. Every rule either discards
 * its match (whitespace) or returns the matching parser symbol.
 *
 * The whitespace rule also accepts tabs and carriage returns. Without
 * them those characters matched no rule and fell through to flex's
 * default action, which copies unmatched input to the output.
 */
#include <iostream>
#include "ast.hpp"
#include "parser.hpp"

#define YY_DECL yy::parser::symbol_type yylex()

%}

%%

[ \t\r\n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }

%%
|
||||||
99
code/compiler/06/type.cpp
Normal file
99
code/compiler/06/type.cpp
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include "type.hpp"
|
||||||
|
#include <sstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
auto it = mgr.types.find(name);
|
||||||
|
if(it != mgr.types.end()) {
|
||||||
|
it->second->print(mgr, to);
|
||||||
|
} else {
|
||||||
|
to << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the base type's name; the type manager is not consulted.
void type_base::print(const type_mgr& /* mgr */, std::ostream& to) const {
    to << name;
}
|
||||||
|
|
||||||
|
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
left->print(mgr, to);
|
||||||
|
to << " -> (";
|
||||||
|
right->print(mgr, to);
|
||||||
|
to << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string type_mgr::new_type_name() {
|
||||||
|
int temp = last_id++;
|
||||||
|
std::string str = "";
|
||||||
|
|
||||||
|
while(temp != -1) {
|
||||||
|
str += (char) ('a' + (temp % 26));
|
||||||
|
temp = temp / 26 - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse(str.begin(), str.end());
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a type variable carrying a freshly generated name.
type_ptr type_mgr::new_type() {
    std::string fresh_name = new_type_name();
    return type_ptr(new type_var(std::move(fresh_name)));
}
|
||||||
|
|
||||||
|
// Creates an arrow type whose two sides are fresh type variables.
// The variables are created in separate statements, left first: as
// function arguments their evaluation order is unspecified, so which side
// received which generated name used to depend on the compiler.
type_ptr type_mgr::new_arrow_type() {
    type_ptr left_var = new_type();
    type_ptr right_var = new_type();
    return type_ptr(new type_arr(std::move(left_var), std::move(right_var)));
}
|
||||||
|
|
||||||
|
// Follows variable bindings starting at `t` until reaching either a
// non-variable type or a variable with no binding.
// Returns the type reached. `var` is set to the unbound variable when the
// walk ends on one, and to nullptr otherwise.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;

    for(;;) {
        type_var* as_var = dynamic_cast<type_var*>(t.get());
        if(!as_var) return t;

        auto binding = types.find(as_var->name);
        if(binding == types.end()) {
            var = as_var;
            return t;
        }

        t = binding->second;
    }
}
|
||||||
|
|
||||||
|
void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||||
|
type_var* lvar;
|
||||||
|
type_var* rvar;
|
||||||
|
type_arr* larr;
|
||||||
|
type_arr* rarr;
|
||||||
|
type_base* lid;
|
||||||
|
type_base* rid;
|
||||||
|
|
||||||
|
l = resolve(l, lvar);
|
||||||
|
r = resolve(r, rvar);
|
||||||
|
|
||||||
|
if(lvar) {
|
||||||
|
bind(lvar->name, r);
|
||||||
|
return;
|
||||||
|
} else if(rvar) {
|
||||||
|
bind(rvar->name, l);
|
||||||
|
return;
|
||||||
|
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
|
||||||
|
(rarr = dynamic_cast<type_arr*>(r.get()))) {
|
||||||
|
unify(larr->left, rarr->left);
|
||||||
|
unify(larr->right, rarr->right);
|
||||||
|
return;
|
||||||
|
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||||
|
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||||
|
if(lid->name == rid->name) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw unification_error(l, r);
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
|
if(other && other->name == s) return;
|
||||||
|
types[s] = t;
|
||||||
|
}
|
||||||
65
code/compiler/06/type.hpp
Normal file
65
code/compiler/06/type.hpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct type_mgr;
|
||||||
|
|
||||||
|
struct type {
|
||||||
|
virtual ~type() = default;
|
||||||
|
|
||||||
|
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using type_ptr = std::shared_ptr<type>;
|
||||||
|
|
||||||
|
struct type_var : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_var(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_base : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_base(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_data : public type_base {
|
||||||
|
struct constructor {
|
||||||
|
int tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<std::string, constructor> constructors;
|
||||||
|
|
||||||
|
type_data(std::string n)
|
||||||
|
: type_base(std::move(n)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_arr : public type {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
type_arr(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_mgr {
|
||||||
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
|
std::string new_type_name();
|
||||||
|
type_ptr new_type();
|
||||||
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
|
void bind(const std::string& s, type_ptr t);
|
||||||
|
};
|
||||||
16
code/compiler/06/type_env.cpp
Normal file
16
code/compiler/06/type_env.cpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
// Finds the type bound to `name`, searching this environment first and
// then each enclosing one in turn. Returns nullptr when none binds it.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* current = this; current; current = current->parent) {
        auto found = current->names.find(name);
        if(found != current->names.end()) return found->second;
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
void type_env::bind(const std::string& name, type_ptr t) {
|
||||||
|
names[name] = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an empty environment whose parent is this one. The child keeps
// a raw pointer to its parent.
type_env type_env::scope() const {
    type_env child(this);
    return child;
}
|
||||||
16
code/compiler/06/type_env.hpp
Normal file
16
code/compiler/06/type_env.hpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <map>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
struct type_env {
|
||||||
|
std::map<std::string, type_ptr> names;
|
||||||
|
type_env const* parent = nullptr;
|
||||||
|
|
||||||
|
type_env(type_env const* p)
|
||||||
|
: parent(p) {}
|
||||||
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
|
type_ptr lookup(const std::string& name) const;
|
||||||
|
void bind(const std::string& name, type_ptr t);
|
||||||
|
type_env scope() const;
|
||||||
|
};
|
||||||
28
code/compiler/07/CMakeLists.txt
Normal file
28
code/compiler/07/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.1)
project(compiler)

# Both generators are mandatory. REQUIRED stops configuration with a clear
# message when one is missing, instead of failing later at bison_target /
# flex_target with an "unknown command" error.
find_package(BISON REQUIRED)
find_package(FLEX REQUIRED)

# Generate the parser (plus its header, via -d) and the scanner into the
# build directory.
bison_target(parser
    ${CMAKE_CURRENT_SOURCE_DIR}/parser.y
    ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
    COMPILE_FLAGS "-d")
flex_target(scanner
    ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
    ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
# The scanner includes the parser's generated header.
add_flex_bison_dependency(scanner parser)

add_executable(compiler
    ast.cpp ast.hpp definition.cpp
    type_env.cpp type_env.hpp
    env.cpp env.hpp
    type.cpp type.hpp
    error.cpp error.hpp
    binop.cpp binop.hpp
    instruction.cpp instruction.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
)
# Hand-written headers live in the source dir, generated ones in the build dir.
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
262
code/compiler/07/ast.cpp
Normal file
262
code/compiler/07/ast.cpp
Normal file
@@ -0,0 +1,262 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <ostream>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Writes `n` copies of the indentation unit to `to`.
// A zero or negative `n` writes nothing; the previous `while(n--)` form
// kept looping on negative input until the counter overflowed.
static void print_indent(int n, std::ostream& to) {
    for(int level = 0; level < n; ++level) {
        to << " ";
    }
}
|
||||||
|
|
||||||
|
// Runs the node-specific typecheck(), remembers the result in node_type
// and returns it.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    return node_type = typecheck(mgr, env);
}
|
||||||
|
|
||||||
|
void ast::resolve_common(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
type_ptr resolved_type = mgr.resolve(node_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
|
||||||
|
resolve(mgr);
|
||||||
|
node_type = std::move(resolved_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the literal as "INT: <value>" on its own indented line.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value << std::endl;
}
|
||||||
|
|
||||||
|
// An integer literal always has the base type named "Int"; neither the
// type manager nor the environment is consulted.
type_ptr ast_int::typecheck(type_mgr& /* mgr */, const type_env& /* env */) const {
    type_ptr literal_type(new type_base("Int"));
    return literal_type;
}
|
||||||
|
|
||||||
|
// Nothing to resolve: an integer literal has no child nodes.
void ast_int::resolve(const type_mgr& /* mgr */) const {
}
|
||||||
|
|
||||||
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the identifier as "LID: <id>" on its own indented line.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id << std::endl;
}
|
||||||
|
|
||||||
|
// Returns the type the environment binds this identifier to.
// Throws type_error when the identifier is not bound anywhere in `env`.
// Previously a null type was returned in that case and dereferenced later
// by code that assumes a valid type; this mirrors how ast_binop::typecheck
// reports an unknown operator.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown identifier ") + id);
    return found;
}
|
||||||
|
|
||||||
|
// Nothing to resolve: an identifier has no child nodes.
void ast_lid::resolve(const type_mgr& /* mgr */) const {
}
|
||||||
|
|
||||||
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
env->has_variable(id) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||||
|
(instruction*) new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the identifier as "UID: <id>" on its own indented line.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id << std::endl;
}
|
||||||
|
|
||||||
|
// Returns the type the environment binds this uppercase identifier to.
// Throws type_error when the identifier is not bound anywhere in `env`.
// Previously a null type was returned in that case and dereferenced later
// by code that assumes a valid type; this mirrors how ast_binop::typecheck
// reports an unknown operator.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown constructor ") + id);
    return found;
}
|
||||||
|
|
||||||
|
// Nothing to resolve: an identifier has no child nodes.
void ast_uid::resolve(const type_mgr& /* mgr */) const {
}
|
||||||
|
|
||||||
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints "BINOP: <operator name>" followed by both operands, left first,
// one level deeper.
void ast_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BINOP: " << op_name(op) << std::endl;

    const int child_indent = indent + 1;
    left->print(child_indent, to);
    right->print(child_indent, to);
}
|
||||||
|
|
||||||
|
// Types a binary operation by treating the operator as a two-argument
// function looked up in `env` under the operator's name: the function type
// `left -> (right -> result)` is unified with the operator's type and the
// fresh `result` variable is returned.
// Throws type_error when the operator is not bound in `env`.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    // Operands are checked first, left before right.
    type_ptr left_type = left->typecheck_common(mgr, env);
    type_ptr right_type = right->typecheck_common(mgr, env);

    type_ptr operator_type = env.lookup(op_name(op));
    if(!operator_type) {
        throw type_error(std::string("unknown binary operator ") + op_name(op));
    }

    type_ptr result_type = mgr.new_type();
    type_ptr takes_right = type_ptr(new type_arr(right_type, result_type));
    type_ptr takes_both = type_ptr(new type_arr(left_type, takes_right));

    mgr.unify(takes_both, operator_type);
    return result_type;
}
|
||||||
|
|
||||||
|
void ast_binop::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints "APP:" followed by the applied expression and its argument, each
// one level deeper.
void ast_app::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "APP:" << std::endl;

    const int child_indent = indent + 1;
    left->print(child_indent, to);
    right->print(child_indent, to);
}
|
||||||
|
|
||||||
|
// Types an application `left right`: the function type
// `right -> result` is unified with the type of `left`, and the fresh
// `result` variable is returned.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    // The applied expression is checked before its argument.
    type_ptr function_type = left->typecheck_common(mgr, env);
    type_ptr argument_type = right->typecheck_common(mgr, env);

    type_ptr result_type = mgr.new_type();
    type_ptr expected_function = type_ptr(new type_arr(argument_type, result_type));
    mgr.unify(expected_function, function_type);

    return result_type;
}
|
||||||
|
|
||||||
|
void ast_app::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "CASE: " << std::endl;
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
print_indent(indent + 1, to);
|
||||||
|
branch->pat->print(to);
|
||||||
|
to << std::endl;
|
||||||
|
branch->expr->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Types a case expression. Every branch is checked in its own child scope:
// its pattern is matched against the scrutinee's type and its body's type
// is unified with one shared result variable, which is returned.
// Throws type_error when, after all branches, the scrutinee's type does
// not resolve to a type_data.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* unbound;
    type_ptr scrutinee_type = mgr.resolve(of->typecheck_common(mgr, env), unbound);
    type_ptr result_type = mgr.new_type();

    for(const auto& alt : branches) {
        type_env branch_scope = env.scope();
        alt->pat->match(scrutinee_type, mgr, branch_scope);

        type_ptr body_type = alt->expr->typecheck_common(mgr, branch_scope);
        mgr.unify(result_type, body_type);
    }

    // Matching the patterns may have bound the scrutinee's variable, so
    // resolve again before inspecting it.
    scrutinee_type = mgr.resolve(scrutinee_type, unbound);
    const bool is_data = dynamic_cast<type_data*>(scrutinee_type.get()) != nullptr;
    if(!is_data) {
        throw type_error("attempting case analysis of non-data type");
    }

    return result_type;
}
|
||||||
|
|
||||||
|
void ast_case::resolve(const type_mgr& mgr) const {
|
||||||
|
of->resolve_common(mgr);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
branch->expr->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_data* type = dynamic_cast<type_data*>(of->node_type.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split()));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_var::print(std::ostream& to) const {
|
||||||
|
to << var;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A variable pattern matches anything: it only records, in `env`, that the
// variable has the scrutinee type `t`. The type manager is not consulted.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    env.bind(this->var, t);
}
|
||||||
|
|
||||||
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
to << constr;
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks a constructor pattern against the scrutinee type `t`.
//
// Looks the constructor up in `env`, peels one arrow off its type per
// pattern parameter (binding the parameter to the arrow's left side in
// `env`), and finally unifies what is left with `t`.
//
// Throws type_error when the constructor is unknown, when the pattern has
// more parameters than the constructor type has arrows, or when it has
// fewer (an arrow is still left over after all parameters were bound).
// Unification failures propagate from mgr.unify.
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr constructor_type = env.lookup(constr);
    if(!constructor_type) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    for(const auto& param : params) {
        type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
        if(!arr) throw type_error("too many parameters in constructor pattern");

        env.bind(param, arr->left);
        constructor_type = arr->right;
    }

    // A partially applied constructor must not be unified with the
    // scrutinee: an unresolved scrutinee type would silently become a
    // function type.
    if(dynamic_cast<type_arr*>(constructor_type.get())) {
        throw type_error("too few parameters in constructor pattern");
    }

    mgr.unify(t, constructor_type);
}
|
||||||
197
code/compiler/07/ast.hpp
Normal file
197
code/compiler/07/ast.hpp
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
struct ast {
|
||||||
|
type_ptr node_type;
|
||||||
|
|
||||||
|
virtual ~ast() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||||
|
virtual void compile(const env_ptr& env,
|
||||||
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
|
|
||||||
|
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||||
|
void resolve_common(const type_mgr& mgr);
|
||||||
|
};
|
||||||
|
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
|
virtual void print(std::ostream& to) const = 0;
|
||||||
|
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
|
|
||||||
|
struct branch {
|
||||||
|
pattern_ptr pat;
|
||||||
|
ast_ptr expr;
|
||||||
|
|
||||||
|
branch(pattern_ptr p, ast_ptr a)
|
||||||
|
: pat(std::move(p)), expr(std::move(a)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using branch_ptr = std::unique_ptr<branch>;
|
||||||
|
|
||||||
|
// A data constructor as written in a `data` definition: its name and the
// names of its field types, in order.
struct constructor {
    std::string name;
    std::vector<std::string> types;
    // Numeric tag of the constructor. Assigned by
    // definition_data::typecheck_first; zero until then so that it never
    // holds an indeterminate value.
    int8_t tag = 0;

    constructor(std::string n, std::vector<std::string> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};
|
||||||
|
|
||||||
|
using constructor_ptr = std::unique_ptr<constructor>;
|
||||||
|
|
||||||
|
struct definition {
|
||||||
|
virtual ~definition() = default;
|
||||||
|
|
||||||
|
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||||
|
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) = 0;
|
||||||
|
virtual void compile() = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using definition_ptr = std::unique_ptr<definition>;
|
||||||
|
|
||||||
|
struct ast_int : public ast {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
explicit ast_int(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_lid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_uid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_uid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_binop : public ast {
|
||||||
|
binop op;
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_binop(binop o, ast_ptr l, ast_ptr r)
|
||||||
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_app : public ast {
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_app(ast_ptr l, ast_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_case : public ast {
|
||||||
|
ast_ptr of;
|
||||||
|
std::vector<branch_ptr> branches;
|
||||||
|
|
||||||
|
ast_case(ast_ptr o, std::vector<branch_ptr> b)
|
||||||
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_var : public pattern {
|
||||||
|
std::string var;
|
||||||
|
|
||||||
|
pattern_var(std::string v)
|
||||||
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_constr : public pattern {
|
||||||
|
std::string constr;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
|
||||||
|
pattern_constr(std::string c, std::vector<std::string> p)
|
||||||
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr&, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_defn : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
type_ptr return_type;
|
||||||
|
std::vector<type_ptr> param_types;
|
||||||
|
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
|
||||||
|
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
||||||
|
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_data : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
|
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||||
|
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
};
|
||||||
21
code/compiler/07/binop.cpp
Normal file
21
code/compiler/07/binop.cpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
// Returns the source symbol of an operator, or "??" for a value that is
// not one of the four known operators.
std::string op_name(binop op) {
    if(op == PLUS) return "+";
    if(op == MINUS) return "-";
    if(op == TIMES) return "*";
    if(op == DIVIDE) return "/";
    return "??";
}
|
||||||
|
|
||||||
|
// Returns the lowercase word naming an operator ("plus", "minus", ...),
// or "??" for a value that is not one of the four known operators.
std::string op_action(binop op) {
    if(op == PLUS) return "plus";
    if(op == MINUS) return "minus";
    if(op == TIMES) return "times";
    if(op == DIVIDE) return "divide";
    return "??";
}
|
||||||
12
code/compiler/07/binop.hpp
Normal file
12
code/compiler/07/binop.hpp
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// The four arithmetic operators of the language. The numeric values are
// the ones the compiler would assign implicitly; they are spelled out
// only for readability.
enum binop {
    PLUS = 0,
    MINUS = 1,
    TIMES = 2,
    DIVIDE = 3
};
|
||||||
|
|
||||||
|
std::string op_name(binop op);
|
||||||
|
std::string op_action(binop op);
|
||||||
83
code/compiler/07/definition.cpp
Normal file
83
code/compiler/07/definition.cpp
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// First type-checking pass: invents a fresh type for the result and for
// each parameter, chains them into an arrow type, and binds the function
// name to that type in `env`.
//
// param_types is filled starting from the LAST parameter, which is why
// typecheck_second reads it back to front.
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();
    type_ptr signature = return_type;

    // Only the number of parameters matters here, not their names.
    auto remaining = params.size();
    while(remaining > 0) {
        type_ptr param_type = mgr.new_type();
        signature = type_ptr(new type_arr(param_type, signature));
        param_types.push_back(param_type);
        --remaining;
    }

    env.bind(name, signature);
}
|
||||||
|
|
||||||
|
// Second type-checking pass: checks the body in a nested scope where each
// parameter is bound to its type, then unifies the body's type with the
// declared result type.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env new_env = env.scope();

    // params runs first-to-last while param_types is walked in reverse;
    // the loop stops as soon as either sequence is exhausted.
    auto type_it = param_types.rbegin();
    for(auto param_it = params.begin();
            param_it != params.end() && type_it != param_types.rend();
            param_it++, type_it++) {
        new_env.bind(*param_it, *type_it);
    }

    type_ptr body_type = body->typecheck_common(mgr, new_env);
    mgr.unify(return_type, body_type);
}
|
||||||
|
|
||||||
|
void definition_defn::resolve(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
body->resolve_common(mgr);
|
||||||
|
|
||||||
|
return_type = mgr.resolve(return_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
for(auto& param_type : param_types) {
|
||||||
|
param_type = mgr.resolve(param_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registers the data type's constructors.
//
// Constructors receive tags 0, 1, 2, ... in declaration order; the tag is
// stored both on the constructor itself and in the new type_data's
// constructor table. Each constructor name is then bound in `env` to an
// arrow type from its field types (as type_base) to the data type.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* this_type = new type_data(name);
    type_ptr return_type = type_ptr(this_type);
    int next_tag = 0;

    for(auto& ctor : constructors) {
        ctor->tag = next_tag;
        this_type->constructors[ctor->name] = { next_tag };
        ++next_tag;

        // Build the arrow type right to left, starting from the result.
        type_ptr full_type = return_type;
        auto field = ctor->types.rbegin();
        while(field != ctor->types.rend()) {
            type_ptr type = type_ptr(new type_base(*field));
            full_type = type_ptr(new type_arr(type, full_type));
            field++;
        }

        env.bind(ctor->name, full_type);
    }
}
|
||||||
|
|
||||||
|
// Intentionally empty: a data definition has no second-pass work.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
}
|
||||||
|
|
||||||
|
// Intentionally empty: a data definition has nothing to resolve.
void definition_data::resolve(const type_mgr& mgr) {
}
|
||||||
|
|
||||||
|
// Intentionally empty: no instructions are produced for a data definition.
void definition_data::compile() {
}
|
||||||
23
code/compiler/07/env.cpp
Normal file
23
code/compiler/07/env.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
int env_var::get_offset(const std::string& name) const {
|
||||||
|
if(name == this->name) return 0;
|
||||||
|
if(parent) return parent->get_offset(name) + 1;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_var::has_variable(const std::string& name) const {
|
||||||
|
if(name == this->name) return true;
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int env_offset::get_offset(const std::string& name) const {
|
||||||
|
if(parent) return parent->get_offset(name) + offset;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_offset::has_variable(const std::string& name) const {
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
34
code/compiler/07/env.hpp
Normal file
34
code/compiler/07/env.hpp
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Abstract compile-time environment, answering two questions about a
// variable name: is it known, and at which offset does it live.
struct env {
    virtual ~env() = default;

    // Offset of `name`; implementations throw when it is not found.
    virtual int get_offset(const std::string& name) const = 0;
    // Whether `name` is known to this environment.
    virtual bool has_variable(const std::string& name) const = 0;
};
|
||||||
|
|
||||||
|
using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
|
struct env_var : public env {
|
||||||
|
std::string name;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_var(std::string& n, env_ptr p)
|
||||||
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct env_offset : public env {
|
||||||
|
int offset;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_offset(int o, env_ptr p)
|
||||||
|
: offset(o), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
5
code/compiler/07/error.cpp
Normal file
5
code/compiler/07/error.cpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Always returns the same generic message; the specific reason is kept in
// the `description` member, not here.
const char* type_error::what() const noexcept {
    static const char* const message =
        "an error occured while checking the types of the program";
    return message;
}
|
||||||
21
code/compiler/07/error.hpp
Normal file
21
code/compiler/07/error.hpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Exception raised for errors found while checking or compiling a program.
// `description` carries the specific reason.
struct type_error : public std::exception {
    std::string description;

    type_error(std::string d) : description(std::move(d)) {}

    const char* what() const noexcept override;
};
|
||||||
|
|
||||||
|
struct unification_error : public type_error {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
unification_error(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
|
type_error("failed to unify types") {}
|
||||||
|
};
|
||||||
2
code/compiler/07/examples/bad1.txt
Normal file
2
code/compiler/07/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
defn main = { 3 + True }
|
||||||
1
code/compiler/07/examples/bad2.txt
Normal file
1
code/compiler/07/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
defn main = { 1 2 3 4 5 }
|
||||||
8
code/compiler/07/examples/bad3.txt
Normal file
8
code/compiler/07/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x y z -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
31
code/compiler/07/examples/runtime1.c
Normal file
31
code/compiler/07/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#include "../runtime.h"
|
||||||
|
|
||||||
|
void f_add(struct stack* s) {
|
||||||
|
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
|
||||||
|
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
|
||||||
|
}
|
||||||
|
|
||||||
|
void f_main(struct stack* s) {
|
||||||
|
// PushInt 320
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(320));
|
||||||
|
|
||||||
|
// PushInt 6
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(6));
|
||||||
|
|
||||||
|
// PushGlobal f_add (the function for +)
|
||||||
|
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
|
||||||
|
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
}
|
||||||
2
code/compiler/07/examples/works1.txt
Normal file
2
code/compiler/07/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
defn main = { plus 320 6 }
|
||||||
|
defn plus x y = { x + y }
|
||||||
3
code/compiler/07/examples/works2.txt
Normal file
3
code/compiler/07/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
defn add x y = { x + y }
|
||||||
|
defn double x = { add x x }
|
||||||
|
defn main = { double 163 }
|
||||||
7
code/compiler/07/examples/works3.txt
Normal file
7
code/compiler/07/examples/works3.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
defn length l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> { 1 + length xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
83
code/compiler/07/instruction.cpp
Normal file
83
code/compiler/07/instruction.cpp
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
#include "instruction.hpp"
|
||||||
|
|
||||||
|
// Writes the indentation unit `n` times to `to`.
// A zero or negative `n` writes nothing (a negative count used to keep the
// old countdown loop running until the counter overflowed).
static void print_indent(int n, std::ostream& to) {
    for(int written = 0; written < n; ++written) {
        to << " ";
    }
}
|
||||||
|
|
||||||
|
// Prints "PushInt(<value>)" on its own indented line.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(";
    to << value;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "PushGlobal(<name>)" on its own indented line.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(";
    to << name;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Push(<offset>)" on its own indented line.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(";
    to << offset;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Pop(<count>)" on its own indented line.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(";
    to << count;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "MkApp()" on its own indented line.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Update(<offset>)" on its own indented line.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(";
    to << offset;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Pack(<tag>, <size>)" on its own indented line.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(";
    to << tag;
    to << ", ";
    to << size;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Split()" on its own indented line.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Jump(" << std::endl;
|
||||||
|
for(auto& instruction_set : branches) {
|
||||||
|
for(auto& instruction : instruction_set) {
|
||||||
|
instruction->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
}
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints "Slide(<offset>)" on its own indented line.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(";
    to << offset;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "BinOp(<action>)" on its own indented line, where <action> is the
// word op_action returns for the operator.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(";
    to << op_action(op);
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Eval()" on its own indented line.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Alloc(<amount>)" on its own indented line.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(";
    to << amount;
    to << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Prints "Unwind()" on its own indented line.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}
|
||||||
120
code/compiler/07/instruction.hpp
Normal file
120
code/compiler/07/instruction.hpp
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
// Abstract base of all instructions. The only operation shared by every
// instruction is printing itself at a given indentation.
struct instruction {
    virtual ~instruction() = default;

    virtual void print(int indent, std::ostream& to) const = 0;
};
|
||||||
|
|
||||||
|
using instruction_ptr = std::unique_ptr<instruction>;
|
||||||
|
|
||||||
|
struct instruction_pushint : public instruction {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
instruction_pushint(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pushglobal : public instruction {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
instruction_pushglobal(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_push : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_push(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pop : public instruction {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
instruction_pop(int c)
|
||||||
|
: count(c) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_mkapp : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_update : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_update(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pack : public instruction {
|
||||||
|
int tag;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_pack(int t, int s)
|
||||||
|
: tag(t), size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_split : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_slide : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_slide(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_binop : public instruction {
|
||||||
|
binop op;
|
||||||
|
|
||||||
|
instruction_binop(binop o)
|
||||||
|
: op(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_eval : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_alloc : public instruction {
|
||||||
|
int amount;
|
||||||
|
|
||||||
|
instruction_alloc(int a)
|
||||||
|
: amount(a) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_unwind : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
};
|
||||||
88
code/compiler/07/main.cpp
Normal file
88
code/compiler/07/main.cpp
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include "parser.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Error callback of the generated parser: reports the message on stdout.
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occured: ";
    std::cout << msg;
    std::cout << std::endl;
}
|
||||||
|
|
||||||
|
extern std::vector<definition_ptr> program;
|
||||||
|
|
||||||
|
void typecheck_program(
|
||||||
|
const std::vector<definition_ptr>& prog,
|
||||||
|
type_mgr& mgr, type_env& env) {
|
||||||
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
|
int_type,
|
||||||
|
type_ptr(new type_arr(int_type, int_type))));
|
||||||
|
|
||||||
|
env.bind("+", binop_type);
|
||||||
|
env.bind("-", binop_type);
|
||||||
|
env.bind("*", binop_type);
|
||||||
|
env.bind("/", binop_type);
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_first(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_second(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& pair : env.names) {
|
||||||
|
std::cout << pair.first << ": ";
|
||||||
|
pair.second->print(mgr, std::cout);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->resolve(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile_program(const std::vector<definition_ptr>& prog) {
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->compile();
|
||||||
|
|
||||||
|
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
|
||||||
|
if(!defn) continue;
|
||||||
|
for(auto& instruction : defn->instructions) {
|
||||||
|
instruction->print(0, std::cout);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
yy::parser parser;
|
||||||
|
type_mgr mgr;
|
||||||
|
type_env env;
|
||||||
|
|
||||||
|
parser.parse();
|
||||||
|
for(auto& definition : program) {
|
||||||
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
|
if(!def) continue;
|
||||||
|
|
||||||
|
std::cout << def->name;
|
||||||
|
for(auto& param : def->params) std::cout << " " << param;
|
||||||
|
std::cout << ":" << std::endl;
|
||||||
|
|
||||||
|
def->body->print(1, std::cout);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
typecheck_program(program, mgr, env);
|
||||||
|
compile_program(program);
|
||||||
|
} catch(unification_error& err) {
|
||||||
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
|
std::cout << " (1) \033[34m";
|
||||||
|
err.left->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
std::cout << " (2) \033[32m";
|
||||||
|
err.right->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
} catch(type_error& err) {
|
||||||
|
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
140
code/compiler/07/parser.y
Normal file
140
code/compiler/07/parser.y
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
%{
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
std::vector<definition_ptr> program;
|
||||||
|
extern yy::parser::symbol_type yylex();
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%token PLUS
|
||||||
|
%token TIMES
|
||||||
|
%token MINUS
|
||||||
|
%token DIVIDE
|
||||||
|
%token <int> INT
|
||||||
|
%token DEFN
|
||||||
|
%token DATA
|
||||||
|
%token CASE
|
||||||
|
%token OF
|
||||||
|
%token OCURLY
|
||||||
|
%token CCURLY
|
||||||
|
%token OPAREN
|
||||||
|
%token CPAREN
|
||||||
|
%token COMMA
|
||||||
|
%token ARROW
|
||||||
|
%token EQUAL
|
||||||
|
%token <std::string> LID
|
||||||
|
%token <std::string> UID
|
||||||
|
|
||||||
|
%language "c++"
|
||||||
|
%define api.value.type variant
|
||||||
|
%define api.token.constructor
|
||||||
|
|
||||||
|
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
||||||
|
%type <std::vector<definition_ptr>> program definitions
|
||||||
|
%type <std::vector<branch_ptr>> branches
|
||||||
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
|
%type <definition_ptr> definition defn data
|
||||||
|
%type <branch_ptr> branch
|
||||||
|
%type <pattern_ptr> pattern
|
||||||
|
%type <constructor_ptr> constructor
|
||||||
|
|
||||||
|
%start program
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
program
|
||||||
|
: definitions { program = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
definitions
|
||||||
|
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
definition
|
||||||
|
: defn { $$ = std::move($1); }
|
||||||
|
| data { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
defn
|
||||||
|
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||||
|
{ $$ = definition_ptr(
|
||||||
|
new definition_defn(std::move($2), std::move($3), std::move($6))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
lowercaseParams
|
||||||
|
: %empty { $$ = std::vector<std::string>(); }
|
||||||
|
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
uppercaseParams
|
||||||
|
: %empty { $$ = std::vector<std::string>(); }
|
||||||
|
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
aAdd
|
||||||
|
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
||||||
|
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
||||||
|
| aMul { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
aMul
|
||||||
|
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
|
||||||
|
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
|
||||||
|
| app { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
app
|
||||||
|
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
|
||||||
|
| appBase { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
appBase
|
||||||
|
: INT { $$ = ast_ptr(new ast_int($1)); }
|
||||||
|
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
|
||||||
|
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
|
||||||
|
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||||
|
| case { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
case
|
||||||
|
: CASE aAdd OF OCURLY branches CCURLY
|
||||||
|
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
branches
|
||||||
|
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
|
||||||
|
;
|
||||||
|
|
||||||
|
branch
|
||||||
|
: pattern ARROW OCURLY aAdd CCURLY
|
||||||
|
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
pattern
|
||||||
|
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
|
||||||
|
| UID lowercaseParams
|
||||||
|
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
data
|
||||||
|
: DATA UID EQUAL OCURLY constructors CCURLY
|
||||||
|
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
constructors
|
||||||
|
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
|
||||||
|
| constructor
|
||||||
|
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
constructor
|
||||||
|
: UID uppercaseParams
|
||||||
|
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
||||||
|
;
|
||||||
|
|
||||||
159
code/compiler/07/runtime.c
Normal file
159
code/compiler/07/runtime.c
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#include "runtime.h"
|
||||||
|
|
||||||
|
struct node_base* alloc_node() {
|
||||||
|
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||||
|
assert(new_node != NULL);
|
||||||
|
return new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||||
|
struct node_app* node = (struct node_app*) alloc_node();
|
||||||
|
node->base.tag = NODE_APP;
|
||||||
|
node->left = l;
|
||||||
|
node->right = r;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_num* alloc_num(int32_t n) {
|
||||||
|
struct node_num* node = (struct node_num*) alloc_node();
|
||||||
|
node->base.tag = NODE_NUM;
|
||||||
|
node->value = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a) {
|
||||||
|
struct node_global* node = (struct node_global*) alloc_node();
|
||||||
|
node->base.tag = NODE_GLOBAL;
|
||||||
|
node->arity = a;
|
||||||
|
node->function = f;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_ind* alloc_ind(struct node_base* n) {
|
||||||
|
struct node_ind* node = (struct node_ind*) alloc_node();
|
||||||
|
node->base.tag = NODE_IND;
|
||||||
|
node->next = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_init(struct stack* s) {
|
||||||
|
s->size = 4;
|
||||||
|
s->count = 0;
|
||||||
|
s->data = malloc(sizeof(*s->data) * s->size);
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_free(struct stack* s) {
|
||||||
|
free(s->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_push(struct stack* s, struct node_base* n) {
|
||||||
|
while(s->count >= s->size) {
|
||||||
|
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
s->data[s->count++] = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_pop(struct stack* s) {
|
||||||
|
assert(s->count > 0);
|
||||||
|
return s->data[--s->count];
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_peek(struct stack* s, size_t o) {
|
||||||
|
assert(s->count > o);
|
||||||
|
return s->data[s->count - o - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_popn(struct stack* s, size_t n) {
|
||||||
|
assert(s->count >= n);
|
||||||
|
s->count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_slide(struct stack* s, size_t n) {
|
||||||
|
assert(s->count > n);
|
||||||
|
s->data[s->count - n - 1] = s->data[s->count - 1];
|
||||||
|
s->count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_update(struct stack* s, size_t o) {
|
||||||
|
assert(s->count > o + 1);
|
||||||
|
struct node_ind* ind = (struct node_ind*) s->data[s->count - o - 2];
|
||||||
|
ind->base.tag = NODE_IND;
|
||||||
|
ind->next = s->data[s->count -= 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pushes `o` freshly allocated indirection nodes, each pointing at NULL. */
void stack_alloc(struct stack* s, size_t o) {
    for(size_t pushed = 0; pushed < o; pushed++) {
        struct node_ind* placeholder = alloc_ind(NULL);
        stack_push(s, (struct node_base*) placeholder);
    }
}
|
||||||
|
|
||||||
|
void stack_pack(struct stack* s, size_t n, int8_t t) {
|
||||||
|
assert(s->count >= n);
|
||||||
|
|
||||||
|
struct node_base** data = malloc(sizeof(*data) * n);
|
||||||
|
assert(data != NULL);
|
||||||
|
memcpy(data, &s->data[s->count - 1 - n], n * sizeof(*data));
|
||||||
|
|
||||||
|
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||||
|
new_node->array = data;
|
||||||
|
new_node->base.tag = NODE_DATA;
|
||||||
|
new_node->tag = t;
|
||||||
|
|
||||||
|
stack_popn(s, n);
|
||||||
|
stack_push(s, (struct node_base*) new_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_split(struct stack* s, size_t n) {
|
||||||
|
struct node_data* node = (struct node_data*) stack_pop(s);
|
||||||
|
for(size_t i = 0; i < n; i++) {
|
||||||
|
stack_push(s, node->array[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void unwind(struct stack* s) {
|
||||||
|
while(1) {
|
||||||
|
struct node_base* peek = stack_peek(s, 0);
|
||||||
|
if(peek->tag == NODE_APP) {
|
||||||
|
struct node_app* n = (struct node_app*) peek;
|
||||||
|
stack_push(s, n->left);
|
||||||
|
} else if(peek->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* n = (struct node_global*) peek;
|
||||||
|
assert(s->count > n->arity);
|
||||||
|
|
||||||
|
for(size_t i = 1; i <= n->arity; i++) {
|
||||||
|
s->data[s->count - i]
|
||||||
|
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
n->function(s);
|
||||||
|
} else if(peek->tag == NODE_IND) {
|
||||||
|
struct node_ind* n = (struct node_ind*) peek;
|
||||||
|
stack_pop(s);
|
||||||
|
stack_push(s, n->next);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* eval(struct node_base* n) {
|
||||||
|
struct stack program_stack;
|
||||||
|
stack_init(&program_stack);
|
||||||
|
stack_push(&program_stack, n);
|
||||||
|
unwind(&program_stack);
|
||||||
|
struct node_base* result = stack_pop(&program_stack);
|
||||||
|
stack_free(&program_stack);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern void f_main(struct stack* s);
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
struct node_global* first_node = alloc_global(f_main, 0);
|
||||||
|
struct node_base* result = eval((struct node_base*) first_node);
|
||||||
|
}
|
||||||
70
code/compiler/07/runtime.h
Normal file
70
code/compiler/07/runtime.h
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
struct stack;
|
||||||
|
|
||||||
|
enum node_tag {
|
||||||
|
NODE_APP,
|
||||||
|
NODE_NUM,
|
||||||
|
NODE_GLOBAL,
|
||||||
|
NODE_IND,
|
||||||
|
NODE_DATA
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_base {
|
||||||
|
enum node_tag tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_app {
|
||||||
|
struct node_base base;
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_num {
|
||||||
|
struct node_base base;
|
||||||
|
int32_t value;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_global {
|
||||||
|
struct node_base base;
|
||||||
|
int32_t arity;
|
||||||
|
void (*function)(struct stack*);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_ind {
|
||||||
|
struct node_base base;
|
||||||
|
struct node_base* next;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_data {
|
||||||
|
struct node_base base;
|
||||||
|
int8_t tag;
|
||||||
|
struct node_base** array;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_base* alloc_node();
|
||||||
|
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
|
||||||
|
struct node_num* alloc_num(int32_t n);
|
||||||
|
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a);
|
||||||
|
struct node_ind* alloc_ind(struct node_base* n);
|
||||||
|
|
||||||
|
struct stack {
|
||||||
|
size_t size;
|
||||||
|
size_t count;
|
||||||
|
struct node_base** data;
|
||||||
|
};
|
||||||
|
|
||||||
|
void stack_init(struct stack* s);
|
||||||
|
void stack_free(struct stack* s);
|
||||||
|
void stack_push(struct stack* s, struct node_base* n);
|
||||||
|
struct node_base* stack_pop(struct stack* s);
|
||||||
|
struct node_base* stack_peek(struct stack* s, size_t o);
|
||||||
|
void stack_popn(struct stack* s, size_t n);
|
||||||
|
void stack_slide(struct stack* s, size_t n);
|
||||||
|
void stack_update(struct stack* s, size_t o);
|
||||||
|
void stack_alloc(struct stack* s, size_t o);
|
||||||
|
void stack_pack(struct stack* s, size_t n, int8_t t);
|
||||||
|
void stack_split(struct stack* s, size_t n);
|
||||||
|
|
||||||
|
struct node_base* eval(struct node_base* n);
|
||||||
34
code/compiler/07/scanner.l
Normal file
34
code/compiler/07/scanner.l
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
%option noyywrap
|
||||||
|
|
||||||
|
%{
|
||||||
|
#include <iostream>
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
#define YY_DECL yy::parser::symbol_type yylex()
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
[ \n]+ {}
|
||||||
|
\+ { return yy::parser::make_PLUS(); }
|
||||||
|
\* { return yy::parser::make_TIMES(); }
|
||||||
|
- { return yy::parser::make_MINUS(); }
|
||||||
|
\/ { return yy::parser::make_DIVIDE(); }
|
||||||
|
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
|
||||||
|
defn { return yy::parser::make_DEFN(); }
|
||||||
|
data { return yy::parser::make_DATA(); }
|
||||||
|
case { return yy::parser::make_CASE(); }
|
||||||
|
of { return yy::parser::make_OF(); }
|
||||||
|
\{ { return yy::parser::make_OCURLY(); }
|
||||||
|
\} { return yy::parser::make_CCURLY(); }
|
||||||
|
\( { return yy::parser::make_OPAREN(); }
|
||||||
|
\) { return yy::parser::make_CPAREN(); }
|
||||||
|
, { return yy::parser::make_COMMA(); }
|
||||||
|
-> { return yy::parser::make_ARROW(); }
|
||||||
|
= { return yy::parser::make_EQUAL(); }
|
||||||
|
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
|
||||||
|
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
|
||||||
|
|
||||||
|
%%
|
||||||
99
code/compiler/07/type.cpp
Normal file
99
code/compiler/07/type.cpp
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include "type.hpp"
|
||||||
|
#include <sstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
auto it = mgr.types.find(name);
|
||||||
|
if(it != mgr.types.end()) {
|
||||||
|
it->second->print(mgr, to);
|
||||||
|
} else {
|
||||||
|
to << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the name of this base type. `mgr` is not consulted.
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
    const std::string& shown = name;
    to << shown;
}
|
||||||
|
|
||||||
|
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
left->print(mgr, to);
|
||||||
|
to << " -> (";
|
||||||
|
right->print(mgr, to);
|
||||||
|
to << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string type_mgr::new_type_name() {
|
||||||
|
int temp = last_id++;
|
||||||
|
std::string str = "";
|
||||||
|
|
||||||
|
while(temp != -1) {
|
||||||
|
str += (char) ('a' + (temp % 26));
|
||||||
|
temp = temp / 26 - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse(str.begin(), str.end());
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a type variable carrying a freshly generated name.
type_ptr type_mgr::new_type() {
    std::string fresh_name = new_type_name();
    type_ptr variable(new type_var(std::move(fresh_name)));
    return variable;
}
|
||||||
|
|
||||||
|
// Creates an arrow type whose two sides are fresh type variables.
//
// The variables are created in separate statements, left side first. As
// function arguments their evaluation order would be unspecified, so the
// names handed to each side could differ from one compiler to another.
type_ptr type_mgr::new_arrow_type() {
    type_ptr left_side = new_type();
    type_ptr right_side = new_type();
    return type_ptr(new type_arr(std::move(left_side), std::move(right_side)));
}
|
||||||
|
|
||||||
|
// Follows the chain of bindings starting at `t` and returns the first type
// that is either not a type variable or is an unbound type variable.
// `var` is set to that unbound variable, or to nullptr when the result is
// not a variable.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;

    for(;;) {
        type_var* as_variable = dynamic_cast<type_var*>(t.get());
        if(!as_variable) return t;

        auto binding = types.find(as_variable->name);
        if(binding == types.end()) {
            var = as_variable;
            return t;
        }

        t = binding->second;
    }
}
|
||||||
|
|
||||||
|
// Makes `l` and `r` equal by adding bindings to this manager.
//
// Both sides are resolved first. An unbound variable on either side is bound
// to the other side (the left one takes precedence). Two arrow types are
// unified component-wise, and two base types unify only when their names
// match. Any other combination throws unification_error carrying the two
// resolved types.
void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* left_var = nullptr;
    type_var* right_var = nullptr;

    l = resolve(l, left_var);
    r = resolve(r, right_var);

    if(left_var) {
        bind(left_var->name, r);
        return;
    }
    if(right_var) {
        bind(right_var->name, l);
        return;
    }

    type_arr* left_arrow = dynamic_cast<type_arr*>(l.get());
    type_arr* right_arrow = dynamic_cast<type_arr*>(r.get());
    if(left_arrow && right_arrow) {
        unify(left_arrow->left, right_arrow->left);
        unify(left_arrow->right, right_arrow->right);
        return;
    }

    type_base* left_base = dynamic_cast<type_base*>(l.get());
    type_base* right_base = dynamic_cast<type_base*>(r.get());
    if(left_base && right_base && left_base->name == right_base->name) return;

    throw unification_error(l, r);
}
|
||||||
|
|
||||||
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
|
if(other && other->name == s) return;
|
||||||
|
types[s] = t;
|
||||||
|
}
|
||||||
65
code/compiler/07/type.hpp
Normal file
65
code/compiler/07/type.hpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct type_mgr;
|
||||||
|
|
||||||
|
struct type {
|
||||||
|
virtual ~type() = default;
|
||||||
|
|
||||||
|
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using type_ptr = std::shared_ptr<type>;
|
||||||
|
|
||||||
|
struct type_var : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_var(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_base : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_base(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_data : public type_base {
|
||||||
|
struct constructor {
|
||||||
|
int tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<std::string, constructor> constructors;
|
||||||
|
|
||||||
|
type_data(std::string n)
|
||||||
|
: type_base(std::move(n)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_arr : public type {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
type_arr(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_mgr {
|
||||||
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
|
std::string new_type_name();
|
||||||
|
type_ptr new_type();
|
||||||
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
|
void bind(const std::string& s, type_ptr t);
|
||||||
|
};
|
||||||
16
code/compiler/07/type_env.cpp
Normal file
16
code/compiler/07/type_env.cpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
// Finds the type bound to `name`, searching this environment first and then
// each parent in turn. Returns nullptr when no environment binds the name.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* current = this; current != nullptr; current = current->parent) {
        auto found = current->names.find(name);
        if(found != current->names.end()) return found->second;
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
void type_env::bind(const std::string& name, type_ptr t) {
|
||||||
|
names[name] = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a new, empty environment whose parent is this one. The child keeps
// a raw pointer to this environment.
type_env type_env::scope() const {
    type_env child(this);
    return child;
}
|
||||||
16
code/compiler/07/type_env.hpp
Normal file
16
code/compiler/07/type_env.hpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <map>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
struct type_env {
|
||||||
|
std::map<std::string, type_ptr> names;
|
||||||
|
type_env const* parent = nullptr;
|
||||||
|
|
||||||
|
type_env(type_env const* p)
|
||||||
|
: parent(p) {}
|
||||||
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
|
type_ptr lookup(const std::string& name) const;
|
||||||
|
void bind(const std::string& name, type_ptr t);
|
||||||
|
type_env scope() const;
|
||||||
|
};
|
||||||
42
code/compiler/08/CMakeLists.txt
Normal file
42
code/compiler/08/CMakeLists.txt
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.1)
|
||||||
|
project(compiler)
|
||||||
|
|
||||||
|
# Find all the required packages
|
||||||
|
find_package(BISON)
|
||||||
|
find_package(FLEX)
|
||||||
|
find_package(LLVM REQUIRED CONFIG)
|
||||||
|
|
||||||
|
# Set up the flex and bison targets
|
||||||
|
bison_target(parser
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
|
||||||
|
COMPILE_FLAGS "-d")
|
||||||
|
flex_target(scanner
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
|
||||||
|
add_flex_bison_dependency(scanner parser)
|
||||||
|
|
||||||
|
# Find all the relevant LLVM components
|
||||||
|
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||||
|
|
||||||
|
# Create compiler executable
|
||||||
|
add_executable(compiler
|
||||||
|
ast.cpp ast.hpp definition.cpp
|
||||||
|
llvm_context.cpp llvm_context.hpp
|
||||||
|
type_env.cpp type_env.hpp
|
||||||
|
env.cpp env.hpp
|
||||||
|
type.cpp type.hpp
|
||||||
|
error.cpp error.hpp
|
||||||
|
binop.cpp binop.hpp
|
||||||
|
instruction.cpp instruction.hpp
|
||||||
|
${BISON_parser_OUTPUTS}
|
||||||
|
${FLEX_scanner_OUTPUTS}
|
||||||
|
main.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configure compiler executable
|
||||||
|
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||||
|
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
|
||||||
|
target_link_libraries(compiler ${LLVM_LIBS})
|
||||||
264
code/compiler/08/ast.cpp
Normal file
264
code/compiler/08/ast.cpp
Normal file
@@ -0,0 +1,264 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Writes `n` copies of the indentation unit to `to`.
static void print_indent(int n, std::ostream& to) {
    for(; n != 0; --n) {
        to << " ";
    }
}
|
||||||
|
|
||||||
|
// Type checks this node via the node-specific typecheck(), stores the result
// in `node_type`, and returns it.
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    type_ptr inferred = typecheck(mgr, env);
    node_type = inferred;
    return inferred;
}
|
||||||
|
|
||||||
|
void ast::resolve_common(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
type_ptr resolved_type = mgr.resolve(node_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
|
||||||
|
resolve(mgr);
|
||||||
|
node_type = std::move(resolved_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints this integer literal on its own line at the given indentation.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value;
    to << std::endl;
}
|
||||||
|
|
||||||
|
// An integer literal always has the base type "Int"; neither `mgr` nor `env`
// is consulted.
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr int_type(new type_base("Int"));
    return int_type;
}
|
||||||
|
|
||||||
|
// Intentionally empty: this node resolves nothing beyond its own type, which
// resolve_common() handles.
void ast_int::resolve(const type_mgr& mgr) const {
}
|
||||||
|
|
||||||
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints this lowercase identifier on its own line at the given indentation.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id;
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Returns the type bound to this identifier in `env`.
//
// Throws type_error when the identifier is unknown. A null type must not be
// returned here: it would be stored as the node's type and dereferenced
// later, during unification or error reporting.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown identifier ") + id);
    return found;
}
|
||||||
|
|
||||||
|
// Intentionally empty: this node resolves nothing beyond its own type, which
// resolve_common() handles.
void ast_lid::resolve(const type_mgr& mgr) const {
}
|
||||||
|
|
||||||
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
env->has_variable(id) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||||
|
(instruction*) new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints this uppercase identifier on its own line at the given indentation.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id;
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Returns the type bound to this uppercase identifier in `env`.
//
// Throws type_error when the name is unknown. A null type must not be
// returned here: it would be stored as the node's type and dereferenced
// later, during unification or error reporting.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown constructor ") + id);
    return found;
}
|
||||||
|
|
||||||
|
// Intentionally empty: this node resolves nothing beyond its own type, which
// resolve_common() handles.
void ast_uid::resolve(const type_mgr& mgr) const {
}
|
||||||
|
|
||||||
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "BINOP: " << op_name(op) << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type checks both operands, looks the operator up in `env` under
// op_name(op), and unifies the operator's type with
// `left -> (right -> result)` for a fresh `result` variable, which is
// returned. Throws type_error when the operator is not bound in `env`.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr left_type = left->typecheck_common(mgr, env);
    type_ptr right_type = right->typecheck_common(mgr, env);

    type_ptr operator_type = env.lookup(op_name(op));
    if(!operator_type) {
        throw type_error(std::string("unknown binary operator ") + op_name(op));
    }

    type_ptr result = mgr.new_type();
    type_ptr expected(new type_arr(left_type, type_ptr(new type_arr(right_type, result))));

    mgr.unify(expected, operator_type);
    return result;
}
|
||||||
|
|
||||||
|
void ast_binop::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "APP:" << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type checks both sides of the application and unifies the left side's type
// with `right -> result` for a fresh `result` variable, which is returned.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr function_type = left->typecheck_common(mgr, env);
    type_ptr argument_type = right->typecheck_common(mgr, env);

    type_ptr result = mgr.new_type();
    type_ptr expected(new type_arr(argument_type, result));

    mgr.unify(expected, function_type);
    return result;
}
|
||||||
|
|
||||||
|
void ast_app::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "CASE: " << std::endl;
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
print_indent(indent + 1, to);
|
||||||
|
branch->pat->print(to);
|
||||||
|
to << std::endl;
|
||||||
|
branch->expr->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type checks the scrutinee and every branch.
//
// Each branch is checked in its own child scope of `env`, populated by
// matching the branch pattern against the scrutinee type; all branch bodies
// are unified with one shared result type, which is returned. Throws
// type_error when, after all branches are processed, the scrutinee type does
// not resolve to a type_data.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* unbound;
    type_ptr scrutinee_type = mgr.resolve(of->typecheck_common(mgr, env), unbound);
    type_ptr result_type = mgr.new_type();

    for(auto& alternative : branches) {
        type_env branch_env = env.scope();
        alternative->pat->match(scrutinee_type, mgr, branch_env);

        type_ptr body_type = alternative->expr->typecheck_common(mgr, branch_env);
        mgr.unify(result_type, body_type);
    }

    // Patterns may have pinned the scrutinee down further, so resolve again.
    scrutinee_type = mgr.resolve(scrutinee_type, unbound);
    const bool is_data = dynamic_cast<type_data*>(scrutinee_type.get()) != nullptr;
    if(!is_data) {
        throw type_error("attempting case analysis of non-data type");
    }

    return result_type;
}
|
||||||
|
|
||||||
|
void ast_case::resolve(const type_mgr& mgr) const {
|
||||||
|
of->resolve_common(mgr);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
branch->expr->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_data* type = dynamic_cast<type_data*>(of->node_type.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||||
|
cpat->params.size())));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the name of the variable this pattern introduces.
void pattern_var::print(std::ostream& to) const {
    const auto& bound_name = var;
    to << bound_name;
}
|
||||||
|
|
||||||
|
// A variable pattern places no constraint on the scrutinee: it only binds
// `var` to the scrutinee type `t` in `env`. The type manager is unused.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    static_cast<void>(mgr);
    env.bind(var, t);
}
|
||||||
|
|
||||||
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
to << constr;
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks a constructor pattern against the scrutinee type `t`.
//
// The constructor's type is looked up in `env` and peeled one arrow at a
// time: each pattern parameter is bound to the argument type at its position.
// Whatever is left after the last parameter is unified with `t`.
//
// Throws type_error when the constructor is unknown, or when the pattern
// names more or fewer parameters than the constructor type has arguments.
// Errors raised by mgr.unify propagate unchanged.
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr remaining_type = env.lookup(constr);
    if(!remaining_type) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    for(const auto& param : params) {
        type_arr* arr = dynamic_cast<type_arr*>(remaining_type.get());
        if(!arr) throw type_error("too many parameters in constructor pattern");

        env.bind(param, arr->left);
        remaining_type = arr->right;
    }

    // If an arrow is still left over, the pattern supplied too few parameters.
    // Without this check the partially applied constructor type would be
    // unified with the scrutinee, which succeeds whenever the scrutinee is
    // still an unconstrained type variable.
    if(dynamic_cast<type_arr*>(remaining_type.get())) {
        throw type_error("too few parameters in constructor pattern");
    }

    mgr.unify(t, remaining_type);
}
|
||||||
141
code/compiler/08/ast.hpp
Normal file
141
code/compiler/08/ast.hpp
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
struct ast {
|
||||||
|
type_ptr node_type;
|
||||||
|
|
||||||
|
virtual ~ast() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||||
|
virtual void compile(const env_ptr& env,
|
||||||
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
|
|
||||||
|
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||||
|
void resolve_common(const type_mgr& mgr);
|
||||||
|
};
|
||||||
|
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
|
virtual void print(std::ostream& to) const = 0;
|
||||||
|
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
|
|
||||||
|
struct branch {
|
||||||
|
pattern_ptr pat;
|
||||||
|
ast_ptr expr;
|
||||||
|
|
||||||
|
branch(pattern_ptr p, ast_ptr a)
|
||||||
|
: pat(std::move(p)), expr(std::move(a)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using branch_ptr = std::unique_ptr<branch>;
|
||||||
|
|
||||||
|
struct ast_int : public ast {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
explicit ast_int(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_lid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_uid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_uid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_binop : public ast {
|
||||||
|
binop op;
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_binop(binop o, ast_ptr l, ast_ptr r)
|
||||||
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_app : public ast {
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_app(ast_ptr l, ast_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_case : public ast {
|
||||||
|
ast_ptr of;
|
||||||
|
std::vector<branch_ptr> branches;
|
||||||
|
|
||||||
|
ast_case(ast_ptr o, std::vector<branch_ptr> b)
|
||||||
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_var : public pattern {
|
||||||
|
std::string var;
|
||||||
|
|
||||||
|
pattern_var(std::string v)
|
||||||
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_constr : public pattern {
|
||||||
|
std::string constr;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
|
||||||
|
pattern_constr(std::string c, std::vector<std::string> p)
|
||||||
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr&, type_env& env) const;
|
||||||
|
};
|
||||||
21
code/compiler/08/binop.cpp
Normal file
21
code/compiler/08/binop.cpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
// Returns the source-level symbol of a binary operator, or "??" for a value
// outside the enumeration.
std::string op_name(binop op) {
    if(op == PLUS) return "+";
    if(op == MINUS) return "-";
    if(op == TIMES) return "*";
    if(op == DIVIDE) return "/";
    return "??";
}
|
||||||
|
|
||||||
|
// Returns the spelled-out name of a binary operator, or "??" for a value
// outside the enumeration.
std::string op_action(binop op) {
    if(op == PLUS) return "plus";
    if(op == MINUS) return "minus";
    if(op == TIMES) return "times";
    if(op == DIVIDE) return "divide";
    return "??";
}
|
||||||
12
code/compiler/08/binop.hpp
Normal file
12
code/compiler/08/binop.hpp
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// The four arithmetic operators of the language. The numeric values are the
// ones the compiler would assign implicitly, spelled out for readability.
enum binop {
    PLUS = 0,
    MINUS = 1,
    TIMES = 2,
    DIVIDE = 3
};
|
||||||
|
|
||||||
|
std::string op_name(binop op);
|
||||||
|
std::string op_action(binop op);
|
||||||
116
code/compiler/08/definition.cpp
Normal file
116
code/compiler/08/definition.cpp
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
#include "definition.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/Type.h>
|
||||||
|
|
||||||
|
// First typing pass: gives the function a fresh return type and one fresh
// type per parameter, then binds `name` to the resulting arrow type.
//
// The arrow is built from the last parameter outwards, so `param_types` ends
// up holding the parameter types in reverse declaration order.
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();
    type_ptr signature = return_type;

    for(auto remaining = params.size(); remaining > 0; --remaining) {
        type_ptr fresh_param = mgr.new_type();
        signature = type_ptr(new type_arr(fresh_param, signature));
        param_types.push_back(fresh_param);
    }

    env.bind(name, signature);
}
|
||||||
|
|
||||||
|
// Second typing pass: type-checks the body in a scope where each parameter is
// bound to its type, then unifies the body's type with `return_type`.
//
// `param_types` is walked backwards while `params` is walked forwards,
// pairing each parameter with the type stored for it.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env body_env = env.scope();

    auto type_pos = param_types.rbegin();
    for(auto name_pos = params.begin();
            name_pos != params.end() && type_pos != param_types.rend();
            name_pos++, type_pos++) {
        body_env.bind(*name_pos, *type_pos);
    }

    type_ptr inferred = body->typecheck_common(mgr, body_env);
    mgr.unify(return_type, inferred);
}
|
||||||
|
|
||||||
|
void definition_defn::resolve(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
body->resolve_common(mgr);
|
||||||
|
|
||||||
|
return_type = mgr.resolve(return_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
for(auto& param_type : param_types) {
|
||||||
|
param_type = mgr.resolve(param_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
// First code-generation pass: asks the context to create the function for
// this definition, passing its name and parameter count, and remembers it.
void definition_defn::gen_llvm_first(llvm_context& ctx) {
    const auto arity = params.size();
    generated_function = ctx.create_custom_function(name, arity);
}
|
||||||
|
|
||||||
|
// Second code-generation pass: emits every compiled instruction into the
// entry block of the function created in the first pass, then returns void.
void definition_defn::gen_llvm_second(llvm_context& ctx) {
    auto* entry = &generated_function->getEntryBlock();
    ctx.builder.SetInsertPoint(entry);

    for(auto it = instructions.begin(); it != instructions.end(); ++it) {
        (*it)->gen_llvm(ctx, generated_function);
    }

    ctx.builder.CreateRetVoid();
}
|
||||||
|
|
||||||
|
// First typing pass for a data definition: creates the data type, numbers its
// constructors from 0 in declaration order, and binds every constructor name
// to a function type from its field types to the data type.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* data_type = new type_data(name);
    type_ptr result_type = type_ptr(data_type);
    int tag_counter = 0;

    for(auto& ctor : constructors) {
        // Record the tag on the constructor and in the type's own table.
        ctor->tag = tag_counter;
        data_type->constructors[ctor->name] = { tag_counter };
        tag_counter++;

        // Build the arrow type from the last field outwards.
        type_ptr ctor_type = result_type;
        for(auto field = ctor->types.rbegin(); field != ctor->types.rend(); ++field) {
            type_ptr field_type = type_ptr(new type_base(*field));
            ctor_type = type_ptr(new type_arr(field_type, ctor_type));
        }

        env.bind(ctor->name, ctor_type);
    }
}
|
||||||
|
|
||||||
|
// Data definitions have no body to check; the second typing pass is a no-op.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
    static_cast<void>(mgr);
    static_cast<void>(env);
}
|
||||||
|
|
||||||
|
// Data definitions have nothing to resolve; this pass is a no-op.
void definition_data::resolve(const type_mgr& mgr) {
    static_cast<void>(mgr);
}
|
||||||
|
|
||||||
|
// Data definitions produce no instructions at this stage; their constructor
// functions are emitted directly in gen_llvm_first.
void definition_data::compile() {
}
|
||||||
|
|
||||||
|
// Emits one function per constructor. Each function's body is a single pack
// operation, given the constructor's field count and tag, followed by a void
// return.
void definition_data::gen_llvm_first(llvm_context& ctx) {
    for(auto& ctor : constructors) {
        const auto field_count = ctor->types.size();

        auto ctor_function = ctx.create_custom_function(ctor->name, field_count);
        ctx.builder.SetInsertPoint(&ctor_function->getEntryBlock());

        auto size_value = ctx.create_size(field_count);
        auto tag_value = ctx.create_i8(ctor->tag);
        ctx.create_pack(ctor_function, size_value, tag_value);

        ctx.builder.CreateRetVoid();
    }
}
|
||||||
|
|
||||||
|
// Everything for a data definition is emitted in the first pass; the second
// pass is a no-op.
void definition_data::gen_llvm_second(llvm_context& ctx) {
    static_cast<void>(ctx);
}
|
||||||
73
code/compiler/08/definition.hpp
Normal file
73
code/compiler/08/definition.hpp
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
struct ast;
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct definition {
|
||||||
|
virtual ~definition() = default;
|
||||||
|
|
||||||
|
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||||
|
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) = 0;
|
||||||
|
virtual void compile() = 0;
|
||||||
|
virtual void gen_llvm_first(llvm_context& ctx) = 0;
|
||||||
|
virtual void gen_llvm_second(llvm_context& ctx) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using definition_ptr = std::unique_ptr<definition>;
|
||||||
|
|
||||||
|
// One constructor of a data type: its name, the names of its field types in
// order, and the numeric tag identifying it.
struct constructor {
    std::string name;
    std::vector<std::string> types;
    // Zero until a real tag is assigned, so a constructor read before tag
    // assignment never yields an indeterminate value.
    int8_t tag = 0;

    constructor(std::string n, std::vector<std::string> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};
|
||||||
|
|
||||||
|
using constructor_ptr = std::unique_ptr<constructor>;
|
||||||
|
|
||||||
|
struct definition_defn : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
type_ptr return_type;
|
||||||
|
std::vector<type_ptr> param_types;
|
||||||
|
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
|
||||||
|
llvm::Function* generated_function;
|
||||||
|
|
||||||
|
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
||||||
|
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_data : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
|
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||||
|
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
|
};
|
||||||
23
code/compiler/08/env.cpp
Normal file
23
code/compiler/08/env.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
int env_var::get_offset(const std::string& name) const {
|
||||||
|
if(name == this->name) return 0;
|
||||||
|
if(parent) return parent->get_offset(name) + 1;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_var::has_variable(const std::string& name) const {
|
||||||
|
if(name == this->name) return true;
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int env_offset::get_offset(const std::string& name) const {
|
||||||
|
if(parent) return parent->get_offset(name) + offset;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_offset::has_variable(const std::string& name) const {
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
34
code/compiler/08/env.hpp
Normal file
34
code/compiler/08/env.hpp
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Abstract compile-time environment, queried by variable name.
struct env {
    virtual ~env() = default;

    // Offset associated with `name` in this environment.
    virtual int get_offset(const std::string& name) const = 0;

    // Whether `name` is present in this environment.
    virtual bool has_variable(const std::string& name) const = 0;
};
|
||||||
|
|
||||||
|
using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
|
struct env_var : public env {
|
||||||
|
std::string name;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_var(std::string& n, env_ptr p)
|
||||||
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct env_offset : public env {
|
||||||
|
int offset;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_offset(int o, env_ptr p)
|
||||||
|
: offset(o), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
5
code/compiler/08/error.cpp
Normal file
5
code/compiler/08/error.cpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Returns a fixed, generic message; the specific reason is kept in the
// `description` member and is not part of this string.
const char* type_error::what() const noexcept {
    static const char* const message =
        "an error occured while checking the types of the program";
    return message;
}
|
||||||
21
code/compiler/08/error.hpp
Normal file
21
code/compiler/08/error.hpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Exception for failures during type checking. `description` carries the
// specific reason supplied at the throw site.
struct type_error : std::exception {
    std::string description;

    type_error(std::string d)
        : description{std::move(d)} {}

    const char* what() const noexcept override;
};
|
||||||
|
|
||||||
|
struct unification_error : public type_error {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
unification_error(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
|
type_error("failed to unify types") {}
|
||||||
|
};
|
||||||
2
code/compiler/08/examples/bad1.txt
Normal file
2
code/compiler/08/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
defn main = { 3 + True }
|
||||||
1
code/compiler/08/examples/bad2.txt
Normal file
1
code/compiler/08/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
defn main = { 1 2 3 4 5 }
|
||||||
8
code/compiler/08/examples/bad3.txt
Normal file
8
code/compiler/08/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x y z -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
31
code/compiler/08/examples/runtime1.c
Normal file
31
code/compiler/08/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#include "../runtime.h"
|
||||||
|
|
||||||
|
void f_add(struct stack* s) {
|
||||||
|
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
|
||||||
|
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
|
||||||
|
}
|
||||||
|
|
||||||
|
void f_main(struct stack* s) {
|
||||||
|
// PushInt 320
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(320));
|
||||||
|
|
||||||
|
// PushInt 6
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(6));
|
||||||
|
|
||||||
|
// PushGlobal f_add (the function for +)
|
||||||
|
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
|
||||||
|
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
}
|
||||||
3
code/compiler/08/examples/works1.txt
Normal file
3
code/compiler/08/examples/works1.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
defn main = { sum 320 6 }
|
||||||
|
defn sum x y = { x + y }
|
||||||
|
|
||||||
3
code/compiler/08/examples/works2.txt
Normal file
3
code/compiler/08/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
defn add x y = { x + y }
|
||||||
|
defn double x = { add x x }
|
||||||
|
defn main = { double 163 }
|
||||||
8
code/compiler/08/examples/works3.txt
Normal file
8
code/compiler/08/examples/works3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
defn length l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> { 1 + length xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
16
code/compiler/08/examples/works4.txt
Normal file
16
code/compiler/08/examples/works4.txt
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn add x y = { x + y }
|
||||||
|
defn mul x y = { x * y }
|
||||||
|
|
||||||
|
defn foldr f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { f x (foldr f b xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
|
||||||
|
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
|
||||||
|
}
|
||||||
17
code/compiler/08/examples/works5.txt
Normal file
17
code/compiler/08/examples/works5.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn sumZip l m = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> {
|
||||||
|
case m of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons y ys -> { x + y + sumZip xs ys }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn ones = { Cons 1 ones }
|
||||||
|
|
||||||
|
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
177
code/compiler/08/instruction.cpp
Normal file
177
code/compiler/08/instruction.cpp
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/BasicBlock.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Writes `n` single-space strings to `to`. The counter is tested against zero
// and then decremented, exactly like a `while(n--)` loop.
static void print_indent(int n, std::ostream& to) {
    for(; n != 0; --n) {
        to << " ";
    }
}
|
||||||
|
|
||||||
|
// Prints "PushInt(<value>)" on its own indented line.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(" << value;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Wraps `value` as an i32, builds a number node from it and pushes that node.
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
    auto literal = ctx.create_i32(value);
    auto number_node = ctx.create_num(literal);
    ctx.create_push(f, number_node);
}
|
||||||
|
|
||||||
|
// Prints "PushGlobal(<name>)" on its own indented line.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(" << name;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Looks up the function registered as "f_" + name (at() throws if there is
// none), builds a global node from it and its arity, and pushes that node.
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
    auto& registered = ctx.custom_functions.at("f_" + name);
    auto arity_value = ctx.create_i32(registered->arity);
    auto global_node = ctx.create_global(registered->function, arity_value);
    ctx.create_push(f, global_node);
}
|
||||||
|
|
||||||
|
// Prints "Push(<offset>)" on its own indented line.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(" << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Peeks the stack entry at `offset` and pushes it again.
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
    auto position = ctx.create_size(offset);
    auto peeked = ctx.create_peek(f, position);
    ctx.create_push(f, peeked);
}
|
||||||
|
|
||||||
|
// Prints "Pop(<count>)" on its own indented line.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(" << count;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a popn operation for `count` entries.
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto how_many = ctx.create_size(count);
    ctx.create_popn(f, how_many);
}
|
||||||
|
|
||||||
|
// Prints "MkApp()" on its own indented line.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Pops two nodes and pushes an application node built from them; the node
// popped first is the left side.
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
    auto first_popped = ctx.create_pop(f);
    auto second_popped = ctx.create_pop(f);
    auto app_node = ctx.create_app(first_popped, second_popped);
    ctx.create_push(f, app_node);
}
|
||||||
|
|
||||||
|
// Prints "Update(<offset>)" on its own indented line.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(" << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits an update operation at `offset`.
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
    auto position = ctx.create_size(offset);
    ctx.create_update(f, position);
}
|
||||||
|
|
||||||
|
// Prints "Pack(<tag>, <size>)" on its own indented line.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag;
    to << ", " << size;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a pack operation given the field count and the constructor tag.
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
    auto size_value = ctx.create_size(size);
    auto tag_value = ctx.create_i8(tag);
    ctx.create_pack(f, size_value, tag_value);
}
|
||||||
|
|
||||||
|
void instruction_split::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Split()" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a split operation for `size` fields.
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
    auto field_count = ctx.create_size(size);
    ctx.create_split(f, field_count);
}
|
||||||
|
|
||||||
|
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Jump(" << std::endl;
|
||||||
|
for(auto& instruction_set : branches) {
|
||||||
|
for(auto& instruction : instruction_set) {
|
||||||
|
instruction->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
}
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a switch on the data tag of the node at the top of the stack.
//
// A "safety" block is both the switch's default target and the place every
// branch jumps to when it finishes. Each branch gets its own block holding
// its instructions; the tag-to-branch table then wires the switch cases.
// On return the builder is positioned in the safety block.
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
    auto scrutinee = ctx.create_peek(f, ctx.create_size(0));
    auto scrutinee_tag = ctx.unwrap_data_tag(scrutinee);
    auto after_block = BasicBlock::Create(ctx.ctx, "safety", f);
    // The switch must be created while the builder is still in the current
    // block, before any branch block moves the insert point.
    auto dispatch = ctx.builder.CreateSwitch(scrutinee_tag, after_block, tag_mappings.size());

    std::vector<BasicBlock*> branch_blocks;
    for(auto branch = branches.begin(); branch != branches.end(); ++branch) {
        auto block = BasicBlock::Create(ctx.ctx, "branch", f);
        ctx.builder.SetInsertPoint(block);
        for(auto step = branch->begin(); step != branch->end(); ++step) {
            (*step)->gen_llvm(ctx, f);
        }
        ctx.builder.CreateBr(after_block);
        branch_blocks.push_back(block);
    }

    // mapping.first is the tag, mapping.second the index of its branch.
    for(auto mapping = tag_mappings.begin(); mapping != tag_mappings.end(); ++mapping) {
        dispatch->addCase(ctx.create_i8(mapping->first), branch_blocks[mapping->second]);
    }

    ctx.builder.SetInsertPoint(after_block);
}
|
||||||
|
|
||||||
|
// Prints "Slide(<offset>)" on its own indented line.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(" << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a slide operation by `offset`.
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
    auto distance = ctx.create_size(offset);
    ctx.create_slide(f, distance);
}
|
||||||
|
|
||||||
|
// Prints "BinOp(<operator action name>)" on its own indented line.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(" << op_action(op);
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Pops two number nodes (the first popped is the left operand), unwraps them,
// applies the operator and pushes a number node holding the result.
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto lhs = ctx.unwrap_num(ctx.create_pop(f));
    auto rhs = ctx.unwrap_num(ctx.create_pop(f));

    llvm::Value* computed;
    switch(op) {
        case PLUS:
            computed = ctx.builder.CreateAdd(lhs, rhs);
            break;
        case MINUS:
            computed = ctx.builder.CreateSub(lhs, rhs);
            break;
        case TIMES:
            computed = ctx.builder.CreateMul(lhs, rhs);
            break;
        case DIVIDE:
            // Signed division.
            computed = ctx.builder.CreateSDiv(lhs, rhs);
            break;
    }

    ctx.create_push(f, ctx.create_num(computed));
}
|
||||||
|
|
||||||
|
// Prints "Eval()" on its own indented line.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Pops the top node, evaluates it and pushes the result.
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
    auto popped = ctx.create_pop(f);
    auto evaluated = ctx.create_eval(popped);
    ctx.create_push(f, evaluated);
}
|
||||||
|
|
||||||
|
// Prints "Alloc(<amount>)" on its own indented line.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(" << amount;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits an alloc operation for `amount`.
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
    auto how_many = ctx.create_size(amount);
    ctx.create_alloc(f, how_many);
}
|
||||||
|
|
||||||
|
// Prints "Unwind()" on its own indented line.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Unwind emits no LLVM code here.
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
    static_cast<void>(ctx);
    static_cast<void>(f);
}
|
||||||
142
code/compiler/08/instruction.hpp
Normal file
142
code/compiler/08/instruction.hpp
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
|
||||||
|
struct instruction {
|
||||||
|
virtual ~instruction() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using instruction_ptr = std::unique_ptr<instruction>;
|
||||||
|
|
||||||
|
struct instruction_pushint : public instruction {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
instruction_pushint(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pushglobal : public instruction {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
instruction_pushglobal(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_push : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_push(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pop : public instruction {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
instruction_pop(int c)
|
||||||
|
: count(c) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_mkapp : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_update : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_update(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pack : public instruction {
|
||||||
|
int tag;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_pack(int t, int s)
|
||||||
|
: tag(t), size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_split : public instruction {
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_split(int s)
|
||||||
|
: size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_slide : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_slide(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_binop : public instruction {
|
||||||
|
binop op;
|
||||||
|
|
||||||
|
instruction_binop(binop o)
|
||||||
|
: op(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_eval : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_alloc : public instruction {
|
||||||
|
int amount;
|
||||||
|
|
||||||
|
instruction_alloc(int a)
|
||||||
|
: amount(a) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_unwind : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
252
code/compiler/08/llvm_context.cpp
Normal file
252
code/compiler/08/llvm_context.cpp
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
void llvm_context::create_types() {
|
||||||
|
stack_type = StructType::create(ctx, "stack");
|
||||||
|
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||||
|
tag_type = IntegerType::getInt8Ty(ctx);
|
||||||
|
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||||
|
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||||
|
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||||
|
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||||
|
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||||
|
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||||
|
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||||
|
function_type = FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false);
|
||||||
|
|
||||||
|
struct_types.at("node_base")->setBody(
|
||||||
|
IntegerType::getInt32Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_app")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type,
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_num")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt32Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_global")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
|
||||||
|
);
|
||||||
|
struct_types.at("node_ind")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_data")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
PointerType::getUnqual(node_ptr_type)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void llvm_context::create_functions() {
|
||||||
|
auto void_type = Type::getVoidTy(ctx);
|
||||||
|
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
|
||||||
|
functions["stack_init"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_init",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_free"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_free",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_push"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_pop"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_pop",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_peek"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_peek",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_popn"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_popn",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_slide"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_slide",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_update"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_update",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_alloc"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_alloc",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_pack"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_pack",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_split"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_split",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
|
||||||
|
auto int32_type = IntegerType::getInt32Ty(ctx);
|
||||||
|
functions["alloc_app"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_app",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_num"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { int32_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_num",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_global"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_global",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_ind"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_ind",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
|
||||||
|
functions["eval"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"eval",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the 8-bit integer constant with value `i`.
ConstantInt* llvm_context::create_i8(int8_t i) {
    // `i` is signed: tell APInt so, otherwise a negative value is widened to
    // a 64-bit pattern that does not fit in 8 bits.
    return ConstantInt::get(ctx, APInt(8, i, true));
}
|
||||||
|
// Returns the 32-bit integer constant with value `i`.
ConstantInt* llvm_context::create_i32(int32_t i) {
    // `i` is signed: tell APInt so, otherwise a negative value is widened to
    // a 64-bit pattern that does not fit in 32 bits.
    return ConstantInt::get(ctx, APInt(32, i, true));
}
|
||||||
|
// Returns an integer constant with value `i` whose bit width equals that of
// size_t on the machine running the compiler.
ConstantInt* llvm_context::create_size(size_t i) {
    const APInt as_bits(sizeof(size_t) * 8, i);
    return ConstantInt::get(ctx, as_bits);
}
|
||||||
|
|
||||||
|
// Emits a call to the runtime's stack_pop, passing the first argument of `f`,
// and returns the call's result.
Value* llvm_context::create_pop(Function* f) {
    Value* stack_arg = f->arg_begin();
    return builder.CreateCall(functions.at("stack_pop"), { stack_arg });
}
|
||||||
|
// Emits a call to the runtime's stack_peek with the first argument of `f`
// and the offset `off`, and returns the call's result.
Value* llvm_context::create_peek(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    return builder.CreateCall(functions.at("stack_peek"), { stack_arg, off });
}
|
||||||
|
// Emits a call to the runtime's stack_push with the first argument of `f`
// and the value `v`.
void llvm_context::create_push(Function* f, Value* v) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_push"), { stack_arg, v });
}
|
||||||
|
// Emits a call to the runtime's stack_popn with the first argument of `f`
// and the count `off`.
void llvm_context::create_popn(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_popn"), { stack_arg, off });
}
|
||||||
|
// Emits a call to the runtime's stack_update with the first argument of `f`
// and the offset `off`.
void llvm_context::create_update(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_update"), { stack_arg, off });
}
|
||||||
|
// Emits a call to the runtime's stack_pack with the first argument of `f`,
// then `c`, then `t` (declared as a size and a tag respectively).
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_pack"), { stack_arg, c, t });
}
|
||||||
|
// Emits a call to the runtime's stack_split with the first argument of `f`
// and the count `c`.
void llvm_context::create_split(Function* f, Value* c) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_split"), { stack_arg, c });
}
|
||||||
|
// Emits a call to the runtime's stack_slide with the first argument of `f`
// and the count `off`.
void llvm_context::create_slide(Function* f, Value* off) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_slide"), { stack_arg, off });
}
|
||||||
|
// Emits a call to the runtime's stack_alloc with the first argument of `f`
// and the count `n`.
void llvm_context::create_alloc(Function* f, Value* n) {
    Value* stack_arg = f->arg_begin();
    builder.CreateCall(functions.at("stack_alloc"), { stack_arg, n });
}
|
||||||
|
|
||||||
|
// Emits a call to the runtime's eval on `e` and returns the call's result.
Value* llvm_context::create_eval(Value* e) {
    Function* runtime_eval = functions.at("eval");
    return builder.CreateCall(runtime_eval, { e });
}
|
||||||
|
|
||||||
|
// Emits code that treats `v` as a pointer to node_num and loads struct
// field 1 (the 32-bit integer that follows the node_base header).
Value* llvm_context::unwrap_num(Value* v) {
    PointerType* num_node_ptr = PointerType::getUnqual(struct_types.at("node_num"));
    Value* as_num = builder.CreatePointerCast(v, num_node_ptr);
    ConstantInt* first_object = create_i32(0);
    ConstantInt* value_field = create_i32(1);
    Value* field_address = builder.CreateGEP(as_num, { first_object, value_field });
    return builder.CreateLoad(field_address);
}
|
||||||
|
// Emits a call to the runtime's alloc_num on `v` and returns the call's
// result.
Value* llvm_context::create_num(Value* v) {
    Function* runtime_alloc_num = functions.at("alloc_num");
    return builder.CreateCall(runtime_alloc_num, { v });
}
|
||||||
|
|
||||||
|
// Emits code that treats `v` as a pointer to node_data and loads struct
// field 1 (the 8-bit tag that follows the node_base header).
Value* llvm_context::unwrap_data_tag(Value* v) {
    PointerType* data_node_ptr = PointerType::getUnqual(struct_types.at("node_data"));
    Value* as_data = builder.CreatePointerCast(v, data_node_ptr);
    ConstantInt* first_object = create_i32(0);
    ConstantInt* tag_field = create_i32(1);
    Value* field_address = builder.CreateGEP(as_data, { first_object, tag_field });
    return builder.CreateLoad(field_address);
}
|
||||||
|
|
||||||
|
// Emits a call to the runtime's alloc_global with `f` and `a` (declared as
// a function and a 32-bit integer) and returns the call's result.
Value* llvm_context::create_global(Value* f, Value* a) {
    Function* runtime_alloc_global = functions.at("alloc_global");
    return builder.CreateCall(runtime_alloc_global, { f, a });
}
|
||||||
|
|
||||||
|
// Emits a call to the runtime's alloc_app on the nodes `l` and `r` and
// returns the call's result.
Value* llvm_context::create_app(Value* l, Value* r) {
    Function* runtime_alloc_app = functions.at("alloc_app");
    return builder.CreateCall(runtime_alloc_app, { l, r });
}
|
||||||
|
|
||||||
|
// Creates an externally linked function named "f_" + name that takes a stack
// pointer and returns void, gives it an empty "entry" block, and records it
// together with `arity` in `custom_functions` under the same "f_"-prefixed
// key. Returns the new function.
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
    const std::string symbol = "f_" + name;

    llvm::FunctionType* signature = llvm::FunctionType::get(
            llvm::Type::getVoidTy(ctx), { stack_ptr_type }, false);
    llvm::Function* created = llvm::Function::Create(
            signature,
            llvm::Function::LinkageTypes::ExternalLinkage,
            symbol,
            &module
    );
    // The block is attached to `created` by its constructor; callers reach it
    // through getEntryBlock().
    llvm::BasicBlock::Create(ctx, "entry", created);

    custom_function_ptr record(new custom_function());
    record->arity = arity;
    record->function = created;
    custom_functions[symbol] = std::move(record);

    return created;
}
|
||||||
66
code/compiler/08/llvm_context.hpp
Normal file
66
code/compiler/08/llvm_context.hpp
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/LLVMContext.h>
|
||||||
|
#include <llvm/IR/IRBuilder.h>
|
||||||
|
#include <llvm/IR/Module.h>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct llvm_context {
|
||||||
|
struct custom_function {
|
||||||
|
llvm::Function* function;
|
||||||
|
int32_t arity;
|
||||||
|
};
|
||||||
|
|
||||||
|
using custom_function_ptr = std::unique_ptr<custom_function>;
|
||||||
|
|
||||||
|
llvm::LLVMContext ctx;
|
||||||
|
llvm::IRBuilder<> builder;
|
||||||
|
llvm::Module module;
|
||||||
|
|
||||||
|
std::map<std::string, custom_function_ptr> custom_functions;
|
||||||
|
std::map<std::string, llvm::Function*> functions;
|
||||||
|
std::map<std::string, llvm::StructType*> struct_types;
|
||||||
|
|
||||||
|
llvm::StructType* stack_type;
|
||||||
|
llvm::PointerType* stack_ptr_type;
|
||||||
|
llvm::PointerType* node_ptr_type;
|
||||||
|
llvm::IntegerType* tag_type;
|
||||||
|
llvm::FunctionType* function_type;
|
||||||
|
|
||||||
|
llvm_context()
|
||||||
|
: builder(ctx), module("bloglang", ctx) {
|
||||||
|
create_types();
|
||||||
|
create_functions();
|
||||||
|
}
|
||||||
|
|
||||||
|
void create_types();
|
||||||
|
void create_functions();
|
||||||
|
|
||||||
|
llvm::ConstantInt* create_i8(int8_t);
|
||||||
|
llvm::ConstantInt* create_i32(int32_t);
|
||||||
|
llvm::ConstantInt* create_size(size_t);
|
||||||
|
|
||||||
|
llvm::Value* create_pop(llvm::Function*);
|
||||||
|
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
||||||
|
void create_push(llvm::Function*, llvm::Value*);
|
||||||
|
void create_popn(llvm::Function*, llvm::Value*);
|
||||||
|
void create_update(llvm::Function*, llvm::Value*);
|
||||||
|
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
void create_split(llvm::Function*, llvm::Value*);
|
||||||
|
void create_slide(llvm::Function*, llvm::Value*);
|
||||||
|
void create_alloc(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_eval(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_num(llvm::Value*);
|
||||||
|
llvm::Value* create_num(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_global(llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_app(llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Function* create_custom_function(std::string name, int32_t arity);
|
||||||
|
};
|
||||||
174
code/compiler/08/main.cpp
Normal file
174
code/compiler/08/main.cpp
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
|
#include "llvm/IR/Verifier.h"
|
||||||
|
#include "llvm/Support/TargetSelect.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/Support/FileSystem.h"
|
||||||
|
#include "llvm/Target/TargetOptions.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
// Error callback of the generated parser: reports `msg` on standard output.
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occurred: " << msg << std::endl;
}
|
||||||
|
|
||||||
|
extern std::vector<definition_ptr> program;
|
||||||
|
|
||||||
|
void typecheck_program(
|
||||||
|
const std::vector<definition_ptr>& prog,
|
||||||
|
type_mgr& mgr, type_env& env) {
|
||||||
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
|
int_type,
|
||||||
|
type_ptr(new type_arr(int_type, int_type))));
|
||||||
|
|
||||||
|
env.bind("+", binop_type);
|
||||||
|
env.bind("-", binop_type);
|
||||||
|
env.bind("*", binop_type);
|
||||||
|
env.bind("/", binop_type);
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_first(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_second(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& pair : env.names) {
|
||||||
|
std::cout << pair.first << ": ";
|
||||||
|
pair.second->print(mgr, std::cout);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->resolve(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile_program(const std::vector<definition_ptr>& prog) {
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->compile();
|
||||||
|
|
||||||
|
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
|
||||||
|
if(!defn) continue;
|
||||||
|
for(auto& instruction : defn->instructions) {
|
||||||
|
instruction->print(0, std::cout);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generates the arity-2 function for the built-in operator `op`. Its body is
// the fixed sequence Push 1, Eval, Push 1, Eval, BinOp(op), followed by a
// void return.
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
    llvm::Function* op_function = ctx.create_custom_function(op_action(op), 2);

    std::vector<instruction_ptr> body;
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    body.push_back(instruction_ptr(new instruction_binop(op)));

    // Emit into the entry block that create_custom_function created.
    ctx.builder.SetInsertPoint(&op_function->getEntryBlock());
    for(auto& step : body) {
        step->gen_llvm(ctx, op_function);
    }
    ctx.builder.CreateRetVoid();
}
|
||||||
|
|
||||||
|
// Compiles ctx.module to a native object file written to `filename`, for
// the host's default target triple.
//
// If the target cannot be looked up, the registry's error is printed and the
// function returns without writing anything. Every later failure prints a
// diagnostic to standard error and throws the int 0.
void output_llvm(llvm_context& ctx, const std::string& filename) {
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();

    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmParser();
    llvm::InitializeNativeTargetAsmPrinter();

    std::string error;
    const llvm::Target* target =
        llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (!target) {
        std::cerr << error << std::endl;
        return;
    }

    std::string cpu = "generic";
    std::string features = "";
    llvm::TargetOptions options;
    llvm::TargetMachine* targetMachine =
        target->createTargetMachine(targetTriple, cpu, features,
                options, llvm::Optional<llvm::Reloc::Model>());
    // createTargetMachine may return null; do not dereference it blindly.
    if (!targetMachine) {
        std::cerr << "unable to create a target machine for "
            << targetTriple << std::endl;
        throw 0;
    }

    ctx.module.setDataLayout(targetMachine->createDataLayout());
    ctx.module.setTargetTriple(targetTriple);

    std::error_code ec;
    llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
    if (ec) {
        std::cerr << "unable to open " << filename << ": "
            << ec.message() << std::endl;
        throw 0;
    }

    llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
    llvm::legacy::PassManager pm;
    // addPassesToEmitFile returns true when it can NOT emit this file type.
    if (targetMachine->addPassesToEmitFile(pm, file, nullptr, type)) {
        std::cerr << "the target machine cannot emit an object file" << std::endl;
        throw 0;
    }

    pm.run(ctx.module);
    file.close();
}
|
||||||
|
|
||||||
|
void gen_llvm(const std::vector<definition_ptr>& prog) {
|
||||||
|
llvm_context ctx;
|
||||||
|
gen_llvm_internal_op(ctx, PLUS);
|
||||||
|
gen_llvm_internal_op(ctx, MINUS);
|
||||||
|
gen_llvm_internal_op(ctx, TIMES);
|
||||||
|
gen_llvm_internal_op(ctx, DIVIDE);
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_first(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_second(ctx);
|
||||||
|
}
|
||||||
|
ctx.module.print(llvm::outs(), nullptr);
|
||||||
|
output_llvm(ctx, "program.o");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
yy::parser parser;
|
||||||
|
type_mgr mgr;
|
||||||
|
type_env env;
|
||||||
|
|
||||||
|
parser.parse();
|
||||||
|
for(auto& definition : program) {
|
||||||
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
|
if(!def) continue;
|
||||||
|
|
||||||
|
std::cout << def->name;
|
||||||
|
for(auto& param : def->params) std::cout << " " << param;
|
||||||
|
std::cout << ":" << std::endl;
|
||||||
|
|
||||||
|
def->body->print(1, std::cout);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
typecheck_program(program, mgr, env);
|
||||||
|
compile_program(program);
|
||||||
|
gen_llvm(program);
|
||||||
|
} catch(unification_error& err) {
|
||||||
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
|
std::cout << " (1) \033[34m";
|
||||||
|
err.left->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
std::cout << " (2) \033[32m";
|
||||||
|
err.right->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
} catch(type_error& err) {
|
||||||
|
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
141
code/compiler/08/parser.y
Normal file
141
code/compiler/08/parser.y
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
%{
#include <string>
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"

// Filled in by the `program` rule once the whole input has been parsed.
std::vector<definition_ptr> program;
extern yy::parser::symbol_type yylex();

%}

/* Arithmetic operators. */
%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
/* Integer literal. */
%token <int> INT
/* Keywords. */
%token DEFN
%token DATA
%token CASE
%token OF
/* Punctuation. */
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
/* Identifiers (LID / UID). */
%token <std::string> LID
%token <std::string> UID

%language "c++"
%define api.value.type variant
%define api.token.constructor

%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor

%start program

%%

/* A program is a non-empty list of definitions, stored in the global. */
program
    : definitions
        { program = std::move($1); }
    ;

definitions
    : definitions definition
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    | definition
        { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
    ;

definition
    : defn
        { $$ = std::move($1); }
    | data
        { $$ = std::move($1); }
    ;

/* Function definition: name, parameters, and a braced body expression. */
defn
    : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
        { $$ = definition_ptr(
            new definition_defn(std::move($2), std::move($3), std::move($6))); }
    ;

lowercaseParams
    : %empty
        { $$ = std::vector<std::string>(); }
    | lowercaseParams LID
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

uppercaseParams
    : %empty
        { $$ = std::vector<std::string>(); }
    | uppercaseParams UID
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

/* Additive level: left-recursive, so + and - associate to the left. */
aAdd
    : aAdd PLUS aMul
        { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
    | aAdd MINUS aMul
        { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
    | aMul
        { $$ = std::move($1); }
    ;

/* Multiplicative level: binds tighter than aAdd. */
aMul
    : aMul TIMES app
        { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
    | aMul DIVIDE app
        { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
    | app
        { $$ = std::move($1); }
    ;

/* Function application by juxtaposition; left-associative. */
app
    : app appBase
        { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
    | appBase
        { $$ = std::move($1); }
    ;

appBase
    : INT
        { $$ = ast_ptr(new ast_int($1)); }
    | LID
        { $$ = ast_ptr(new ast_lid(std::move($1))); }
    | UID
        { $$ = ast_ptr(new ast_uid(std::move($1))); }
    | OPAREN aAdd CPAREN
        { $$ = std::move($2); }
    | case
        { $$ = std::move($1); }
    ;

case
    : CASE aAdd OF OCURLY branches CCURLY
        { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
    ;

branches
    : branches branch
        { $$ = std::move($1); $$.push_back(std::move($2)); }
    | branch
        { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
    ;

branch
    : pattern ARROW OCURLY aAdd CCURLY
        { $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
    ;

pattern
    : LID
        { $$ = pattern_ptr(new pattern_var(std::move($1))); }
    | UID lowercaseParams
        { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
    ;

/* Data type definition: a name and a comma-separated constructor list. */
data
    : DATA UID EQUAL OCURLY constructors CCURLY
        { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
    ;

constructors
    : constructors COMMA constructor
        { $$ = std::move($1); $$.push_back(std::move($3)); }
    | constructor
        { $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
    ;

constructor
    : UID uppercaseParams
        { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
    ;

|
||||||
|
|
||||||
183
code/compiler/08/runtime.c
Normal file
183
code/compiler/08/runtime.c
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "runtime.h"
|
||||||
|
|
||||||
|
struct node_base* alloc_node() {
|
||||||
|
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||||
|
assert(new_node != NULL);
|
||||||
|
return new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||||
|
struct node_app* node = (struct node_app*) alloc_node();
|
||||||
|
node->base.tag = NODE_APP;
|
||||||
|
node->left = l;
|
||||||
|
node->right = r;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_num* alloc_num(int32_t n) {
|
||||||
|
struct node_num* node = (struct node_num*) alloc_node();
|
||||||
|
node->base.tag = NODE_NUM;
|
||||||
|
node->value = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a) {
|
||||||
|
struct node_global* node = (struct node_global*) alloc_node();
|
||||||
|
node->base.tag = NODE_GLOBAL;
|
||||||
|
node->arity = a;
|
||||||
|
node->function = f;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_ind* alloc_ind(struct node_base* n) {
|
||||||
|
struct node_ind* node = (struct node_ind*) alloc_node();
|
||||||
|
node->base.tag = NODE_IND;
|
||||||
|
node->next = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_init(struct stack* s) {
|
||||||
|
s->size = 4;
|
||||||
|
s->count = 0;
|
||||||
|
s->data = malloc(sizeof(*s->data) * s->size);
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_free(struct stack* s) {
|
||||||
|
free(s->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_push(struct stack* s, struct node_base* n) {
|
||||||
|
while(s->count >= s->size) {
|
||||||
|
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
s->data[s->count++] = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_pop(struct stack* s) {
|
||||||
|
assert(s->count > 0);
|
||||||
|
return s->data[--s->count];
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_peek(struct stack* s, size_t o) {
|
||||||
|
assert(s->count > o);
|
||||||
|
return s->data[s->count - o - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_popn(struct stack* s, size_t n) {
|
||||||
|
assert(s->count >= n);
|
||||||
|
s->count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_slide(struct stack* s, size_t n) {
|
||||||
|
assert(s->count > n);
|
||||||
|
s->data[s->count - n - 1] = s->data[s->count - 1];
|
||||||
|
s->count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_update(struct stack* s, size_t o) {
|
||||||
|
assert(s->count > o + 1);
|
||||||
|
struct node_ind* ind = (struct node_ind*) s->data[s->count - o - 2];
|
||||||
|
ind->base.tag = NODE_IND;
|
||||||
|
ind->next = s->data[s->count -= 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pushes `o` freshly allocated indirection nodes, each with a NULL target,
 * onto `s`. */
void stack_alloc(struct stack* s, size_t o) {
    for(size_t pushed = 0; pushed < o; pushed++) {
        struct node_ind* placeholder = alloc_ind(NULL);
        stack_push(s, (struct node_base*) placeholder);
    }
}
|
||||||
|
|
||||||
|
void stack_pack(struct stack* s, size_t n, int8_t t) {
|
||||||
|
assert(s->count >= n);
|
||||||
|
|
||||||
|
struct node_base** data = malloc(sizeof(*data) * n);
|
||||||
|
assert(data != NULL);
|
||||||
|
memcpy(data, &s->data[s->count - n], n * sizeof(*data));
|
||||||
|
|
||||||
|
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||||
|
new_node->array = data;
|
||||||
|
new_node->base.tag = NODE_DATA;
|
||||||
|
new_node->tag = t;
|
||||||
|
|
||||||
|
stack_popn(s, n);
|
||||||
|
stack_push(s, (struct node_base*) new_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_split(struct stack* s, size_t n) {
|
||||||
|
struct node_data* node = (struct node_data*) stack_pop(s);
|
||||||
|
for(size_t i = 0; i < n; i++) {
|
||||||
|
stack_push(s, node->array[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void unwind(struct stack* s) {
|
||||||
|
while(1) {
|
||||||
|
struct node_base* peek = stack_peek(s, 0);
|
||||||
|
if(peek->tag == NODE_APP) {
|
||||||
|
struct node_app* n = (struct node_app*) peek;
|
||||||
|
stack_push(s, n->left);
|
||||||
|
} else if(peek->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* n = (struct node_global*) peek;
|
||||||
|
assert(s->count > n->arity);
|
||||||
|
|
||||||
|
for(size_t i = 1; i <= n->arity; i++) {
|
||||||
|
s->data[s->count - i]
|
||||||
|
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
n->function(s);
|
||||||
|
} else if(peek->tag == NODE_IND) {
|
||||||
|
struct node_ind* n = (struct node_ind*) peek;
|
||||||
|
stack_pop(s);
|
||||||
|
stack_push(s, n->next);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* eval(struct node_base* n) {
|
||||||
|
struct stack program_stack;
|
||||||
|
stack_init(&program_stack);
|
||||||
|
stack_push(&program_stack, n);
|
||||||
|
unwind(&program_stack);
|
||||||
|
struct node_base* result = stack_pop(&program_stack);
|
||||||
|
stack_free(&program_stack);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern void f_main(struct stack* s);
|
||||||
|
|
||||||
|
void print_node(struct node_base* n) {
|
||||||
|
if(n->tag == NODE_APP) {
|
||||||
|
struct node_app* app = (struct node_app*) n;
|
||||||
|
print_node(app->left);
|
||||||
|
putchar(' ');
|
||||||
|
print_node(app->right);
|
||||||
|
} else if(n->tag == NODE_DATA) {
|
||||||
|
printf("(Packed)");
|
||||||
|
} else if(n->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* global = (struct node_global*) n;
|
||||||
|
printf("(Global: %p)", global->function);
|
||||||
|
} else if(n->tag == NODE_IND) {
|
||||||
|
print_node(((struct node_ind*) n)->next);
|
||||||
|
} else if(n->tag == NODE_NUM) {
|
||||||
|
struct node_num* num = (struct node_num*) n;
|
||||||
|
printf("%d", num->value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
struct node_global* first_node = alloc_global(f_main, 0);
|
||||||
|
struct node_base* result = eval((struct node_base*) first_node);
|
||||||
|
|
||||||
|
printf("Result: ");
|
||||||
|
print_node(result);
|
||||||
|
putchar('\n');
|
||||||
|
}
|
||||||
70
code/compiler/08/runtime.h
Normal file
70
code/compiler/08/runtime.h
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <stdint.h>
#include <stdlib.h>
|
||||||
|
|
||||||
|
struct stack;
|
||||||
|
|
||||||
|
/* Discriminator stored at the start of every node; it tells the runtime
 * which concrete node struct a node_base pointer may be cast to. */
enum node_tag {
    NODE_APP = 0,    /* struct node_app */
    NODE_NUM = 1,    /* struct node_num */
    NODE_GLOBAL = 2, /* struct node_global */
    NODE_IND = 3,    /* struct node_ind */
    NODE_DATA = 4    /* struct node_data */
};
|
||||||
|
|
||||||
|
struct node_base {
|
||||||
|
enum node_tag tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_app {
|
||||||
|
struct node_base base;
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_num {
|
||||||
|
struct node_base base;
|
||||||
|
int32_t value;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_global {
|
||||||
|
struct node_base base;
|
||||||
|
int32_t arity;
|
||||||
|
void (*function)(struct stack*);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_ind {
|
||||||
|
struct node_base base;
|
||||||
|
struct node_base* next;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_data {
|
||||||
|
struct node_base base;
|
||||||
|
int8_t tag;
|
||||||
|
struct node_base** array;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_base* alloc_node();
|
||||||
|
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
|
||||||
|
struct node_num* alloc_num(int32_t n);
|
||||||
|
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a);
|
||||||
|
struct node_ind* alloc_ind(struct node_base* n);
|
||||||
|
|
||||||
|
struct stack {
|
||||||
|
size_t size;
|
||||||
|
size_t count;
|
||||||
|
struct node_base** data;
|
||||||
|
};
|
||||||
|
|
||||||
|
void stack_init(struct stack* s);
|
||||||
|
void stack_free(struct stack* s);
|
||||||
|
void stack_push(struct stack* s, struct node_base* n);
|
||||||
|
struct node_base* stack_pop(struct stack* s);
|
||||||
|
struct node_base* stack_peek(struct stack* s, size_t o);
|
||||||
|
void stack_popn(struct stack* s, size_t n);
|
||||||
|
void stack_slide(struct stack* s, size_t n);
|
||||||
|
void stack_update(struct stack* s, size_t o);
|
||||||
|
void stack_alloc(struct stack* s, size_t o);
|
||||||
|
void stack_pack(struct stack* s, size_t n, int8_t t);
|
||||||
|
void stack_split(struct stack* s, size_t n);
|
||||||
|
|
||||||
|
struct node_base* eval(struct node_base* n);
|
||||||
35
code/compiler/08/scanner.l
Normal file
35
code/compiler/08/scanner.l
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
%option noyywrap

%{
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"

/* Make the generated yylex() return complete parser symbols, matching
 * the yy::parser::make_* factories used in the rules below. */
#define YY_DECL yy::parser::symbol_type yylex()

/* Whitespace (spaces, tabs, carriage returns, newlines) is skipped.
 * Tabs and carriage returns are included so they are not left to flex's
 * default rule, which would echo them to standard output. */
%}

%%

[ \t\r\n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }

%%
|
||||||
99
code/compiler/08/type.cpp
Normal file
99
code/compiler/08/type.cpp
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include "type.hpp"
|
||||||
|
#include <sstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
auto it = mgr.types.find(name);
|
||||||
|
if(it != mgr.types.end()) {
|
||||||
|
it->second->print(mgr, to);
|
||||||
|
} else {
|
||||||
|
to << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print a base type: just its name. The manager is not consulted.
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
    to << name;
}
|
||||||
|
|
||||||
|
// Print a function type as "<left> -> (<right>)". The right-hand side is
// always parenthesised; the left-hand side never is.
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
    left->print(mgr, to);

    to << " -> (";
    right->print(mgr, to);
    to << ")";
}
|
||||||
|
|
||||||
|
std::string type_mgr::new_type_name() {
|
||||||
|
int temp = last_id++;
|
||||||
|
std::string str = "";
|
||||||
|
|
||||||
|
while(temp != -1) {
|
||||||
|
str += (char) ('a' + (temp % 26));
|
||||||
|
temp = temp / 26 - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse(str.begin(), str.end());
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a fresh type variable carrying a newly generated name.
type_ptr type_mgr::new_type() {
    return std::make_shared<type_var>(new_type_name());
}
|
||||||
|
|
||||||
|
// Create a function type whose parameter and result are two fresh type
// variables.
//
// The variables are created in separate statements so that the parameter
// always receives the earlier name. Creating them as two arguments of a
// single call would leave the order up to the compiler.
type_ptr type_mgr::new_arrow_type() {
    type_ptr param = new_type();
    type_ptr result = new_type();
    return type_ptr(new type_arr(std::move(param), std::move(result)));
}
|
||||||
|
|
||||||
|
// Follow the chain of type-variable bindings starting at `t`.
//
// Returns the first type reached that is either not a type variable or is
// a type variable with no binding recorded in `types`. In the latter case
// `var` is set to that unbound variable; otherwise `var` is set to nullptr.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;

    for(;;) {
        type_var* as_var = dynamic_cast<type_var*>(t.get());
        if(!as_var) break;

        auto binding = types.find(as_var->name);
        if(binding == types.end()) {
            var = as_var;
            break;
        }
        t = binding->second;
    }

    return t;
}
|
||||||
|
|
||||||
|
// Make the two types equal by recording bindings for type variables.
//
// Both sides are resolved first. If either side ends in an unbound
// variable (the left is tried first), that variable is bound to the other
// side. Two function types are unified component-wise. Two base types
// unify only when their names match. Every other combination throws
// unification_error carrying the two resolved types.
void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* lvar;
    type_var* rvar;
    l = resolve(l, lvar);
    r = resolve(r, rvar);

    if(lvar) {
        bind(lvar->name, r);
        return;
    }
    if(rvar) {
        bind(rvar->name, l);
        return;
    }

    type_arr* larr = dynamic_cast<type_arr*>(l.get());
    type_arr* rarr = larr ? dynamic_cast<type_arr*>(r.get()) : nullptr;
    if(larr && rarr) {
        unify(larr->left, rarr->left);
        unify(larr->right, rarr->right);
        return;
    }

    type_base* lid = dynamic_cast<type_base*>(l.get());
    type_base* rid = lid ? dynamic_cast<type_base*>(r.get()) : nullptr;
    if(lid && rid && lid->name == rid->name) return;

    throw unification_error(l, r);
}
|
||||||
|
|
||||||
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
|
if(other && other->name == s) return;
|
||||||
|
types[s] = t;
|
||||||
|
}
|
||||||
65
code/compiler/08/type.hpp
Normal file
65
code/compiler/08/type.hpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct type_mgr;
|
||||||
|
|
||||||
|
struct type {
|
||||||
|
virtual ~type() = default;
|
||||||
|
|
||||||
|
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using type_ptr = std::shared_ptr<type>;
|
||||||
|
|
||||||
|
struct type_var : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_var(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_base : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_base(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_data : public type_base {
|
||||||
|
struct constructor {
|
||||||
|
int tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<std::string, constructor> constructors;
|
||||||
|
|
||||||
|
type_data(std::string n)
|
||||||
|
: type_base(std::move(n)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_arr : public type {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
type_arr(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_mgr {
|
||||||
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
|
std::string new_type_name();
|
||||||
|
type_ptr new_type();
|
||||||
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
|
void bind(const std::string& s, type_ptr t);
|
||||||
|
};
|
||||||
16
code/compiler/08/type_env.cpp
Normal file
16
code/compiler/08/type_env.cpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
// Find the type bound to `name`, searching this environment first and
// then each enclosing environment in turn. Returns nullptr when no
// environment in the chain has a binding.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* env = this; env != nullptr; env = env->parent) {
        auto found = env->names.find(name);
        if(found != env->names.end()) return found->second;
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
// Bind `name` to `t` in this environment, replacing any binding this
// environment already holds for it. Enclosing environments are untouched.
void type_env::bind(const std::string& name, type_ptr t) {
    names[name] = std::move(t);
}
|
||||||
|
|
||||||
|
// Create an empty environment whose parent is this one. The child stores
// a raw pointer to this environment.
type_env type_env::scope() const {
    type_env child(this);
    return child;
}
|
||||||
16
code/compiler/08/type_env.hpp
Normal file
16
code/compiler/08/type_env.hpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <map>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
struct type_env {
|
||||||
|
std::map<std::string, type_ptr> names;
|
||||||
|
type_env const* parent = nullptr;
|
||||||
|
|
||||||
|
type_env(type_env const* p)
|
||||||
|
: parent(p) {}
|
||||||
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
|
type_ptr lookup(const std::string& name) const;
|
||||||
|
void bind(const std::string& name, type_ptr t);
|
||||||
|
type_env scope() const;
|
||||||
|
};
|
||||||
@@ -2,12 +2,11 @@
|
|||||||
title: Compiling a Functional Language Using C++, Part 0 - Intro
|
title: Compiling a Functional Language Using C++, Part 0 - Intro
|
||||||
date: 2019-08-03T01:02:30-07:00
|
date: 2019-08-03T01:02:30-07:00
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
draft: true
|
|
||||||
---
|
---
|
||||||
During my last academic term, I was enrolled in a compilers course.
|
During my last academic term, I was enrolled in a compilers course.
|
||||||
We had a final project - develop a compiler for a basic Python subset,
|
We had a final project - develop a compiler for a basic Python subset,
|
||||||
using LLVM. It was a little boring - virtually nothing about the compiler
|
using LLVM. It was a little boring - virtually nothing about the compiler
|
||||||
was __not__ covered in class, and it felt more like putting two puzzles
|
was __not__ covered in class, and it felt more like putting two puzzle
|
||||||
pieces together than building a real project.
|
pieces together than building a real project.
|
||||||
|
|
||||||
Instead, I chose to implement a compiler for a functional programming language,
|
Instead, I chose to implement a compiler for a functional programming language,
|
||||||
@@ -138,3 +137,6 @@ Here are the posts that I've written so far for this series:
|
|||||||
* [Typechecking]({{< relref "03_compiler_typechecking.md" >}})
|
* [Typechecking]({{< relref "03_compiler_typechecking.md" >}})
|
||||||
* [Small Improvements]({{< relref "04_compiler_improvements.md" >}})
|
* [Small Improvements]({{< relref "04_compiler_improvements.md" >}})
|
||||||
* [Execution]({{< relref "05_compiler_execution.md" >}})
|
* [Execution]({{< relref "05_compiler_execution.md" >}})
|
||||||
|
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
|
||||||
|
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
|
||||||
|
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
title: Compiling a Functional Language Using C++, Part 1 - Tokenizing
|
title: Compiling a Functional Language Using C++, Part 1 - Tokenizing
|
||||||
date: 2019-08-03T01:02:30-07:00
|
date: 2019-08-03T01:02:30-07:00
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
draft: true
|
|
||||||
---
|
---
|
||||||
It makes sense to build a compiler bit by bit, following the stages we outlined in
|
It makes sense to build a compiler bit by bit, following the stages we outlined in
|
||||||
the first post of the series. This is because these stages are essentially a pipeline,
|
the first post of the series. This is because these stages are essentially a pipeline,
|
||||||
@@ -48,7 +47,7 @@ are fairly simple - one or more digits is an integer, a few letters together
|
|||||||
are a variable name. In order to be able to efficiently break text up into
|
are a variable name. In order to be able to efficiently break text up into
|
||||||
such tokens, we restrict ourselves to __regular languages__. A language
|
such tokens, we restrict ourselves to __regular languages__. A language
|
||||||
is defined as a set of strings (potentially infinite), and a regular
|
is defined as a set of strings (potentially infinite), and a regular
|
||||||
language for which we can write a __regular expression__ to check if
|
language is one for which we can write a __regular expression__ to check if
|
||||||
a string is in the set. Regular expressions are a way of representing
|
a string is in the set. Regular expressions are a way of representing
|
||||||
patterns that a string has to match. We define regular expressions
|
patterns that a string has to match. We define regular expressions
|
||||||
as follows:
|
as follows:
|
||||||
@@ -77,7 +76,7 @@ Let's see some examples. An integer, such as 326, can be represented with \\([0-
|
|||||||
This means, one or more characters between 0 and 9. Some (most) regex implementations
|
This means, one or more characters between 0 and 9. Some (most) regex implementations
|
||||||
have a special symbol for \\([0-9]\\), written as \\(\\setminus d\\). A variable,
|
have a special symbol for \\([0-9]\\), written as \\(\\setminus d\\). A variable,
|
||||||
starting with a lowercase letter and containing lowercase or uppercase letters after it,
|
starting with a lowercase letter and containing lowercase or uppercase letters after it,
|
||||||
can be written as \\(\[a-z\]([a-z]+)?\\). Again, most regex implementations provide
|
can be written as \\(\[a-z\]([a-zA-Z]+)?\\). Again, most regex implementations provide
|
||||||
a special operator for \\((r_1+)?\\), written as \\(r_1*\\).
|
a special operator for \\((r_1+)?\\), written as \\(r_1*\\).
|
||||||
|
|
||||||
So how does one go about checking if a regular expression matches a string? An efficient way is to
|
So how does one go about checking if a regular expression matches a string? An efficient way is to
|
||||||
@@ -115,8 +114,8 @@ represent numbers directly into numbers, and do other small tasks.
|
|||||||
|
|
||||||
So, what tokens do we have? From our arithmetic definition, we see that we have integers.
|
So, what tokens do we have? From our arithmetic definition, we see that we have integers.
|
||||||
Let's use the regex `[0-9]+` for those. We also have the operators `+`, `-`, `*`, and `/`.
|
Let's use the regex `[0-9]+` for those. We also have the operators `+`, `-`, `*`, and `/`.
|
||||||
`-` is simple enough: the corresponding regex is `-`. We need to
|
The regex for `-` is simple enough: it's just `-`. However, we need to
|
||||||
preface our `/`, `+` and `*` with a backslash, though, since they happen to also be modifiers
|
preface our `/`, `+` and `*` with a backslash, since they happen to also be modifiers
|
||||||
in flex's regular expressions: `\/`, `\+`, `\*`.
|
in flex's regular expressions: `\/`, `\+`, `\*`.
|
||||||
|
|
||||||
Let's also represent some reserved keywords. We'll say that `defn`, `data`, `case`, and `of`
|
Let's also represent some reserved keywords. We'll say that `defn`, `data`, `case`, and `of`
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
title: Compiling a Functional Language Using C++, Part 2 - Parsing
|
title: Compiling a Functional Language Using C++, Part 2 - Parsing
|
||||||
date: 2019-08-03T01:02:30-07:00
|
date: 2019-08-03T01:02:30-07:00
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
draft: true
|
|
||||||
---
|
---
|
||||||
In the previous post, we covered tokenizing. We learned how to convert an input string into logical segments, and even wrote up a tokenizer to do it according to the rules of our language. Now, it's time to make sense of the tokens, and parse our language.
|
In the previous post, we covered tokenizing. We learned how to convert an input string into logical segments, and even wrote up a tokenizer to do it according to the rules of our language. Now, it's time to make sense of the tokens, and parse our language.
|
||||||
|
|
||||||
@@ -38,7 +37,7 @@ $$
|
|||||||
In practice, there are many ways of using a CFG to parse a programming language. Various parsing algorithms support various subsets
|
In practice, there are many ways of using a CFG to parse a programming language. Various parsing algorithms support various subsets
|
||||||
of context free languages. For instance, top down parsers follow nearly exactly the structure that we had. They try to parse
|
of context free languages. For instance, top down parsers follow nearly exactly the structure that we had. They try to parse
|
||||||
a nonterminal by trying to match each symbol in its body. In the rule \\(S \\rightarrow \\alpha \\beta \\gamma\\), it will
|
a nonterminal by trying to match each symbol in its body. In the rule \\(S \\rightarrow \\alpha \\beta \\gamma\\), it will
|
||||||
first try to match \\(alpha\\), then \\(beta\\), and so on. If one of the three contains a nonterminal, it will attempt to parse
|
first try to match \\(\\alpha\\), then \\(\\beta\\), and so on. If one of the three contains a nonterminal, it will attempt to parse
|
||||||
that nonterminal following the same strategy. However, this leaves a flaw - For instance, consider the grammar
|
that nonterminal following the same strategy. However, this leaves a flaw - For instance, consider the grammar
|
||||||
$$
|
$$
|
||||||
\\begin{align}
|
\\begin{align}
|
||||||
@@ -105,7 +104,7 @@ A\_{add} & \\rightarrow A\_{add}-A\_{mult} \\\\\\
|
|||||||
A\_{add} & \\rightarrow A\_{mult}
|
A\_{add} & \\rightarrow A\_{mult}
|
||||||
\\end{align}
|
\\end{align}
|
||||||
$$
|
$$
|
||||||
The first rule matches another addition, added to the result of another addition. We use the addition in the body
|
The first rule matches another addition, added to the result of a multiplication. Similarly, the second rule matches another addition, from which the result of a multiplication is then subtracted. We use the \\(A\_{add}\\) on the left side of \\(+\\) and \\(-\\) in the body
|
||||||
because we want to be able to parse strings like `1+2+3+4`, which we want to view as `((1+2)+3)+4` (mostly because
|
because we want to be able to parse strings like `1+2+3+4`, which we want to view as `((1+2)+3)+4` (mostly because
|
||||||
subtraction is [left-associative](https://en.wikipedia.org/wiki/Operator_associativity)). So, we want the top level
|
subtraction is [left-associative](https://en.wikipedia.org/wiki/Operator_associativity)). So, we want the top level
|
||||||
of the tree to be the rightmost `+` or `-`, since that means it will be the "last" operation. You may be asking,
|
of the tree to be the rightmost `+` or `-`, since that means it will be the "last" operation. You may be asking,
|
||||||
@@ -150,7 +149,7 @@ What's the last \\(C\\)? We also want a "thing" to be a case expression. Here ar
|
|||||||
$$
|
$$
|
||||||
\\begin{align}
|
\\begin{align}
|
||||||
C & \\rightarrow \\text{case} \\; A\_{add} \\; \\text{of} \\; \\{ L\_B\\} \\\\\\
|
C & \\rightarrow \\text{case} \\; A\_{add} \\; \\text{of} \\; \\{ L\_B\\} \\\\\\
|
||||||
L\_B & \\rightarrow R \\; , \\; L\_B \\\\\\
|
L\_B & \\rightarrow R \\; L\_B \\\\\\
|
||||||
L\_B & \\rightarrow R \\\\\\
|
L\_B & \\rightarrow R \\\\\\
|
||||||
R & \\rightarrow N \\; \\text{arrow} \\; \\{ A\_{add} \\} \\\\\\
|
R & \\rightarrow N \\; \\text{arrow} \\; \\{ A\_{add} \\} \\\\\\
|
||||||
N & \\rightarrow \\text{lowerVar} \\\\\\
|
N & \\rightarrow \\text{lowerVar} \\\\\\
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
---
|
---
|
||||||
title: Compiling a Functional Language Using C++, Part 3 - Type Checking
|
title: Compiling a Functional Language Using C++, Part 3 - Type Checking
|
||||||
date: 2019-08-06T14:26:38-07:00
|
date: 2019-08-06T14:26:38-07:00
|
||||||
draft: true
|
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
---
|
---
|
||||||
I think tokenizing and parsing are boring. The thing is, looking at syntax
|
I think tokenizing and parsing are boring. The thing is, looking at syntax
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
---
|
---
|
||||||
title: Compiling a Functional Language Using C++, Part 4 - Small Improvements
|
title: Compiling a Functional Language Using C++, Part 4 - Small Improvements
|
||||||
date: 2019-08-06T14:26:38-07:00
|
date: 2019-08-06T14:26:38-07:00
|
||||||
draft: true
|
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
---
|
---
|
||||||
We've done quite a big push in the previous post. We defined
|
We've done quite a big push in the previous post. We defined
|
||||||
@@ -65,8 +64,8 @@ Finally, just like `ast_case::typecheck` called
|
|||||||
|
|
||||||
We follow the same implementation strategy for patterns,
|
We follow the same implementation strategy for patterns,
|
||||||
but we don't need indentation, or recursion:
|
but we don't need indentation, or recursion:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 114 116 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 115 117 >}}
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 122 127 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 123 128 >}}
|
||||||
|
|
||||||
In `main`, let's print the bodies of each function we receive from the parser:
|
In `main`, let's print the bodies of each function we receive from the parser:
|
||||||
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
|
{{< codelines "C++" "compiler/04/main.cpp" 47 56 >}}
|
||||||
@@ -160,12 +159,12 @@ we simply pass the type of the expression to be case analyzed into
|
|||||||
the pattern matching method. However, since we don't want
|
the pattern matching method. However, since we don't want
|
||||||
case analysis on functions, we ensure that the type of the expression
|
case analysis on functions, we ensure that the type of the expression
|
||||||
is `type_base`. If not, we report this:
|
is `type_base`. If not, we report this:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 100 102 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 107 110 >}}
|
||||||
|
|
||||||
The next exception is in `pattern_constr::match`. It occurs
|
The next exception is in `pattern_constr::match`. It occurs
|
||||||
when the pattern has a constructor we don't recognize, and
|
when the pattern has a constructor we don't recognize, and
|
||||||
that's exactly what we report:
|
that's exactly what we report:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 131 133 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 132 134 >}}
|
||||||
|
|
||||||
The next exception occurs in a loop, when we bind
|
The next exception occurs in a loop, when we bind
|
||||||
types for each of the constructor pattern's variables.
|
types for each of the constructor pattern's variables.
|
||||||
@@ -174,7 +173,7 @@ constructor type to a `type_arr`. Conceptually,
|
|||||||
this means that the pattern wants to apply the
|
this means that the pattern wants to apply the
|
||||||
constructor to more parameters than it actually
|
constructor to more parameters than it actually
|
||||||
takes:
|
takes:
|
||||||
{{< codelines "C++" "compiler/04/ast.cpp" 137 137 >}}
|
{{< codelines "C++" "compiler/04/ast.cpp" 138 138 >}}
|
||||||
|
|
||||||
We remove the last throw at the bottom of `pattern_constr::match`.
|
We remove the last throw at the bottom of `pattern_constr::match`.
|
||||||
This is because once unification succeeds, we know
|
This is because once unification succeeds, we know
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
---
|
---
|
||||||
title: Compiling a Functional Language Using C++, Part 5 - Execution
|
title: Compiling a Functional Language Using C++, Part 5 - Execution
|
||||||
date: 2019-08-06T14:26:38-07:00
|
date: 2019-08-06T14:26:38-07:00
|
||||||
draft: true
|
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
---
|
---
|
||||||
{{< gmachine_css >}}
|
{{< gmachine_css >}}
|
||||||
@@ -47,7 +46,7 @@ defn snd p = {
|
|||||||
P x y -> { y }
|
P x y -> { y }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defn slow x = { returns x after waiting for 4 seconds }
|
defn slow x = { returns x after waiting for 1 second }
|
||||||
defn main = { fst (P (slow 320) (slow 6)) }
|
defn main = { fst (P (slow 320) (slow 6)) }
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -559,7 +558,9 @@ rule to Unwind:
|
|||||||
{{< /gmachine_inner >}}
|
{{< /gmachine_inner >}}
|
||||||
{{< /gmachine >}}
|
{{< /gmachine >}}
|
||||||
|
|
||||||
Just one more! Sometimes, it's possible for a tree node to reference itself.
|
Just a couple more special-purpose instructions, and we're done!
|
||||||
|
|
||||||
|
Sometimes, it's possible for a tree node to reference itself.
|
||||||
For instance, Haskell defines the
|
For instance, Haskell defines the
|
||||||
[fixpoint combinator](https://en.wikipedia.org/wiki/Fixed-point_combinator)
|
[fixpoint combinator](https://en.wikipedia.org/wiki/Fixed-point_combinator)
|
||||||
as follows:
|
as follows:
|
||||||
@@ -587,9 +588,27 @@ We can allocate an indirection on the stack, and call Update on it when
|
|||||||
we've constructed a node. While we're constructing the tree, we can
|
we've constructed a node. While we're constructing the tree, we can
|
||||||
refer to the indirection when a self-reference is required.
|
refer to the indirection when a self-reference is required.
|
||||||
|
|
||||||
|
Lastly, we also define a Pop instruction, which just removes
|
||||||
|
some number of nodes from the stack. We want this because
|
||||||
|
calling Update at the end of a function modifies a node further up the stack,
|
||||||
|
leaving anything on top of the stack after that node as scratch work. We get
|
||||||
|
rid of that scratch work simply by popping it.
|
||||||
|
|
||||||
|
{{< gmachine "Pop" >}}
|
||||||
|
{{< gmachine_inner "Before">}}
|
||||||
|
\( \text{Pop} \; n : i \quad a_1, a_2, ..., a_n : s \quad d \quad h \quad m \)
|
||||||
|
{{< /gmachine_inner >}}
|
||||||
|
{{< gmachine_inner "After" >}}
|
||||||
|
\( i \quad s \quad d \quad h \quad m \)
|
||||||
|
{{< /gmachine_inner >}}
|
||||||
|
{{< gmachine_inner "Description" >}}
|
||||||
|
Pop \(n\) nodes from the stack.
|
||||||
|
{{< /gmachine_inner >}}
|
||||||
|
{{< /gmachine >}}
|
||||||
|
|
||||||
That's it for the instructions. Knowing them, however, doesn't
|
That's it for the instructions. Knowing them, however, doesn't
|
||||||
tell us what to do with our `ast` structs. We'll need to define
|
tell us what to do with our `ast` structs. We'll need to define
|
||||||
rules to translate trees into these instructions, and I've already
|
rules to translate trees into these instructions, and I've already
|
||||||
alluded to this when we went over `double 326`.
|
alluded to this when we went over `double 326`.
|
||||||
However, this has already gotten pretty long,
|
However, this has already gotten pretty long,
|
||||||
so we'll do it in the next post: (link me!)
|
so we'll do it in the next post: [Part 6 - Compilation]({{< relref "06_compiler_compilation.md" >}}).
|
||||||
|
|||||||
504
content/blog/06_compiler_compilation.md
Normal file
504
content/blog/06_compiler_compilation.md
Normal file
@@ -0,0 +1,504 @@
|
|||||||
|
---
|
||||||
|
title: Compiling a Functional Language Using C++, Part 6 - Compilation
|
||||||
|
date: 2019-08-06T14:26:38-07:00
|
||||||
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
|
---
|
||||||
|
In the previous post, we defined a machine for graph reduction,
|
||||||
|
called a G-machine. However, this machine is still not particularly
|
||||||
|
connected to __our__ language. In this post, we will give
|
||||||
|
meanings to programs in our language in the context of
|
||||||
|
this G-machine. We will define a __compilation scheme__,
|
||||||
|
which will be a set of rules that tell us how to
|
||||||
|
translate programs in our language into G-machine instructions.
|
||||||
|
To mirror _Implementing Functional Languages: a tutorial_, we'll
|
||||||
|
call this compilation scheme \\(\\mathcal{C}\\), and write it
|
||||||
|
as \\(\\mathcal{C} ⟦e⟧ = i\\), meaning "the expression \\(e\\)
|
||||||
|
compiles to the instructions \\(i\\)".
|
||||||
|
|
||||||
|
To follow our route from the typechecking, let's start
|
||||||
|
with compiling expressions that are numbers. It's pretty easy:
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦n⟧ = [\\text{PushInt} \\; n]
|
||||||
|
$$
|
||||||
|
|
||||||
|
Here, we compiled a number expression to a list of
|
||||||
|
instructions with only one element - PushInt.
|
||||||
|
|
||||||
|
Just like when we did typechecking, let's
|
||||||
|
move on to compiling function applications. As
|
||||||
|
we informally stated in the previous chapter, since
|
||||||
|
the thing we're applying has to be on top,
|
||||||
|
we want to compile it last:
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ = \\mathcal{C} ⟦e\_2⟧ ⧺ \\mathcal{C} ⟦e\_1⟧ ⧺ [\\text{MkApp}]
|
||||||
|
$$
|
||||||
|
|
||||||
|
Here, we used the \\(⧺\\) operator to represent the concatenation of two
|
||||||
|
lists. Otherwise, this should be pretty intuitive - we first run the instructions
|
||||||
|
to create the parameter, then we run the instructions to create the function,
|
||||||
|
and finally, we combine them using MkApp.
|
||||||
|
|
||||||
|
It's variables that once again force us to adjust our strategy. If our
|
||||||
|
program is well-typed, we know our variable will be on the stack:
|
||||||
|
our definition of Unwind makes it so for functions, and we will
|
||||||
|
define our case expression compilation scheme to match. However,
|
||||||
|
we still need to know __where__ on the stack each variable is,
|
||||||
|
and this changes as the stack is modified.
|
||||||
|
|
||||||
|
To accommodate for this, we define an environment, \\(\\rho\\),
|
||||||
|
to be a partial function mapping variable names to their
|
||||||
|
offsets on the stack. We write \\(\\rho = [x \\rightarrow n, y \\rightarrow m]\\)
|
||||||
|
to say "the environment \\(\\rho\\) maps variable \\(x\\) to stack offset \\(n\\),
|
||||||
|
and variable \\(y\\) to stack offset \\(m\\)". We also write \\(\\rho \\; x\\) to
|
||||||
|
say "look up \\(x\\) in \\(\\rho\\)", since \\(\\rho\\) is a function. Finally,
|
||||||
|
to help with the ever-changing stack, we define an augmented environment
|
||||||
|
\\(\\rho^{+n}\\), such that \\(\\rho^{+n} \\; x = \\rho \\; x + n\\). In words,
|
||||||
|
this basically means "\\(\\rho^{+n}\\) has all the variables from \\(\\rho\\),
|
||||||
|
but their addresses are incremented by \\(n\\)". We now pass \\(\\rho\\)
|
||||||
|
in to \\(\\mathcal{C}\\) together with the expression \\(e\\). Let's
|
||||||
|
rewrite our first two rules. For numbers:
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦n⟧ \\; \\rho = [\\text{PushInt} \\; n]
|
||||||
|
$$
|
||||||
|
|
||||||
|
For function application:
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ \\; \\rho = \\mathcal{C} ⟦e\_2⟧ \\; \\rho ⧺ \\mathcal{C} ⟦e\_1⟧ \\; \\rho^{+1} ⧺ [\\text{MkApp}]
|
||||||
|
$$
|
||||||
|
|
||||||
|
Notice how in that last rule, we passed in \\(\\rho^{+1}\\) when compiling the function's expression. This is because
|
||||||
|
the result of running the instructions for \\(e\_2\\) will have left on the stack the function's parameter. Whatever
|
||||||
|
was at the top of the stack (and thus, had index 0), is now the second element from the top (address 1). The
|
||||||
|
same is true for all other things that were on the stack. So, we increment the environment accordingly.
|
||||||
|
|
||||||
|
With the environment, the variable rule is simple:
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦x⟧ \\; \\rho = [\\text{Push} \\; (\\rho \\; x)]
|
||||||
|
$$
|
||||||
|
|
||||||
|
One more thing. If we run across a function name, we want to
|
||||||
|
use PushGlobal rather than Push. Defining \\(f\\) to be a name
|
||||||
|
of a global function, we capture this using the following rule:
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦f⟧ \\; \\rho = [\\text{PushGlobal} \\; f]
|
||||||
|
$$
|
||||||
|
|
||||||
|
Now it's time for us to compile case expressions, but there's a bit of
|
||||||
|
an issue - our case expression branches don't map one-to-one with
|
||||||
|
the \\(t \\rightarrow i\_t\\) format of the Jump instruction.
|
||||||
|
This is because we allow for name patterns in the form \\(x\\),
|
||||||
|
which can possibly match more than one tag. Consider this
|
||||||
|
rather useless example:
|
||||||
|
|
||||||
|
```
|
||||||
|
data Bool = { True, False }
|
||||||
|
defn weird b = { case b of { b -> { False } } }
|
||||||
|
```
|
||||||
|
|
||||||
|
We only have one branch, but we have two tags that should
|
||||||
|
lead to it! Not only that, but variable patterns are
|
||||||
|
location-dependent: if a variable pattern comes
|
||||||
|
before a constructor pattern, then the constructor
|
||||||
|
pattern will never be reached. On the other hand,
|
||||||
|
if a constructor pattern comes before a variable
|
||||||
|
pattern, it will be tried before the variable pattern,
|
||||||
|
and thus is reachable.
|
||||||
|
|
||||||
|
We will ignore this problem for now - we will define our semantics
|
||||||
|
as though each case expression branch can match exactly one tag.
|
||||||
|
In our C++ code, we will write a conversion function that will
|
||||||
|
figure out which tag goes to which sequence of instructions.
|
||||||
|
Effectively, we'll be performing [desugaring](https://en.wikipedia.org/wiki/Syntactic_sugar).
|
||||||
|
|
||||||
|
Now, on to defining the compilation rules for case expressions.
|
||||||
|
It's helpful to define compiling a single branch of a case expression
|
||||||
|
separately. For a branch in the form \\(t \\; x\_1 \\; x\_2 \\; ... \\; x\_n \\rightarrow \text{body}\\),
|
||||||
|
we define a compilation scheme \\(\\mathcal{A}\\) as follows:
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\begin{align}
|
||||||
|
\\mathcal{A} ⟦t \\; x\_1 \\; ... \\; x\_n \\rightarrow \text{body}⟧ \\; \\rho & =
|
||||||
|
t \\rightarrow [\\text{Split} \\; n] \\; ⧺ \\; \\mathcal{C}⟦\\text{body}⟧ \\; \\rho' \\; ⧺ \\; [\\text{Slide} \\; n] \\\\\\
|
||||||
|
\text{where} \\; \\rho' &= \\rho^{+n}[x\_1 \\rightarrow 0, ..., x\_n \\rightarrow n - 1]
|
||||||
|
\\end{align}
|
||||||
|
$$
|
||||||
|
|
||||||
|
First, we run Split - the node on the top of the stack is a packed constructor,
|
||||||
|
and we want access to its member variables, since they can be referenced by
|
||||||
|
the branch's body via \\(x\_i\\). For the same reason, we must make sure to include
|
||||||
|
\\(x\_1\\) through \\(x\_n\\) in our environment. Furthermore, since the split values now occupy the stack,
|
||||||
|
we have to offset our environment by \\(n\\) before adding bindings to our new variables.
|
||||||
|
Doing all these things gives us \\(\\rho'\\), which we use to compile the body, placing
|
||||||
|
the resulting instructions after Split. This leaves us with the desired graph on top of
|
||||||
|
the stack - the only thing left to do is to clean up the stack of the unpacked values,
|
||||||
|
which we do using Slide.
|
||||||
|
|
||||||
|
Notice that we didn't just create instructions - we created a mapping from the tag \\(t\\)
|
||||||
|
to the instructions that correspond to it.
|
||||||
|
|
||||||
|
Now, it's time for compiling the whole case expression. We first want
|
||||||
|
to construct the graph for the expression we want to perform case analysis on.
|
||||||
|
Next, we want to evaluate it (since we need a packed value, not a graph,
|
||||||
|
to read the tag). Finally, we perform a jump depending on the tag. This
|
||||||
|
is captured by the following rule:
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
|
||||||
|
\\mathcal{C} ⟦e⟧ \\; \\rho \\; ⧺ [\\text{Eval}, \\text{Jump} \\; [\\mathcal{A} ⟦\\text{alt}_1⟧ \\; \\rho, ..., \\mathcal{A} ⟦\\text{alt}_n⟧ \\; \\rho]]
|
||||||
|
$$
|
||||||
|
|
||||||
|
This works because \\(\\mathcal{A}\\) creates not only instructions,
|
||||||
|
but also a tag mapping. We simply populate our Jump instruction with such mappings
|
||||||
|
resulting from compiling each branch.
|
||||||
|
|
||||||
|
You may have noticed that we didn't add rules for binary operators. Just like
|
||||||
|
with type checking, we treat them as function calls. However, rather than constructing
|
||||||
|
graphs when we have to instantiate those functions, we simply
|
||||||
|
evaluate the arguments and perform the relevant arithmetic operation using BinOp.
|
||||||
|
We will do a similar thing for constructors.
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
With that out of the way, we can get around to writing some code. Let's
|
||||||
|
first define C++ structs for the instructions of the G-machine:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/06/instruction.hpp" >}}
|
||||||
|
|
||||||
|
I omit the implementation of the various (trivial) `print` methods in this post;
|
||||||
|
as always, you can look at the full project source code, which is
|
||||||
|
freely available for each post in the series.
|
||||||
|
|
||||||
|
We can now envision a method on the `ast` struct that takes an environment
|
||||||
|
(just like our compilation scheme takes the environment \\(\\rho\\\)),
|
||||||
|
and compiles the `ast`. Rather than returning a vector
|
||||||
|
of instructions (which involves copying, unless we get some optimization kicking in),
|
||||||
|
we'll pass a reference to a vector to our method. The method will then place the generated
|
||||||
|
instructions into the vector.
|
||||||
|
|
||||||
|
There's one more thing to be considered. How do we tell apart a "global"
|
||||||
|
from a variable? A naive solution would be to take a list or map of
|
||||||
|
global functions as a third parameter to our `compile` method.
|
||||||
|
But there's an easier way! We know that the program passed type checking.
|
||||||
|
This means that every referenced variable exists. From there, the situation is easy -
|
||||||
|
if actual variable names are kept in the environment, \\(\\rho\\), then whenever
|
||||||
|
we see a variable that __isn't__ in the current environment, it must be a function name.
|
||||||
|
|
||||||
|
Having finished contemplating our method, it's time to define a signature:
|
||||||
|
```C++
|
||||||
|
virtual void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
```
|
||||||
|
|
||||||
|
Ah, but now we have to define "environment". Let's do that. Here's our header:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/06/env.hpp" >}}
|
||||||
|
|
||||||
|
And here's the source file:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/06/env.cpp" >}}
|
||||||
|
|
||||||
|
There's not that much to see here, but let's go through it anyway.
|
||||||
|
We define an environment as a linked list, kind of like
|
||||||
|
we did with the type environment. This time, though,
|
||||||
|
we use shared pointers instead of raw pointers to reference the parent.
|
||||||
|
I decided on this because we will need to be using virtual methods
|
||||||
|
(since we have two subclasses of `env`), and thus will need to
|
||||||
|
be passing the `env` by pointer. At that point, we might as well
|
||||||
|
use the "proper" way!
|
||||||
|
|
||||||
|
I implemented the environment as a linked list because it is, in essence,
|
||||||
|
a stack. However, not every "offset" in a stack is introduced by
|
||||||
|
binding variables - for instance, when we create an application node,
|
||||||
|
we first build the argument value on the stack, and then,
|
||||||
|
with that value still on the stack, build the left hand side of the application.
|
||||||
|
Thus, all the variable positions are offset by the presence of the argument
|
||||||
|
on the stack, and we must account for that. Similarly, in cases when we will
|
||||||
|
allocate space on the stack (we will run into these cases later), we will
|
||||||
|
need to account for that change. Thus, since we can increment
|
||||||
|
the offset in two ways (binding a variable and building something on the stack),
|
||||||
|
we allow for two types of nodes in our `env` stack.
|
||||||
|
|
||||||
|
During recursion we will be tweaking the return value of `get_offset` to
|
||||||
|
calculate the final location of a variable on the stack (if the
|
||||||
|
parent of a node returned offset `1`, but the node itself is a variable
|
||||||
|
node and thus introduces another offset, we need to return `2`). Because
|
||||||
|
of this, we cannot reasonably return a constant like `-1` (it will quickly
|
||||||
|
be made positive on a long list), and thus we throw an exception. To
|
||||||
|
allow for a safe way to check for an offset, without try-catch,
|
||||||
|
we also add a `has_variable` method which checks if the lookup will succeed.
|
||||||
|
A better approach would be to use `std::optional`, but it's C++17, so
|
||||||
|
we'll shy away from it.
|
||||||
|
|
||||||
|
It will also help to move some of the functions on the `binop` enum
|
||||||
|
into a separate file. The new header is pretty small:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/06/binop.hpp" >}}
|
||||||
|
|
||||||
|
The new source file is not much longer:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/06/binop.cpp" >}}
|
||||||
|
|
||||||
|
And now, we begin our implementation. Let's start with the easy ones:
|
||||||
|
`ast_int`, `ast_lid` and `ast_uid`. The code for `ast_int` involves just pushing
|
||||||
|
the integer into the stack:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 36 38 >}}
|
||||||
|
|
||||||
|
The code for `ast_lid` needs to check if the variable is global or local,
|
||||||
|
just like we discussed:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 53 58 >}}
|
||||||
|
|
||||||
|
We do not have to do this for `ast_uid`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 73 75 >}}
|
||||||
|
|
||||||
|
On to `ast_binop`! This is the first time we have to change our environment.
|
||||||
|
As we said earlier, once we build the right operand on the stack, every offset that we counted
|
||||||
|
from the top of the stack will have been shifted by 1 (we see this
|
||||||
|
in our compilation scheme for function application). So,
|
||||||
|
we create a new environment with `env_offset`, and use that
|
||||||
|
when we compile the left child:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 103 110 >}}
|
||||||
|
|
||||||
|
`ast_binop` performs two applications: `(+) lhs rhs`.
|
||||||
|
We push `rhs`, then `lhs`, then `(+)`, and then use MkApp
|
||||||
|
twice. In `ast_app`, we only need to perform one application,
|
||||||
|
`lhs rhs`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 134 138 >}}
|
||||||
|
|
||||||
|
Note that we also extend our environment in this one,
|
||||||
|
for the exact same reason as before.
|
||||||
|
|
||||||
|
Case expressions are the only thing left on the agenda. This
|
||||||
|
is the time during which we have to perform desugaring. Here,
|
||||||
|
though, we run into an issue: we don't have tags assigned to constructors!
|
||||||
|
We need to adjust our code to keep track of the tags of the various
|
||||||
|
constructors of a type. To do this, we add a subclass for the `type_base`
|
||||||
|
struct, called `type_data`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/type.hpp" 33 42 >}}
|
||||||
|
|
||||||
|
When we create types from `definition_data`, we tag the corresponding constructors:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/definition.cpp" 54 71 >}}
|
||||||
|
|
||||||
|
Ah, but adding constructor info to the type doesn't solve the problem.
|
||||||
|
Once we performed type checking, we don't keep
|
||||||
|
the types that we computed for an AST node, in the node. And obviously, we don't want
|
||||||
|
to go looking for them again. Furthermore, we can't just look up a constructor
|
||||||
|
in the environment, since we can well have patterns that don't have __any__ constructors:
|
||||||
|
|
||||||
|
```
|
||||||
|
case l of {
|
||||||
|
l -> { 0 }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
So, we want each `ast` node to store its type (well, in practice we only need this for
|
||||||
|
`ast_case`, but we might as well store it for all nodes). We can add it, no problem.
|
||||||
|
To add to that, we can add another, non-virtual `typecheck` method (let's call it `typecheck_common`,
|
||||||
|
since naming is hard). This method will call `typecheck`, and store the output into
|
||||||
|
the `node_type` field.
|
||||||
|
|
||||||
|
The signature is identical to `typecheck`, except it's neither virtual nor const:
|
||||||
|
```
|
||||||
|
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||||
|
```
|
||||||
|
|
||||||
|
And the implementation is as simple as you think:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 9 12 >}}
|
||||||
|
|
||||||
|
In client code (`definition_defn::typecheck_first` for instance), we should now
|
||||||
|
use `typecheck_common` instead of `typecheck`. With that done, we're almost there.
|
||||||
|
However, we're still missing something: most likely, the initial type assigned to any
|
||||||
|
node is a `type_var`, or a type variable. In this case, `type_var` __needs__ the information
|
||||||
|
from `type_mgr`, which we will not be keeping around. Besides, it's cleaner to keep the actual type
|
||||||
|
as a member of the node, not a variable type that references it. In order
|
||||||
|
to address this, we write two conversion functions that call `resolve` on all
|
||||||
|
types in an AST, given a type manager. After this is done, the type manager can be thrown away.
|
||||||
|
The signatures of the functions are as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
void resolve_common(const type_mgr& mgr);
|
||||||
|
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||||
|
```
|
||||||
|
|
||||||
|
We also add the `resolve` method to `definition`, so that we can call it
|
||||||
|
without having to run `dynamic_cast`. The implementation for `ast::resolve_common`
|
||||||
|
just resolves the type:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 14 21 >}}
|
||||||
|
|
||||||
|
The virtual `ast::resolve` just calls `ast::resolve_common` on all `ast` children
|
||||||
|
of a node. Here's a sample implementation from `ast_binop`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 98 101 >}}
|
||||||
|
|
||||||
|
And here's the implementation of `definition::resolve` on `definition_defn`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/definition.cpp" 32 42 >}}
|
||||||
|
|
||||||
|
Finally, we call `resolve` at the end of `typecheck_program` in `main.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/main.cpp" 40 42 >}}
|
||||||
|
|
||||||
|
At last, we're ready to implement the code for compiling `ast_case`.
|
||||||
|
Here it is, in all its glory:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/ast.cpp" 178 230 >}}
|
||||||
|
|
||||||
|
There's a lot to unpack here. First of all, just like we said in the compilation
|
||||||
|
scheme, we want to build and evaluate the expression that's being analyzed.
|
||||||
|
Once that's done, however, things get more tricky. We know that each
|
||||||
|
branch of a case expression will correspond to a vector of instructions -
|
||||||
|
in fact, our jump instruction contains a mapping from tags to instructions.
|
||||||
|
As we also discussed above, each list of instructions can be mapped to
|
||||||
|
by multiple tags. We don't want to recompile the same sequence of instructions
|
||||||
|
multiple times (or indeed, generate machine code for it). So, we keep
|
||||||
|
a mapping of tags to their corresponding sequences of instructions. We implement
|
||||||
|
this by having a vector of vectors of instructions (in which each inner vector
|
||||||
|
represents the code for a branch), and a map of tag number to index
|
||||||
|
in the vector containing all the branches. This way, multiple tags
|
||||||
|
can point to the same instruction set without duplicating information.
|
||||||
|
|
||||||
|
We also don't allow a tag to be mapped to more than one sequence of instructions.
|
||||||
|
This is handled differently depending on whether a variable pattern or a
|
||||||
|
constructor pattern is encountered. Variable patterns map all
|
||||||
|
tags that haven't been mapped yet, so no error can occur. Constructor patterns,
|
||||||
|
though, can explicitly try to map the same tag twice, and we don't want that.
|
||||||
|
|
||||||
|
I implied in the previous paragraph the implementation of our case expression
|
||||||
|
compilation algorithm, but let's go through it. Once we've compiled
|
||||||
|
the expression to be analyzed, and evaluated it (just like in our definitions
|
||||||
|
above), we proceed to look at all the branches specified in the case expression.
|
||||||
|
|
||||||
|
If a branch has a variable pattern, we must map to the result of the compilation
|
||||||
|
all the remaining, unmapped tags. We also aren't going to be taking apart
|
||||||
|
our value, so we don't need to use Split, but we do need to add 1 to the
|
||||||
|
environment offset to account for the presence of that value. So,
|
||||||
|
we compile the branch body with that offset, and iterate through
|
||||||
|
all the constructors of our data type. We skip a constructor
|
||||||
|
if it's been mapped, and if it hasn't been, we map it to the index
|
||||||
|
that this branch body will have in our list. Finally,
|
||||||
|
we push the newly compiled instruction sequence into the list of branch
|
||||||
|
bodies.
|
||||||
|
|
||||||
|
If a branch is a constructor pattern, on the other hand, we lead our compilation
|
||||||
|
output with a Split. This takes off the value from the stack, but pushes on
|
||||||
|
all the parameters of the constructor. We account for this by incrementing the
|
||||||
|
environment with the offset given by the number of arguments (just like we did
|
||||||
|
in our definitions of our compilation scheme). Before we map the tag,
|
||||||
|
we ensure that it hasn't already been mapped (and throw an exception, currently
|
||||||
|
in the form of a type error due to the growing length of this post),
|
||||||
|
and finally map it and insert the new branch code into the list of branches.
|
||||||
|
|
||||||
|
After we're done with all the branches, we also check for non-exhaustive patterns,
|
||||||
|
since otherwise we could run into runtime errors. With this, the case expression,
|
||||||
|
and the last of the AST nodes, can be compiled.
|
||||||
|
|
||||||
|
We also add a `compile` method to definitions, since they contain
|
||||||
|
our AST nodes. The method is empty for `definition_data`, and
|
||||||
|
looks as follows for `definition_defn`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/definition.cpp" 44 52 >}}
|
||||||
|
|
||||||
|
Notice that we terminate the function with Update and Pop. This
|
||||||
|
will turn the `ast_app` node that served as the "root"
|
||||||
|
of the application into an indirection to the value that we have computed.
|
||||||
|
Doing so will also remove all "scratch work" from the stack.
|
||||||
|
In essence, this is how we can lazily evaluate expressions.
|
||||||
|
|
||||||
|
Finally, we make a function in our `main.cpp` file to compile
|
||||||
|
all the definitions:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/06/main.cpp" 45 56 >}}
|
||||||
|
|
||||||
|
In this method, we also include some extra
|
||||||
|
output to help us see the result of our compilation. Since
|
||||||
|
at the moment, only the `definition_defn` program has to
|
||||||
|
be compiled, we try to cast all definitions to it, and if
|
||||||
|
we succeed, we print them out.
|
||||||
|
|
||||||
|
Let's try it all out! For the below sample program:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/06/examples/works1.txt" >}}
|
||||||
|
|
||||||
|
Our compiler produces the following new output:
|
||||||
|
```
|
||||||
|
PushInt(6)
|
||||||
|
PushInt(320)
|
||||||
|
PushGlobal(plus)
|
||||||
|
MkApp()
|
||||||
|
MkApp()
|
||||||
|
Update(0)
|
||||||
|
Pop(0)
|
||||||
|
|
||||||
|
Push(1)
|
||||||
|
Push(1)
|
||||||
|
PushGlobal(plus)
|
||||||
|
MkApp()
|
||||||
|
MkApp()
|
||||||
|
Update(2)
|
||||||
|
Pop(2)
|
||||||
|
```
|
||||||
|
|
||||||
|
The first sequence of instructions is clearly `main`. It creates
|
||||||
|
an application of `plus` to `320`, and then applies that to
|
||||||
|
`6`, which results in `plus 320 6`, which is correct. The
|
||||||
|
second sequence of instructions pushes the parameter that
|
||||||
|
sits on offset 1 from the top of the stack (`y`). It then
|
||||||
|
pushes a parameter from the same offset again, but this time,
|
||||||
|
since `y` was previously pushed on the stack, `x` is now
|
||||||
|
in that position, so `x` is pushed onto the stack.
|
||||||
|
Finally, `+` is pushed, and the application
|
||||||
|
`(+) x y` is created, which is equivalent to `x+y`.
|
||||||
|
|
||||||
|
Let's also take a look at a case expression program:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/06/examples/works3.txt" >}}
|
||||||
|
|
||||||
|
The result of the compilation is as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
Push(0)
|
||||||
|
Eval()
|
||||||
|
Jump(
|
||||||
|
Split()
|
||||||
|
PushInt(0)
|
||||||
|
Slide(0)
|
||||||
|
|
||||||
|
Split()
|
||||||
|
Push(1)
|
||||||
|
PushGlobal(length)
|
||||||
|
MkApp()
|
||||||
|
PushInt(1)
|
||||||
|
PushGlobal(plus)
|
||||||
|
MkApp()
|
||||||
|
MkApp()
|
||||||
|
Slide(2)
|
||||||
|
|
||||||
|
)
|
||||||
|
Update(1)
|
||||||
|
Pop(1)
|
||||||
|
```
|
||||||
|
|
||||||
|
We push the first (and only) parameter onto the stack. We then make
|
||||||
|
sure it's evaluated, and perform case analysis: if the list
|
||||||
|
is `Nil`, we simply push the number 0 onto the stack. If it's
|
||||||
|
a concatenation of some `x` and another list `xs`, we
|
||||||
|
push `xs` and `length` onto the stack, make the application
|
||||||
|
(`length xs`), push the 1, and finally apply `+` to the result.
|
||||||
|
This all makes sense!
|
||||||
|
|
||||||
|
With this, we've been able to compile our expressions and functions
|
||||||
|
into G-machine code. We're not done, however - our computers
|
||||||
|
aren't G-machines. We'll need to compile our G-machine code to
|
||||||
|
__machine code__ (we will use LLVM for this), implement the
|
||||||
|
__runtime__, and develop a __garbage collector__. We'll
|
||||||
|
tackle the first of these in the next post - [Part 7 - Runtime]({{< relref "07_compiler_runtime.md" >}}).
|
||||||
@@ -1,169 +0,0 @@
|
|||||||
---
|
|
||||||
title: Compiling a Functional Language Using C++, Part 6 - Compilation
|
|
||||||
date: 2019-08-06T14:26:38-07:00
|
|
||||||
draft: true
|
|
||||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
|
||||||
---
|
|
||||||
In the previous post, we defined a machine for graph reduction,
|
|
||||||
called a G-machine. However, this machine is still not particularly
|
|
||||||
connected to __our__ language. In this post, we will give
|
|
||||||
meanings to programs in our language in the context of
|
|
||||||
this G-machine. We will define a __compilation scheme__,
|
|
||||||
which will be a set of rules that tell us how to
|
|
||||||
translate programs in our language into G-machine instructions.
|
|
||||||
To mirror _Implementing Functional Languages: a tutorial_, we'll
|
|
||||||
call this compilation scheme \\(\\mathcal{C}\\), and write it
|
|
||||||
as \\(\\mathcal{C} ⟦e⟧ = i\\), meaning "the expression \\(e\\)
|
|
||||||
compiles to the instructions \\(i\\)".
|
|
||||||
|
|
||||||
To follow our route from the typechecking, let's start
|
|
||||||
with compiling expressions that are numbers. It's pretty easy:
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦n⟧ = [\\text{PushInt} \\; n]
|
|
||||||
$$
|
|
||||||
|
|
||||||
Here, we compiled a number expression to a list of
|
|
||||||
instructions with only one element - PushInt.
|
|
||||||
|
|
||||||
Just like when we did typechecking, let's
|
|
||||||
move on to compiling function applications. As
|
|
||||||
we informally stated in the previous chapter, since
|
|
||||||
the thing we're applying has to be on top,
|
|
||||||
we want to compile it last:
|
|
||||||
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ = \\mathcal{C} ⟦e\_2⟧ ⧺ \\mathcal{C} ⟦e\_1⟧ ⧺ [\\text{MkApp}]
|
|
||||||
$$
|
|
||||||
|
|
||||||
Here, we used the \\(⧺\\) operator to represent the concatenation of two
|
|
||||||
lists. Otherwise, this should be pretty intuitive - we first run the instructions
|
|
||||||
to create the parameter, then we run the instructions to create the function,
|
|
||||||
and finally, we combine them using MkApp.
|
|
||||||
|
|
||||||
It's variables that once again force us to adjust our strategy. If our
|
|
||||||
program is well-typed, we know our variable will be on the stack:
|
|
||||||
our definition of Unwind makes it so for functions, and we will
|
|
||||||
define our case expression compilation scheme to match. However,
|
|
||||||
we still need to know __where__ on the stack each variable is,
|
|
||||||
and this changes as the stack is modified.
|
|
||||||
|
|
||||||
To accommodate for this, we define an environment, \\(\\rho\\),
|
|
||||||
to be a partial function mapping variable names to their
|
|
||||||
offsets on the stack. We write \\(\\rho = [x \\rightarrow n, y \\rightarrow m]\\)
|
|
||||||
to say "the environment \\(\\rho\\) maps variable \\(x\\) to stack offset \\(n\\),
|
|
||||||
and variable \\(y\\) to stack offset \\(m\\)". We also write \\(\\rho \\; x\\) to
|
|
||||||
say "look up \\(x\\) in \\(\\rho\\)", since \\(\\rho\\) is a function. Finally,
|
|
||||||
to help with the ever-changing stack, we define an augmented environment
|
|
||||||
\\(\\rho^{+n}\\), such that \\(\\rho^{+n} \\; x = \\rho \\; x + n\\). In words,
|
|
||||||
this basically means "\\(\\rho^{+n}\\) has all the variables from \\(\\rho\\),
|
|
||||||
but their addresses are incremented by \\(n\\)". We now pass \\(\\rho\\)
|
|
||||||
in to \\(\\mathcal{C}\\) together with the expression \\(e\\). Let's
|
|
||||||
rewrite our first two rules. For numbers:
|
|
||||||
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦n⟧ \\; \\rho = [\\text{PushInt} \\; n]
|
|
||||||
$$
|
|
||||||
|
|
||||||
For function application:
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦e\_1 \\; e\_2⟧ \\; \\rho = \\mathcal{C} ⟦e\_2⟧ \\; \\rho ⧺ \\mathcal{C} ⟦e\_1⟧ \\; \\rho^{+1} ⧺ [\\text{MkApp}]
|
|
||||||
$$
|
|
||||||
|
|
||||||
Notice how in that last rule, we passed in \\(\\rho^{+1}\\) when compiling the function's expression. This is because
|
|
||||||
the result of running the instructions for \\(e\_2\\) will have left on the stack the function's parameter. Whatever
|
|
||||||
was at the top of the stack (and thus, had index 0), is now the second element from the top (address 1). The
|
|
||||||
same is true for all other things that were on the stack. So, we increment the environment accordingly.
|
|
||||||
|
|
||||||
With the environment, the variable rule is simple:
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦x⟧ \\; \\rho = [\\text{Push} \\; (\\rho \\; x)]
|
|
||||||
$$
|
|
||||||
|
|
||||||
One more thing. If we run across a function name, we want to
|
|
||||||
use PushGlobal rather than Push. Defining \\(f\\) to be a name
|
|
||||||
of a global function, we capture this using the following rule:
|
|
||||||
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦f⟧ \\; \\rho = [\\text{PushGlobal} \\; f]
|
|
||||||
$$
|
|
||||||
|
|
||||||
Now it's time for us to compile case expressions, but there's a bit of
|
|
||||||
an issue - our case expression branches don't map one-to-one with
|
|
||||||
the \\(t \\rightarrow i\_t\\) format of the Jump instruction.
|
|
||||||
This is because we allow for name patterns in the form \\(x\\),
|
|
||||||
which can possibly match more than one tag. Consider this
|
|
||||||
rather useless example:
|
|
||||||
|
|
||||||
```
|
|
||||||
data Bool = { True, False }
|
|
||||||
defn weird b = { case b of { b -> { False } } }
|
|
||||||
```
|
|
||||||
|
|
||||||
We only have one branch, but we have two tags that should
|
|
||||||
lead to it! Not only that, but variable patterns are
|
|
||||||
location-dependent: if a variable pattern comes
|
|
||||||
before a constructor pattern, then the constructor
|
|
||||||
pattern will never be reached. On the other hand,
|
|
||||||
if a constructor pattern comes before a variable
|
|
||||||
pattern, it will be tried before the variable pattern,
|
|
||||||
and thus is reachable.
|
|
||||||
|
|
||||||
We will ignore this problem for now - we will define our semantics
|
|
||||||
as though each case expression branch can match exactly one tag.
|
|
||||||
In our C++ code, we will write a conversion function that will
|
|
||||||
figure out which tag goes to which sequence of instructions.
|
|
||||||
Effectively, we'll be performing [desugaring](https://en.wikipedia.org/wiki/Syntactic_sugar).
|
|
||||||
|
|
||||||
Now, on to defining the compilation rules for case expressions.
|
|
||||||
It's helpful to define compiling a single branch of a case expression
|
|
||||||
separately. For a branch in the form \\(t \\; x\_1 \\; x\_2 \\; ... \\; x\_n \\rightarrow \text{body}\\),
|
|
||||||
we define a compilation scheme \\(\\mathcal{A}\\) as follows:
|
|
||||||
|
|
||||||
$$
|
|
||||||
\\begin{align}
|
|
||||||
\\mathcal{A} ⟦t \\; x\_1 \\; ... \\; x\_n \\rightarrow \text{body}⟧ \\; \\rho & =
|
|
||||||
t \\rightarrow [\\text{Split} \\; n] \\; ⧺ \\; \\mathcal{C}⟦\\text{body}⟧ \\; \\rho' \\; ⧺ \\; [\\text{Slide} \\; n] \\\\\\
|
|
||||||
\text{where} \\; \\rho' &= \\rho^{+n}[x\_1 \\rightarrow 0, ..., x\_n \\rightarrow n - 1]
|
|
||||||
\\end{align}
|
|
||||||
$$
|
|
||||||
|
|
||||||
First, we run Split - the node on the top of the stack is a packed constructor,
|
|
||||||
and we want access to its member variables, since they can be referenced by
|
|
||||||
the branch's body via \\(x\_i\\). For the same reason, we must make sure to include
|
|
||||||
\\(x\_1\\) through \\(x\_n\\) in our environment. Furthermore, since the split values now occupy the stack,
|
|
||||||
we have to offset our environment by \\(n\\) before adding bindings to our new variables.
|
|
||||||
Doing all these things gives us \\(\\rho'\\), which we use to compile the body, placing
|
|
||||||
the resulting instructions after Split. This leaves us with the desired graph on top of
|
|
||||||
the stack - the only thing left to do is to clean up the stack of the unpacked values,
|
|
||||||
which we do using Slide.
|
|
||||||
|
|
||||||
Notice that we didn't just create instructions - we created a mapping from the tag \\(t\\)
|
|
||||||
to the instructions that correspond to it.
|
|
||||||
|
|
||||||
Now, it's time for compiling the whole case expression. We first want
|
|
||||||
to construct the graph for the expression we want to perform case analysis on.
|
|
||||||
Next, we want to evaluate it (since we need a packed value, not a graph,
|
|
||||||
to read the tag). Finally, we perform a jump depending on the tag. This
|
|
||||||
is captured by the following rule:
|
|
||||||
|
|
||||||
$$
|
|
||||||
\\mathcal{C} ⟦\\text{case} \\; e \\; \\text{of} \\; \\text{alt}_1 ... \\text{alt}_n⟧ \\; \\rho =
|
|
||||||
\\mathcal{C} ⟦e⟧ \\; \\rho \\; ⧺ [\\text{Eval}, \\text{Jump} \\; [\\mathcal{A} ⟦\\text{alt}_1⟧ \\; \\rho, ..., \\mathcal{A} ⟦\\text{alt}_n⟧ \\; \\rho]]
|
|
||||||
$$
|
|
||||||
|
|
||||||
This works because \\(\\mathcal{A}\\) creates not only instructions,
|
|
||||||
but also a tag mapping. We simply populate our Jump instruction with such mappings
|
|
||||||
resulting from compiling each branch.
|
|
||||||
|
|
||||||
You may have noticed that we didn't add rules for binary operators. Just like
|
|
||||||
with type checking, we treat them as function calls. However, rather than constructing
|
|
||||||
graphs when we have to instantiate those functions, we simply
|
|
||||||
evaluate the arguments and perform the relevant arithmetic operation using BinOp.
|
|
||||||
We will do a similar thing for constructors.
|
|
||||||
|
|
||||||
With that out of the way, we can get around to writing some code. We can envision
|
|
||||||
a method on the `ast` struct that takes an environment (just like our compilation
|
|
||||||
scheme takes the environment \\(\\rho\\\)). Rather than returning a vector
|
|
||||||
of instructions (which involves copying, unless we get some optimization kicking in),
|
|
||||||
we'll pass to it a reference to a vector. The method will then place the generated
|
|
||||||
instructions into the vector.
|
|
||||||
160
content/blog/07_compiler_runtime.md
Normal file
160
content/blog/07_compiler_runtime.md
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
---
|
||||||
|
title: Compiling a Functional Language Using C++, Part 7 - Runtime
|
||||||
|
date: 2019-08-06T14:26:38-07:00
|
||||||
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
|
---
|
||||||
|
Wikipedia has the following definition for a __runtime__:
|
||||||
|
|
||||||
|
> A [runtime] primarily implements portions of an execution model.
|
||||||
|
|
||||||
|
We know what our execution model is! We talked about it in Part 5 - it's the
|
||||||
|
lazy graph reduction we've specified. Creating and manipulating
|
||||||
|
graph nodes is slightly above hardware level, and all programs in our
|
||||||
|
functional language will rely on such manipulation (it's how they run!). Furthermore,
|
||||||
|
most G-machine instructions are also above hardware level (especially unwind!).
|
||||||
|
|
||||||
|
Push and Slide and other instructions are pretty complex.
|
||||||
|
Most computers aren't stack machines. We'll have to implement
|
||||||
|
our own stack, and whenever a graph-building function will want to modify
|
||||||
|
the stack, it will have to call library routines for our stack implementation:
|
||||||
|
|
||||||
|
```C
|
||||||
|
void stack_push(struct stack* s, struct node_s* n);
|
||||||
|
struct node_s* stack_slide(struct stack* s, size_t c);
|
||||||
|
/* other stack operations */
|
||||||
|
```
|
||||||
|
|
||||||
|
Furthermore, we observe that Unwind does a lot of the heavy lifting in our
|
||||||
|
G-machine definition. After we build the graph,
|
||||||
|
Unwind is what picks it apart and performs function calls. Furthermore,
|
||||||
|
Unwind pushes Unwind back on the stack: once you've hit it,
|
||||||
|
you're continuing to Unwind until you reach a function call. This
|
||||||
|
effectively means we can implement Unwind as a loop:
|
||||||
|
|
||||||
|
```C
|
||||||
|
while(1) {
|
||||||
|
// Check for Unwind's first rule
|
||||||
|
// Check for Unwind's second rule
|
||||||
|
// ...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
In this implementation, Unwind is in charge. We won't need to insert
|
||||||
|
the Unwind operations at the end of our generated functions, and you
|
||||||
|
may have noticed we've already been following this strategy in our
|
||||||
|
implementation of the G-machine compilation.
|
||||||
|
|
||||||
|
We can start working on an implementation of the runtime right now,
|
||||||
|
beginning with the nodes:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/07/runtime.h" 4 50 >}}
|
||||||
|
|
||||||
|
We have a variety of different nodes that can be on the stack, but without
|
||||||
|
the magic of C++'s `vtable` and RTTI, we have to take care of the bookkeeping
|
||||||
|
ourselves. We add an enum, `node_tag`, which we will use to indicate what
|
||||||
|
type of node we're looking at. We also add a "base class" `node_base`, which
|
||||||
|
contains the fields that all nodes must contain (only `tag` at the moment).
|
||||||
|
We then add to the beginning of each node struct a member of type
|
||||||
|
`node_base`. With this, a pointer to a node struct can be interpreted as a pointer
|
||||||
|
to `node_base`, which is our lowest common denominator. To go back, we
|
||||||
|
check the `tag` of `node_base`, and cast the pointer appropriately. This way,
|
||||||
|
we mimic inheritance, in a very basic manner.
|
||||||
|
|
||||||
|
We also add an `alloc_node`, which allocates a region of memory big enough
|
||||||
|
to be any node. We do this because we sometimes mutate nodes (replacing
|
||||||
|
expressions with the results of their evaluation), changing their type.
|
||||||
|
We then want to be able to change a node without reallocating memory.
|
||||||
|
Since the biggest node we have is `node_app`, that's the one we choose.
|
||||||
|
|
||||||
|
Finally, to make it easier to create nodes from our generated code,
|
||||||
|
we add helper functions like `alloc_num`, which allocate a given
|
||||||
|
node type, and set its tag and member fields appropriately. We
|
||||||
|
don't include such a function for `node_data`, since this
|
||||||
|
node will be created only in one possible way.
|
||||||
|
|
||||||
|
Here's the implementation:
|
||||||
|
{{< codelines "C" "compiler/07/runtime.c" 6 40 >}}
|
||||||
|
|
||||||
|
We now move on to implement some stack operations. Let's list them:
|
||||||
|
|
||||||
|
* `stack_init` and `stack_free` - one allocates memory for the stack,
|
||||||
|
the other releases it.
|
||||||
|
* `stack_push`, `stack_pop` and `stack_peek` - the classic stack operations.
|
||||||
|
We have `_peek` to take an offset, so we can peek relative to the top of the stack.
|
||||||
|
* `stack_popn` - pop off some number of nodes instead of one.
|
||||||
|
* `stack_slide` - the slide we specified in the semantics. Keeps the top, deletes the
|
||||||
|
next several nodes.
|
||||||
|
* `stack_update` - turns the node at the offset into an indirection to the result,
|
||||||
|
which we will use for lazy evaluation (modifying expressions with their reduced forms).
|
||||||
|
* `stack_alloc` - allocate indirection nodes on the stack. We will use this later.
|
||||||
|
* `stack_pack` and `stack_split` - Wrap and unwrap constructors on the stack.
|
||||||
|
|
||||||
|
We declare these in a header:
|
||||||
|
{{< codelines "C" "compiler/07/runtime.h" 52 68 >}}
|
||||||
|
|
||||||
|
And implement them as follows:
|
||||||
|
{{< codelines "C" "compiler/07/runtime.c" 42 116 >}}
|
||||||
|
|
||||||
|
Let's now talk about how this will connect to the code we generate. To get
|
||||||
|
a quick example, consider the `node_global` struct that we have declared above.
|
||||||
|
It has a member `function`, which is a __function pointer__ to a function
|
||||||
|
that takes a stack and returns void.
|
||||||
|
|
||||||
|
When we finally generate machine code for each of the functions
|
||||||
|
we have in our program, it will be made up of sequences of G-machine
|
||||||
|
operations expressed using assembly instructions. These instructions will still
|
||||||
|
have to manipulate the G-machine stack (they still represent G-machine operations!),
|
||||||
|
and thus, the resulting assembly subroutine will take as parameter a stack. It will
|
||||||
|
then construct the function's graph on that stack, as we've already seen. Thus,
|
||||||
|
we express a compiled top-level function as a subroutine that takes a stack,
|
||||||
|
and returns void. A global node holds in it the pointer to the function that it will call.
|
||||||
|
|
||||||
|
When our program starts, it will assume that there exists a top-level
|
||||||
|
function `f_main` that takes 0 parameters. It will take that function, call it
|
||||||
|
to produce the initial graph, and then let the unwind loop take care of the evaluation.
|
||||||
|
|
||||||
|
Thus, our program will initially look like this:
|
||||||
|
{{< codelines "C" "compiler/07/runtime.c" 154 159 >}}
|
||||||
|
|
||||||
|
As we said, we expect an externally-declared subroutine `f_main`. We construct
|
||||||
|
a global node for `f_main` with arity 0, and then start the execution using a function `eval`.
|
||||||
|
What's `eval`, though? It's the function that will take care of creating
|
||||||
|
a new stack, and evaluating the node that is passed to it using
|
||||||
|
our unwind loop. `eval` itself is pretty terse:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/07/runtime.c" 144 152 >}}
|
||||||
|
|
||||||
|
We create a fresh program stack, start it off with whatever node
|
||||||
|
we want to evaluate, and have `unwind` take care of the rest.
|
||||||
|
|
||||||
|
`unwind` is a direct implementation of the rules from Part 5:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/07/runtime.c" 118 142 >}}
|
||||||
|
|
||||||
|
We can now come up with some simple programs. Let's try
|
||||||
|
writing out, by hand, `main = { 320 + 6 }`. We end up with:
|
||||||
|
|
||||||
|
{{< codeblock "C" "compiler/07/examples/runtime1.c" >}}
|
||||||
|
|
||||||
|
If we add to the bottom of our `main` the following code:
|
||||||
|
```C
|
||||||
|
printf("%d\n", ((struct node_num*) result)->value);
|
||||||
|
```
|
||||||
|
|
||||||
|
And compile and run our code:
|
||||||
|
```
|
||||||
|
gcc runtime.c examples/runtime1.c
|
||||||
|
./a.out
|
||||||
|
```
|
||||||
|
|
||||||
|
We get the output `326`, which is exactly correct!
|
||||||
|
|
||||||
|
We now have a common set of functions and declarations
|
||||||
|
that serve to support the code we generate from our compiler.
|
||||||
|
Although this time, we wrote out `f_main` by hand, we will soon
|
||||||
|
use LLVM to generate code for `f_main` and more. Once we get
|
||||||
|
that going, we'll be able to compile our code!
|
||||||
|
|
||||||
|
Next time, we will start work on converting our G-machine instructions
|
||||||
|
into machine code. We will set up LLVM and get our very first
|
||||||
|
fully functional compiled programs in [Part 8 - LLVM]({{< relref "08_compiler_llvm.md" >}}).
|
||||||
578
content/blog/08_compiler_llvm.md
Normal file
578
content/blog/08_compiler_llvm.md
Normal file
@@ -0,0 +1,578 @@
|
|||||||
|
---
|
||||||
|
title: Compiling a Functional Language Using C++, Part 8 - LLVM
|
||||||
|
date: 2019-10-30T22:16:22-07:00
|
||||||
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
|
---
|
||||||
|
|
||||||
|
We don't want a compiler that can only generate code for a single
|
||||||
|
platform. Our language should work on macOS, Windows, and Linux,
|
||||||
|
on x86\_64, ARM, and maybe some other architectures. We also
|
||||||
|
don't want to manually implement the compiler for each platform,
|
||||||
|
dealing with the specifics of each architecture and operating
|
||||||
|
system.
|
||||||
|
|
||||||
|
This is where LLVM comes in. LLVM (which stands for __Low Level Virtual Machine__),
|
||||||
|
is a project which presents us with a kind of generic assembly language,
|
||||||
|
an __Intermediate Representation__ (IR). It also provides tooling to compile the
|
||||||
|
IR into platform-specific instructions, as well as to apply a host of various
|
||||||
|
optimizations. We can thus translate our G-machine instructions to LLVM,
|
||||||
|
and then use LLVM to generate machine code, which gets us to our ultimate
|
||||||
|
goal of compiling our language.
|
||||||
|
|
||||||
|
We start with adding LLVM to our CMake project.
|
||||||
|
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 7 7 >}}
|
||||||
|
|
||||||
|
LLVM is a huge project, and has many components. We don't need
|
||||||
|
most of them. We do need the core libraries, the x86 assembly
|
||||||
|
generator, and x86 assembly parser. I'm
|
||||||
|
not sure why we need the last one, but I ran into linking
|
||||||
|
errors without it. We find the required link targets
|
||||||
|
for these components using this CMake command:
|
||||||
|
|
||||||
|
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 19 20 >}}
|
||||||
|
|
||||||
|
Finally, we add the new include directories, link targets,
|
||||||
|
and definitions to our compiler executable:
|
||||||
|
|
||||||
|
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 39 41 >}}
|
||||||
|
|
||||||
|
Great, we have the infrastructure updated to work with LLVM. It's
|
||||||
|
now time to start using the LLVM API to compile our G-machine instructions
|
||||||
|
into assembly. We start with `LLVMContext`. The LLVM documentation states:
|
||||||
|
|
||||||
|
> This is an important class for using LLVM in a threaded context.
|
||||||
|
> It (opaquely) owns and manages the core "global" data of LLVM's core infrastructure, including the type and constant uniquing tables.
|
||||||
|
|
||||||
|
We will have exactly one instance of such a class in our program.
|
||||||
|
|
||||||
|
Additionally, we want an `IRBuilder`, which will help us generate IR instructions,
|
||||||
|
placing them into basic blocks (more on that in a bit). Also, we want
|
||||||
|
a `Module` object, which represents some collection of code and declarations
|
||||||
|
(perhaps like a C++ source file). Let's keep these things in our own
|
||||||
|
`llvm_context` class. Here's what that looks like:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/08/llvm_context.hpp" >}}
|
||||||
|
|
||||||
|
We include the LLVM context, builder, and module as members
|
||||||
|
of the context struct. Since the builder and the module need
|
||||||
|
the context, we initialize them in the constructor, where they
|
||||||
|
can safely reference it.
|
||||||
|
|
||||||
|
Besides these fields, we added
|
||||||
|
a few others, namely the `functions` and `struct_types` maps,
|
||||||
|
and the various `llvm::Type` subclasses such as `stack_type`.
|
||||||
|
We did this because we want to be able to call our runtime
|
||||||
|
functions (and use our runtime structs) from LLVM. To generate
|
||||||
|
a function call from LLVM, we need to have access to an
|
||||||
|
`llvm::Function` object. We thus want to have an `llvm::Function`
|
||||||
|
object for each runtime function we want to call. We could declare
|
||||||
|
a member variable in our `llvm_context` for each runtime function,
|
||||||
|
but it's easier to leave this to be an implementation
|
||||||
|
detail, and only have a dynamically created map between runtime
|
||||||
|
function names and their corresponding `llvm::Function` objects.
|
||||||
|
|
||||||
|
We populate the maps and other type-related variables in the
|
||||||
|
two methods, `create_functions()` and `create_types()`. To
|
||||||
|
create an `llvm::Function`, we must provide an `llvm::FunctionType`,
|
||||||
|
an `llvm::LinkageType`, the name of the function, and the module
|
||||||
|
in which the function is declared. Since we only have one
|
||||||
|
module (the one we initialized in the constructor) that's
|
||||||
|
the module we pass in. The name of the function is the same
|
||||||
|
as its name in the runtime. The linkage type is a little
|
||||||
|
more complicated - it tells LLVM the "visibility" of a function.
|
||||||
|
"Private" or "Internal" would hide this function from the linker
|
||||||
|
(like `static` functions in C). However, we want to do the opposite: our
|
||||||
|
generated functions should be accessible from other code.
|
||||||
|
Thus, our linkage type is "External".
|
||||||
|
|
||||||
|
The only remaining parameter is the `llvm::FunctionType`, which
|
||||||
|
is created using code like:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
llvm::FunctionType::get(return_type, {param_type_1, param_type_2, ...}, is_variadic)
|
||||||
|
```
|
||||||
|
|
||||||
|
Declaring all the functions and types in our runtime is mostly
|
||||||
|
just tedious. Here are a few lines from `create_functions()`, which
|
||||||
|
give a very good idea of the rest of that method:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 47 60 >}}
|
||||||
|
|
||||||
|
Similarly, here are a few lines from `create_types()`, from
|
||||||
|
which you can extrapolate the rest:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 7 11 >}}
|
||||||
|
|
||||||
|
We also tell LLVM the contents of our structs, so that
|
||||||
|
we may later reference specific fields. This is just like
|
||||||
|
forward declaration - we can forward declare a struct
|
||||||
|
in C/C++, but unless we also declare its contents,
|
||||||
|
we can't access what's inside. Below is the code
|
||||||
|
for specifying the body of `node_base` and `node_app`.
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 19 26 >}}
|
||||||
|
|
||||||
|
There's still more functionality packed into `llvm_context`.
|
||||||
|
Let's next take a look into `custom_function`, and
|
||||||
|
the `create_custom_function` method. Why do we need
|
||||||
|
these? To highlight the need for the custom class,
|
||||||
|
let's take a look at `instruction_pushglobal` which
|
||||||
|
occurs at the G-machine level, and then at `alloc_global`,
|
||||||
|
which will be a function call generated as part of
|
||||||
|
the PushGlobal instruction. `instruction_pushglobal`'s
|
||||||
|
only member variable is `name`, which stands for
|
||||||
|
the name of the global function it's referencing. However,
|
||||||
|
`alloc_global` requires an arity argument! We can
|
||||||
|
try to get this information from the `llvm::Function`
|
||||||
|
corresponding to the global we're trying to reference,
|
||||||
|
but this doesn't get us anywhere: as far as LLVM
|
||||||
|
is concerned, any global function only takes one
|
||||||
|
parameter, the stack. The rest of the parameters
|
||||||
|
are given through that stack, and their number cannot
|
||||||
|
be easily deduced from the function alone.
|
||||||
|
|
||||||
|
Instead, we decide to store global functions together
|
||||||
|
with their arity. We thus create a class to combine
|
||||||
|
these two things (`custom_function`), define
|
||||||
|
a map from global function names to instances
|
||||||
|
of `custom_function`, and add a convenience method
|
||||||
|
(`create_custom_function`) that takes care of
|
||||||
|
constructing an `llvm::Function` object, creating
|
||||||
|
a `custom_function`, and storing it in the map.
|
||||||
|
|
||||||
|
The implementation for `custom_function` is
|
||||||
|
straightforward:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 234 252 >}}
|
||||||
|
|
||||||
|
We create a function type, then a function, and finally
|
||||||
|
initialize a `custom_function`. There's one thing
|
||||||
|
we haven't seen yet in this function, which is the
|
||||||
|
`BasicBlock` class. We'll get to what basic blocks
|
||||||
|
are shortly, but for now it's sufficient to
|
||||||
|
know that the basic block gives us a place to
|
||||||
|
insert code.
|
||||||
|
|
||||||
|
This isn't the end of our `llvm_context` class: it also
|
||||||
|
has a variety of other `create_*` methods! Let's take a look
|
||||||
|
at their signatures. Most return either `void`,
|
||||||
|
`llvm::ConstantInt*`, or `llvm::Value*`. Since
|
||||||
|
`llvm::ConstantInt*` is a subclass of `llvm::Value*`, let's
|
||||||
|
just treat it as simply an `llvm::Value*` while trying
|
||||||
|
to understand these methods.
|
||||||
|
|
||||||
|
So, what is `llvm::Value`? To answer this question, let's
|
||||||
|
first understand how the LLVM IR works.
|
||||||
|
|
||||||
|
### LLVM IR
|
||||||
|
An important property of LLVM IR is that it is in __Static Single Assignment__
|
||||||
|
(SSA) form. This means that each variable can only be assigned to once. For instance,
|
||||||
|
if we use `<-` to represent assignment, the following program is valid:
|
||||||
|
|
||||||
|
```
|
||||||
|
x <- 1
|
||||||
|
y <- 2
|
||||||
|
z <- x + y
|
||||||
|
```
|
||||||
|
|
||||||
|
However, the following program is __not__ valid:
|
||||||
|
|
||||||
|
```
|
||||||
|
x <- 1
|
||||||
|
x <- x + 1
|
||||||
|
```
|
||||||
|
|
||||||
|
But what if we __do__ want to modify a variable `x`?
|
||||||
|
We can declare another "version" of `x` every time we modify it.
|
||||||
|
For instance, if we wanted to increment `x` twice, we'd do this:
|
||||||
|
|
||||||
|
```
|
||||||
|
x <- 1
|
||||||
|
x1 <- x + 1
|
||||||
|
x2 <- x1 + 1
|
||||||
|
```
|
||||||
|
|
||||||
|
In practice, LLVM's C++ API can take care of versioning variables on its own, by
|
||||||
|
auto-incrementing numbers associated with each variable we use.
|
||||||
|
|
||||||
|
Assigned to each variable is `llvm::Value`. The LLVM documentation states:
|
||||||
|
|
||||||
|
> It is the base class of all values computed by a program that may be used as operands to other values.
|
||||||
|
|
||||||
|
It's important to understand that `llvm::Value` __does not store the result of the computation__.
|
||||||
|
It rather represents how something may be computed. 1 is a value because it is computed by
|
||||||
|
just returning 1. `x + 1` is a value because it is computed by adding the value inside of
|
||||||
|
`x` to 1. Since we cannot modify a variable once we've declared it, we will
|
||||||
|
keep assigning intermediate results to new variables, constructing new values
|
||||||
|
out of values that we've already specified.
|
||||||
|
|
||||||
|
This somewhat elucidates what the `create_*` functions do: `create_i8` creates an 8-bit integer
|
||||||
|
value, and `create_pop` creates a value that is computed by calling
|
||||||
|
our runtime `stack_pop` function.
|
||||||
|
|
||||||
|
Before we move on to look at the implementations of these functions,
|
||||||
|
we need to understand another concept from the world of compiler design:
|
||||||
|
__basic blocks__. A basic block is a sequence of instructions that
|
||||||
|
are guaranteed to be executed one after another. This means that a
|
||||||
|
basic block cannot have an if/else, jump, or any other type of control flow anywhere
|
||||||
|
except at the end. If control flow could appear inside the basic block,
|
||||||
|
there would be opportunity for execution of some, but not all,
|
||||||
|
instructions in the block, violating the definition. Every time
|
||||||
|
we add an IR instruction in LLVM, we add it to a basic block.
|
||||||
|
Writing control flow involves creating several blocks, with each
|
||||||
|
block serving as the destination of a potential jump. We will
|
||||||
|
see this used to compile the Jump instruction.
|
||||||
|
|
||||||
|
### Generating LLVM IR
|
||||||
|
Now that we understand what `llvm::Value` is, and have a vague
|
||||||
|
understanding of how LLVM is structured, let's take a look at
|
||||||
|
the implementations of the `create_*` functions. The simplest
|
||||||
|
is `create_i8`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 150 152 >}}
|
||||||
|
|
||||||
|
Not much to see here. We create an instance of the `llvm::ConstantInt` class,
|
||||||
|
from the actual integer given to the method. As we said before,
|
||||||
|
`llvm::ConstantInt` is a subclass of `llvm::Value`. Next up, let's look
|
||||||
|
at `create_pop`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 160 163 >}}
|
||||||
|
|
||||||
|
We first retrieve an `llvm::Function` associated with `stack_pop`
|
||||||
|
from our map, and then use `llvm::IRBuilder::CreateCall` to insert
|
||||||
|
a value that represents a function call into the currently
|
||||||
|
selected basic block (the builder's state is what
|
||||||
|
dictates what the "selected basic block" is). `CreateCall`
|
||||||
|
takes as parameters the function we want to call (`stack_pop`,
|
||||||
|
which we store into the `pop_f` variable), as well as the arguments
|
||||||
|
to the function (for which we pass `f->arg_begin()`).
|
||||||
|
|
||||||
|
Hold on. What the heck is `arg_begin()`? Why do we take a function
|
||||||
|
as a parameter to this method? The answer is fairly simple: this
|
||||||
|
method is used when we are
|
||||||
|
generating a function with signature `void f_(struct stack* s)`
|
||||||
|
(we discussed the signature in the previous post). The
|
||||||
|
parameter that we give to `create_pop` is this function we're
|
||||||
|
generating, and `arg_begin()` gets the value that represents
|
||||||
|
the first parameter to our function - `s`! Since `stack_pop`
|
||||||
|
takes a stack, we need to give it the stack we're working on,
|
||||||
|
and so we use `f->arg_begin()` to access it.
|
||||||
|
|
||||||
|
Most of the other functions follow this exact pattern, with small
|
||||||
|
deviations. However, another function uses a more complicated LLVM
|
||||||
|
instruction:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 202 209 >}}
|
||||||
|
|
||||||
|
`unwrap_num` is used to cast a given node pointer to a pointer
|
||||||
|
to a number node, and then return the integer value from
|
||||||
|
that number node. It starts fairly innocently: we ask
|
||||||
|
LLVM for the type of a pointer to a `node_num` struct,
|
||||||
|
and then use `CreatePointerCast` to create a value
|
||||||
|
that is the same node pointer we're given, but now interpreted
|
||||||
|
as a number node pointer. We now have to access
|
||||||
|
the `value` field of our node. `CreateGEP` helps us with
|
||||||
|
this: given a pointer to a node, and two offsets
|
||||||
|
`n` and `k`, it effectively performs the following:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
&(num_pointer[n].kth_field)
|
||||||
|
```
|
||||||
|
|
||||||
|
The first offset, then, gives an index into the "array"
|
||||||
|
represented by the pointer, while the second offset
|
||||||
|
gives the index of the field we want to access. We
|
||||||
|
want to dereference the pointer (`num_pointer[0]`),
|
||||||
|
and we want the second field (`1`, when counting from 0).
|
||||||
|
Thus, we call `CreateGEP` with these offsets and our pointers.
|
||||||
|
|
||||||
|
This still leaves us with a pointer to a number, rather
|
||||||
|
than the number itself. To dereference the pointer, we use
|
||||||
|
`CreateLoad`. This gives us the value of the number node,
|
||||||
|
which we promptly return.
|
||||||
|
|
||||||
|
This concludes our implementation of the `llvm_context` -
|
||||||
|
it's time to move on to the G-machine instructions.
|
||||||
|
|
||||||
|
### G-machine Instructions to LLVM IR
|
||||||
|
|
||||||
|
Let's now envision a `gen_llvm` method on the `instruction` struct,
|
||||||
|
which will turn the still-abstract G-machine instruction
|
||||||
|
into tangible, close-to-metal LLVM IR. As we've seen
|
||||||
|
in our implementation of `llvm_context`, to access the stack, we need access to the first
|
||||||
|
argument of the function we're generating. Thus, we need this method
|
||||||
|
to accept the function whose instructions are
|
||||||
|
being converted to LLVM. We also pass in the
|
||||||
|
`llvm_context`, since it contains the LLVM builder,
|
||||||
|
context, module, and a map of globally declared functions.
|
||||||
|
|
||||||
|
With these things in mind, here's the signature for `gen_llvm`:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
virtual void gen_llvm(llvm_context&, llvm::Function*) const;
|
||||||
|
```
|
||||||
|
|
||||||
|
Let's get right to it! `instruction_pushint` gives us an easy
|
||||||
|
start:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/instruction.cpp" 17 19 >}}
|
||||||
|
|
||||||
|
We create an LLVM integer constant with the value of
|
||||||
|
our integer, and push it onto the stack.
|
||||||
|
|
||||||
|
`instruction_push` is equally terse:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/instruction.cpp" 37 39 >}}
|
||||||
|
|
||||||
|
We simply peek at the value of the stack at the given
|
||||||
|
offset (an integer of the same size as `size_t`, which
|
||||||
|
we create using `create_size`). Once we have the
|
||||||
|
result of the peek, we push it onto the stack.
|
||||||
|
|
||||||
|
`instruction_pushglobal` is more involved. Let's take a look:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/instruction.cpp" 26 30 >}}
|
||||||
|
|
||||||
|
First, we retrieve the `custom_function` associated with
|
||||||
|
the given global name. We then create an LLVM integer
|
||||||
|
constant representing the arity of the function,
|
||||||
|
and then push onto the stack the result of `alloc_global`,
|
||||||
|
giving it the function and arity just like it expects.
|
||||||
|
|
||||||
|
`instruction_pop` is also short, and doesn't require much
|
||||||
|
further explanation:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/instruction.cpp" 46 48 >}}
|
||||||
|
|
||||||
|
Some other instructions, such as `instruction_update`,
|
||||||
|
`instruction_pack`, `instruction_split`, `instruction_slide`,
|
||||||
|
`instruction_alloc` and `instruction_eval` are equally simple,
|
||||||
|
and we omit them for the purpose of brevity.
|
||||||
|
|
||||||
|
What remains are two "meaty" functions, `instruction_jump` and
|
||||||
|
`instruction_binop`. Let's start with the former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/instruction.cpp" 101 123 >}}
|
||||||
|
|
||||||
|
This is the one and only function in which we have to take
|
||||||
|
care of control flow. Conceptually, depending on the tag
|
||||||
|
of the `node_data` at the top of the stack, we want
|
||||||
|
to pick one of many branches and jump to it.
|
||||||
|
As we discussed, a basic block has to be executed in
|
||||||
|
its entirety; since the branches of a case expression
|
||||||
|
are mutually exclusive (only one of them is executed in any given case),
|
||||||
|
we have to create a separate basic block for each branch.
|
||||||
|
Given these blocks, we then want to branch to the correct one
|
||||||
|
using the tag of the node on top of the stack.
|
||||||
|
|
||||||
|
This is exactly what we do in this function. We first peek
|
||||||
|
at the node on top of the stack, and use `CreateGEP` through
|
||||||
|
`unwrap_data_tag` to get access to its tag. What we then
|
||||||
|
need is LLVM's switch instruction, created using `CreateSwitch`.
|
||||||
|
We must provide the switch with a "default" case in case
|
||||||
|
the tag value is something we don't recognize. To do this,
|
||||||
|
we create a "safety" `BasicBlock`. With this new safety
|
||||||
|
block in hand, we're able to call `CreateSwitch`, giving it
|
||||||
|
the tag value to switch on, the safety block to default to,
|
||||||
|
and the expected number of branches (to optimize memory allocation).
|
||||||
|
|
||||||
|
Next, we create a vector of blocks, and for each branch,
|
||||||
|
we append to it a corresponding block `branch_block`, into
|
||||||
|
which we insert the LLVM IR corresponding to the
|
||||||
|
instructions of the branch. No matter the branch we take,
|
||||||
|
we eventually want to come back to the same basic block,
|
||||||
|
which will perform the usual function cleanup via Update and Slide.
|
||||||
|
We re-use the safety block for this, and use `CreateBr` at the
|
||||||
|
end of each `branch_block` to perform an unconditional jump.
|
||||||
|
|
||||||
|
After we create each of the blocks, we use the `tag_mappings`
|
||||||
|
to add cases to the switch instruction, using `addCase`. Finally,
|
||||||
|
we set the builder's insertion point to the safety block,
|
||||||
|
meaning that the next instructions will insert their
|
||||||
|
LLVM IR into that block. Since we have all branches
|
||||||
|
jump to the safety block at the end, this means that
|
||||||
|
no matter which branch we take in the case expression,
|
||||||
|
we will still execute the subsequent instructions as expected.
|
||||||
|
|
||||||
|
Let's now look at `instruction_binop`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/instruction.cpp" 139 150 >}}
|
||||||
|
|
||||||
|
In this instruction, we pop and unwrap two integers from
|
||||||
|
the stack (assuming they are integers). Depending on
|
||||||
|
the type of operation the instruction is set to, we
|
||||||
|
then push the result of the corresponding LLVM
|
||||||
|
instruction. `PLUS` calls LLVM's `CreateAdd` to insert
|
||||||
|
addition, `MINUS` calls `CreateSub`, and so on. No matter
|
||||||
|
what the operation was, we push the result onto the stack.
|
||||||
|
|
||||||
|
That's all for our instructions! We're so very close now. Let's
|
||||||
|
move on to compiling definitions.
|
||||||
|
|
||||||
|
### Definitions to LLVM IR
|
||||||
|
As with typechecking, to allow for mutually recursive functions,
|
||||||
|
we need to be able to reference each global function from any other function.
|
||||||
|
We then take the same approach as before, going in two passes.
|
||||||
|
This leads to two new methods for `definition`:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
virtual void gen_llvm_first(llvm_context& ctx) = 0;
|
||||||
|
virtual void gen_llvm_second(llvm_context& ctx) = 0;
|
||||||
|
```
|
||||||
|
|
||||||
|
The first pass is intended to register all functions into
|
||||||
|
the `llvm_context`, making them visible to other functions.
|
||||||
|
The second pass is used to actually generate the code for
|
||||||
|
each function, now having access to all the other global
|
||||||
|
functions. Let's see the implementation for `gen_llvm_first`
|
||||||
|
for `definition_defn`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/definition.cpp" 58 60 >}}
|
||||||
|
|
||||||
|
Since `create_custom_function` already creates a function
|
||||||
|
__and__ registers it with `llvm_context`, this is
|
||||||
|
all we need. Note that we created a new member variable
|
||||||
|
for `definition_defn` which stores this newly created
|
||||||
|
function. In the second pass, we will populate this
|
||||||
|
function with LLVM IR from the definition's instructions.
|
||||||
|
|
||||||
|
We actually create functions for each of the constructors
|
||||||
|
of data types, but they're quite special: all they do is
|
||||||
|
pack their arguments! Since they don't need access to
|
||||||
|
the other global functions, we might as well create
|
||||||
|
their bodies then and there:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/definition.cpp" 101 112 >}}
|
||||||
|
|
||||||
|
Like in `definition_defn`, we use `create_custom_function`.
|
||||||
|
However, we then use `SetInsertPoint` to configure our builder to insert code into
|
||||||
|
the newly created function (which already has a `BasicBlock`,
|
||||||
|
thanks to that one previously unexplained line in `create_custom_function`!).
|
||||||
|
Since we decided to only include the Pack instruction, we generate
|
||||||
|
a call to it directly using `create_pack`. We follow this
|
||||||
|
up with `CreateRetVoid`, which tells LLVM that this is
|
||||||
|
the end of the function, and that it is now safe to return
|
||||||
|
from it.
|
||||||
|
|
||||||
|
Great! We now implement the second pass of `gen_llvm`. In
|
||||||
|
the case of `definition_defn`, we do almost exactly
|
||||||
|
what we did in the first pass of `definition_data`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/definition.cpp" 62 68 >}}
|
||||||
|
|
||||||
|
As for `definition_data`, we have nothing to do in the
|
||||||
|
second pass. We're done!
|
||||||
|
|
||||||
|
### Getting Results
|
||||||
|
We're almost there. Two things remain. The first: our implementation
|
||||||
|
of `ast_binop` implements each binary operation as simply a function call:
|
||||||
|
`+` calls `f_plus`, and so on. But so far, we have not implemented
|
||||||
|
`f_plus`, or any other binary operator function. We do this
|
||||||
|
in `main.cpp`, creating a function `gen_llvm_internal_op`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/main.cpp" 70 83 >}}
|
||||||
|
|
||||||
|
We create a simple function body. We then append G-machine
|
||||||
|
instructions that take each argument, evaluate it,
|
||||||
|
and then perform the corresponding binary operation.
|
||||||
|
With these instructions in the body, we insert
|
||||||
|
them into a new function, just like we did in our code
|
||||||
|
for `definition_defn` and `definition_data`.
|
||||||
|
|
||||||
|
Finally, we write our `gen_llvm` function that we will
|
||||||
|
call from `main`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/main.cpp" 125 141 >}}
|
||||||
|
|
||||||
|
It first creates the functions for
|
||||||
|
`+`, `-`, `*`, and `/`. Then, it calls the first
|
||||||
|
pass of `gen_llvm` on all definitions, followed
|
||||||
|
by the second pass. Lastly, it uses LLVM's built-in
|
||||||
|
functionality to print out the generated IR in
|
||||||
|
our module, and then uses a function `output_llvm`
|
||||||
|
to create an object file ready for linking.
|
||||||
|
|
||||||
|
To be very honest, I took the `output_llvm` function
|
||||||
|
almost entirely from instructional material for my university's
|
||||||
|
compilers course. The gist of it, though, is: we determine
|
||||||
|
the target architecture and platform, specify a "generic" CPU,
|
||||||
|
create a default set of options, and then generate an object file.
|
||||||
|
Here it is:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/main.cpp" 85 123 >}}
|
||||||
|
|
||||||
|
We now add a `gen_llvm` call to `main`.
|
||||||
|
|
||||||
|
Are we there?
|
||||||
|
|
||||||
|
Let's try to compile our first example, `works1.txt`. The
|
||||||
|
file:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/08/examples/works1.txt" >}}
|
||||||
|
|
||||||
|
We run the following commands in our build directory:
|
||||||
|
|
||||||
|
```
|
||||||
|
./compiler < ../examples/works1.txt
|
||||||
|
gcc -no-pie ../runtime.c program.o
|
||||||
|
./a.out
|
||||||
|
```
|
||||||
|
|
||||||
|
Nothing happens. How anticlimactic! Our runtime has no way of
|
||||||
|
printing out the result of the evaluation. Let's change that:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/runtime.c" 157 183 >}}
|
||||||
|
|
||||||
|
Rerunning our commands, we get:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 326
|
||||||
|
```
|
||||||
|
|
||||||
|
The correct result! Let's try it with `works2.txt`:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/08/examples/works2.txt" >}}
|
||||||
|
|
||||||
|
And again, we get the right answer:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 326
|
||||||
|
```
|
||||||
|
|
||||||
|
This is child's play, though. Let's try with something
|
||||||
|
more complicated, like `works3.txt`:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/08/examples/works3.txt" >}}
|
||||||
|
|
||||||
|
Once again, our program does exactly what we intended:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
Alright, this is neat, but we haven't yet confirmed that
|
||||||
|
lazy evaluation works. How about we try it with
|
||||||
|
`works5.txt`:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/08/examples/works5.txt" >}}
|
||||||
|
|
||||||
|
Yet again, the program works:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 9
|
||||||
|
```
|
||||||
|
|
||||||
|
At last, we have a working compiler!
|
||||||
|
|
||||||
|
While this is a major victory, we are not yet
|
||||||
|
finished with the compiler altogether. While
|
||||||
|
we allocate nodes whenever we need them, we
|
||||||
|
have not once uttered the phrase `free` in our
|
||||||
|
runtime. Our language works, but we have no way
|
||||||
|
of comparing numbers, no lambdas, no `let/in`.
|
||||||
|
In the next several posts, we will improve
|
||||||
|
our compiler to properly free unused memory
|
||||||
|
using a __garbage collector__, implement
|
||||||
|
lambda functions using __lambda lifting__,
|
||||||
|
and use our Alloc instruction to implement `let/in` expressions. See
|
||||||
|
you there!
|
||||||
102
content/blog/better_explanations.md
Normal file
102
content/blog/better_explanations.md
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
---
|
||||||
|
title: "Thoughts on Better Explanations"
|
||||||
|
date: 2019-10-12T00:33:02-07:00
|
||||||
|
tags: ["Language Server Protocol"]
|
||||||
|
---
|
||||||
|
|
||||||
|
How do you explain how to write a program?
|
||||||
|
|
||||||
|
Instructional material is becoming more and more popular on the web, with
|
||||||
|
thousands of programming tutorials for languages, frameworks,
|
||||||
|
and technologies created on YouTube, Medium, and people's
|
||||||
|
personal sites. And yet, there seems to be little standardization or
|
||||||
|
progress towards an "effective" way. Everyone is pasting code
|
||||||
|
examples, showing gists, or even sharing whole projects on GitHub.
|
||||||
|
When I was writing the earliest posts on this site, I did the same.
|
||||||
|
Write some code, copy paste it, be done. Write some code, link it,
|
||||||
|
be done. If I'm feeling fancy, write some code, gist it, be done.
|
||||||
|
It's not unlikely for code presented in this way
|
||||||
|
to become outdated and dysfunctional.
|
||||||
|
|
||||||
|
I discovered a whole new perspective when going through
|
||||||
|
[Software Foundations](https://softwarefoundations.cis.upenn.edu/). What's
|
||||||
|
different about that book is that the line between source code and instructional
|
||||||
|
text is blurred - the HTML is generated from the comments in the Coq file, and
|
||||||
|
code from the Coq file is included as snippets in the book. Rather than
|
||||||
|
having readers piece together the snippets from the HTML, it simply directed
|
||||||
|
them to the Coq file from which the page was generated. It maintained
|
||||||
|
both the benefits of a live code example, and of a textbook written to teach,
|
||||||
|
not to simply explain what the code does.
|
||||||
|
|
||||||
|
This is reminiscent of [Literate Programming](https://en.wikipedia.org/wiki/Literate_programming),
|
||||||
|
a style of programming in which the explanation of the program, in human-oriented order, is presented,
|
||||||
|
with code as supporting material. Tools such as CWEB implement Literate Programming, allowing
|
||||||
|
users to write files that are then converted into C source, and can be compiled as usual. I was intrigued
|
||||||
|
by the idea, but in all honesty, found it lacking.
|
||||||
|
|
||||||
|
For one, there is the problem of an extra processing step. Compilers are written to compile C, and not
|
||||||
|
CWEB files. Thus, a program must take CWEB source, convert it to C, and then a compiler must
|
||||||
|
convert the C code to machine language. This doesn't feel elegant - you're effectively
|
||||||
|
stripping the CWEB source files of the text you added to them. In technical terms, it's not really
|
||||||
|
that big of an issue - software build systems already have support for multiple processing steps,
|
||||||
|
and it would be hard to CWEB a piece of software large enough that the intermediate step will cause problems.
|
||||||
|
|
||||||
|
Another issue is the lack of universality. CWEB is specialized for C. WEB, the original literate programming
|
||||||
|
tool, is specialized for Pascal. There are tools that are language agnostic, of course, such as noweb. But
|
||||||
|
the [Wikipedia page for noweb](https://en.wikipedia.org/wiki/Noweb) drops this bomb:
|
||||||
|
|
||||||
|
> noweb defines a specific file format and a file is likely to interleave three different formats
|
||||||
|
> (noweb, latex and the language used for the software). This is not recognised by other software development
|
||||||
|
> tools and consequently using noweb excludes the use of UML or code documentation tools.
|
||||||
|
|
||||||
|
This may be the worst trade deal in the history of trade deals, maybe ever! By trying to explain how our
|
||||||
|
code works, __we sacrifice all other tooling__. Worse, because Literate Programming encourages presenting
|
||||||
|
code in fragments and out of order, it is particularly difficult to reason about programs in an automated
|
||||||
|
setting.
|
||||||
|
|
||||||
|
When I present code to a reader, I want to write it with the use of existing tooling. I want my syntax
|
||||||
|
highlighting. I want my linting. I want my build system. And in the same way, a user who is reading
|
||||||
|
my code wants to be able to view it, change it, experiment with it. Furthermore, though, I want
|
||||||
|
to be able to guide the reader's attention. Text-in-comments works great for Coq, but other languages like
|
||||||
|
C++, in which the order of declarations matters, may not be as suited for such an approach.
|
||||||
|
|
||||||
|
In essence, I want:
|
||||||
|
|
||||||
|
* The power of language-specific tooling, without having to extend the tooling itself
|
||||||
|
* A universal way of describing a program in any language
|
||||||
|
* A way of maintaining synchrony between the explanation and the source
|
||||||
|
|
||||||
|
I have an idea of a piece of software that can do such a thing.
|
||||||
|
|
||||||
|
### A Language Server Based Tool
|
||||||
|
It is a well known problem that various editors support different languages
|
||||||
|
with mixed success. The idea of the Language Server Protocol is to allow
|
||||||
|
for a program (the server) to be in charge of making sense of the code, and then
|
||||||
|
communicate the results to an editor. The editor, in that case,
|
||||||
|
doesn't have to do as much heavy lifting, and instead just queries
|
||||||
|
the language server when it needs information.
|
||||||
|
|
||||||
|
While this technology is used for text editors, I think it can
|
||||||
|
be adapted to educational texts that reference a particular
|
||||||
|
codebase. I envision the following workflow:
|
||||||
|
|
||||||
|
1. An author writes their tutorial/book/blog post
|
||||||
|
in their markup language of choice (Markdown).
|
||||||
|
2. They reference a fragment of code (a function, a variable)
|
||||||
|
through a specialized syntax.
|
||||||
|
3. When the HTML/LaTeX output is created, a language server
|
||||||
|
is started. The language server uses information from
|
||||||
|
the references in step 2 to insert code fragments into
|
||||||
|
the generated output.
|
||||||
|
|
||||||
|
After each "conversion" of source text to HTML/LaTeX, the
|
||||||
|
code in the generated snippets will be in sync with the codebase.
|
||||||
|
At the same time, changing the source text will not require changing
|
||||||
|
the source files. Finally, since language servers exist for most
|
||||||
|
established languages, this system can work nearly out of the box,
|
||||||
|
and even be added to established projects with no changes to the projects
|
||||||
|
themselves.
|
||||||
|
|
||||||
|
Of course, this is just a rough idea. I'm not sure how plausible it is
|
||||||
|
to include snippets with the use of Language Server Protocol. But
|
||||||
|
I certainly would like to try!
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user