Compare commits
4 Commits
153349f3d5
...
e7f0ccfa16
Author | SHA1 | Date | |
---|---|---|---|
e7f0ccfa16 | |||
e5d01a4e19 | |||
b7d72f2fbf | |||
281dbbd174 |
129
code/compiler/09/examples/primes.txt
Normal file
129
code/compiler/09/examples/primes.txt
Normal file
|
@ -0,0 +1,129 @@
|
|||
data List = { Nil, Cons Nat List }
|
||||
data Bool = { True, False }
|
||||
data Nat = { O, S Nat }
|
||||
|
||||
defn ifN c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn ifL c t e = {
|
||||
case c of {
|
||||
True -> { t }
|
||||
False -> { e }
|
||||
}
|
||||
}
|
||||
|
||||
defn toInt n = {
|
||||
case n of {
|
||||
O -> { 0 }
|
||||
S np -> { 1 + toInt np }
|
||||
}
|
||||
}
|
||||
|
||||
defn lte n m = {
|
||||
case m of {
|
||||
O -> {
|
||||
case n of {
|
||||
O -> { True }
|
||||
S np -> { False }
|
||||
}
|
||||
}
|
||||
S mp -> {
|
||||
case n of {
|
||||
O -> { True }
|
||||
S np -> { lte np mp }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn minus n m = {
|
||||
case m of {
|
||||
O -> { n }
|
||||
S mp -> {
|
||||
case n of {
|
||||
O -> { O }
|
||||
S np -> {
|
||||
minus np mp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn mod n m = {
|
||||
ifN (lte m n) (mod (minus n m) m) n
|
||||
}
|
||||
|
||||
defn notDivisibleBy n m = {
|
||||
case (mod m n) of {
|
||||
O -> { False }
|
||||
S mp -> { True }
|
||||
}
|
||||
}
|
||||
|
||||
defn filter f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { ifL (f x) (Cons x (filter f xs)) (filter f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn map f l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> { Cons (f x) (map f xs) }
|
||||
}
|
||||
}
|
||||
|
||||
defn nats = {
|
||||
Cons (S (S O)) (map S nats)
|
||||
}
|
||||
|
||||
defn primesRec l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
|
||||
}
|
||||
}
|
||||
|
||||
defn primes = {
|
||||
primesRec nats
|
||||
}
|
||||
|
||||
defn take n l = {
|
||||
case l of {
|
||||
Nil -> { Nil }
|
||||
Cons x xs -> {
|
||||
case n of {
|
||||
O -> { Nil }
|
||||
S np -> { Cons x (take np xs) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defn head l = {
|
||||
case l of {
|
||||
Nil -> { O }
|
||||
Cons x xs -> { x }
|
||||
}
|
||||
}
|
||||
|
||||
defn reverseAcc a l = {
|
||||
case l of {
|
||||
Nil -> { a }
|
||||
Cons x xs -> { reverseAcc (Cons x a) xs }
|
||||
}
|
||||
}
|
||||
|
||||
defn reverse l = {
|
||||
reverseAcc Nil l
|
||||
}
|
||||
|
||||
defn main = {
|
||||
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
|
||||
}
|
|
@ -15,7 +15,7 @@ void instruction_pushint::print(int indent, std::ostream& to) const {
|
|||
}
|
||||
|
||||
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
ctx.create_push(f, ctx.create_num(ctx.create_i32(value)));
|
||||
ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value)));
|
||||
}
|
||||
|
||||
void instruction_pushglobal::print(int indent, std::ostream& to) const {
|
||||
|
@ -26,7 +26,7 @@ void instruction_pushglobal::print(int indent, std::ostream& to) const {
|
|||
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto& global_f = ctx.custom_functions.at("f_" + name);
|
||||
auto arity = ctx.create_i32(global_f->arity);
|
||||
ctx.create_push(f, ctx.create_global(global_f->function, arity));
|
||||
ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
|
||||
}
|
||||
|
||||
void instruction_push::print(int indent, std::ostream& to) const {
|
||||
|
@ -55,7 +55,7 @@ void instruction_mkapp::print(int indent, std::ostream& to) const {
|
|||
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||
auto left = ctx.create_pop(f);
|
||||
auto right = ctx.create_pop(f);
|
||||
ctx.create_push(f, ctx.create_app(left, right));
|
||||
ctx.create_push(f, ctx.create_app(f, left, right));
|
||||
}
|
||||
|
||||
void instruction_update::print(int indent, std::ostream& to) const {
|
||||
|
@ -146,7 +146,7 @@ void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
|
|||
case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
|
||||
case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
|
||||
}
|
||||
ctx.create_push(f, ctx.create_num(result));
|
||||
ctx.create_push(f, ctx.create_num(f, result));
|
||||
}
|
||||
|
||||
void instruction_eval::print(int indent, std::ostream& to) const {
|
||||
|
|
|
@ -5,7 +5,9 @@ using namespace llvm;
|
|||
|
||||
void llvm_context::create_types() {
|
||||
stack_type = StructType::create(ctx, "stack");
|
||||
gmachine_type = StructType::create(ctx, "gmachine");
|
||||
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
|
||||
tag_type = IntegerType::getInt8Ty(ctx);
|
||||
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||
|
@ -14,10 +16,18 @@ void llvm_context::create_types() {
|
|||
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||
function_type = FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false);
|
||||
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
|
||||
|
||||
gmachine_type->setBody(
|
||||
stack_ptr_type,
|
||||
node_ptr_type,
|
||||
IntegerType::getInt64Ty(ctx),
|
||||
IntegerType::getInt64Ty(ctx)
|
||||
);
|
||||
struct_types.at("node_base")->setBody(
|
||||
IntegerType::getInt32Ty(ctx)
|
||||
IntegerType::getInt32Ty(ctx),
|
||||
IntegerType::getInt8Ty(ctx),
|
||||
node_ptr_type
|
||||
);
|
||||
struct_types.at("node_app")->setBody(
|
||||
struct_types.at("node_base"),
|
||||
|
@ -82,34 +92,40 @@ void llvm_context::create_functions() {
|
|||
"stack_popn",
|
||||
&module
|
||||
);
|
||||
functions["stack_slide"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
functions["gmachine_slide"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_slide",
|
||||
"gmachine_slide",
|
||||
&module
|
||||
);
|
||||
functions["stack_update"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
functions["gmachine_update"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_update",
|
||||
"gmachine_update",
|
||||
&module
|
||||
);
|
||||
functions["stack_alloc"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
functions["gmachine_alloc"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_alloc",
|
||||
"gmachine_alloc",
|
||||
&module
|
||||
);
|
||||
functions["stack_pack"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false),
|
||||
functions["gmachine_pack"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_pack",
|
||||
"gmachine_pack",
|
||||
&module
|
||||
);
|
||||
functions["stack_split"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||
functions["gmachine_split"] = Function::Create(
|
||||
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_split",
|
||||
"gmachine_split",
|
||||
&module
|
||||
);
|
||||
functions["gmachine_track"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"gmachine_track",
|
||||
&module
|
||||
);
|
||||
|
||||
|
@ -139,14 +155,8 @@ void llvm_context::create_functions() {
|
|||
&module
|
||||
);
|
||||
|
||||
functions["eval"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"eval",
|
||||
&module
|
||||
);
|
||||
functions["unwind"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
FunctionType::get(void_type, { gmachine_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"unwind",
|
||||
&module
|
||||
|
@ -165,44 +175,43 @@ ConstantInt* llvm_context::create_size(size_t i) {
|
|||
|
||||
Value* llvm_context::create_pop(Function* f) {
|
||||
auto pop_f = functions.at("stack_pop");
|
||||
return builder.CreateCall(pop_f, { f->arg_begin() });
|
||||
return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) });
|
||||
}
|
||||
Value* llvm_context::create_peek(Function* f, Value* off) {
|
||||
auto peek_f = functions.at("stack_peek");
|
||||
return builder.CreateCall(peek_f, { f->arg_begin(), off });
|
||||
return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
|
||||
}
|
||||
void llvm_context::create_push(Function* f, Value* v) {
|
||||
auto push_f = functions.at("stack_push");
|
||||
builder.CreateCall(push_f, { f->arg_begin(), v });
|
||||
builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v });
|
||||
}
|
||||
void llvm_context::create_popn(Function* f, Value* off) {
|
||||
auto popn_f = functions.at("stack_popn");
|
||||
builder.CreateCall(popn_f, { f->arg_begin(), off });
|
||||
builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
|
||||
}
|
||||
void llvm_context::create_update(Function* f, Value* off) {
|
||||
auto update_f = functions.at("stack_update");
|
||||
auto update_f = functions.at("gmachine_update");
|
||||
builder.CreateCall(update_f, { f->arg_begin(), off });
|
||||
}
|
||||
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
|
||||
auto pack_f = functions.at("stack_pack");
|
||||
auto pack_f = functions.at("gmachine_pack");
|
||||
builder.CreateCall(pack_f, { f->arg_begin(), c, t });
|
||||
}
|
||||
void llvm_context::create_split(Function* f, Value* c) {
|
||||
auto split_f = functions.at("stack_split");
|
||||
auto split_f = functions.at("gmachine_split");
|
||||
builder.CreateCall(split_f, { f->arg_begin(), c });
|
||||
}
|
||||
void llvm_context::create_slide(Function* f, Value* off) {
|
||||
auto slide_f = functions.at("stack_slide");
|
||||
auto slide_f = functions.at("gmachine_slide");
|
||||
builder.CreateCall(slide_f, { f->arg_begin(), off });
|
||||
}
|
||||
void llvm_context::create_alloc(Function* f, Value* n) {
|
||||
auto alloc_f = functions.at("stack_alloc");
|
||||
auto alloc_f = functions.at("gmachine_alloc");
|
||||
builder.CreateCall(alloc_f, { f->arg_begin(), n });
|
||||
}
|
||||
|
||||
Value* llvm_context::create_eval(Value* e) {
|
||||
auto eval_f = functions.at("eval");
|
||||
return builder.CreateCall(eval_f, { e });
|
||||
// Emits a call to the runtime function registered as "gmachine_track",
// passing the first argument of `f` followed by `v`, and returns the value
// produced by that call instruction.
Value* llvm_context::create_track(Function* f, Value* v) {
    auto tracker = functions.at("gmachine_track");
    auto machine_arg = f->arg_begin();
    return builder.CreateCall(tracker, { machine_arg, v });
}
|
||||
|
||||
void llvm_context::create_unwind(Function* f) {
|
||||
|
@ -210,6 +219,11 @@ void llvm_context::create_unwind(Function* f) {
|
|||
builder.CreateCall(unwind_f, { f->args().begin() });
|
||||
}
|
||||
|
||||
// Emits a GEP on `g` with the index pair {0, 0}, i.e. the address of the
// first member of whatever `g` points at, and returns that address.
// NOTE(review): create_types declares the first gmachine field as a stack
// pointer, while the C runtime's struct gmachine embeds its stack by value -
// confirm the resulting pointer type is what the stack_* functions expect.
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
    auto zero = create_i32(0);
    auto first_field = builder.CreateGEP(g, { zero, zero });
    return first_field;
}
|
||||
|
||||
Value* llvm_context::unwrap_num(Value* v) {
|
||||
auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
|
||||
auto cast = builder.CreatePointerCast(v, num_ptr_type);
|
||||
|
@ -218,9 +232,10 @@ Value* llvm_context::unwrap_num(Value* v) {
|
|||
auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||
return builder.CreateLoad(int_ptr);
|
||||
}
|
||||
Value* llvm_context::create_num(Value* v) {
|
||||
Value* llvm_context::create_num(Function* f, Value* v) {
|
||||
auto alloc_num_f = functions.at("alloc_num");
|
||||
return builder.CreateCall(alloc_num_f, { v });
|
||||
auto alloc_num_call = builder.CreateCall(alloc_num_f, { v });
|
||||
return create_track(f, alloc_num_call);
|
||||
}
|
||||
|
||||
Value* llvm_context::unwrap_data_tag(Value* v) {
|
||||
|
@ -232,20 +247,20 @@ Value* llvm_context::unwrap_data_tag(Value* v) {
|
|||
return builder.CreateLoad(tag_ptr);
|
||||
}
|
||||
|
||||
Value* llvm_context::create_global(Value* f, Value* a) {
|
||||
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
|
||||
auto alloc_global_f = functions.at("alloc_global");
|
||||
return builder.CreateCall(alloc_global_f, { f, a });
|
||||
auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a });
|
||||
return create_track(f, alloc_global_call);
|
||||
}
|
||||
|
||||
Value* llvm_context::create_app(Value* l, Value* r) {
|
||||
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
|
||||
auto alloc_app_f = functions.at("alloc_app");
|
||||
return builder.CreateCall(alloc_app_f, { l, r });
|
||||
auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r });
|
||||
return create_track(f, alloc_app_call);
|
||||
}
|
||||
|
||||
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
|
||||
auto void_type = llvm::Type::getVoidTy(ctx);
|
||||
auto function_type =
|
||||
llvm::FunctionType::get(void_type, { stack_ptr_type }, false);
|
||||
auto new_function = llvm::Function::Create(
|
||||
function_type,
|
||||
llvm::Function::LinkageTypes::ExternalLinkage,
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
#include <map>
|
||||
|
||||
struct llvm_context {
|
||||
|
@ -23,7 +24,9 @@ struct llvm_context {
|
|||
std::map<std::string, llvm::StructType*> struct_types;
|
||||
|
||||
llvm::StructType* stack_type;
|
||||
llvm::StructType* gmachine_type;
|
||||
llvm::PointerType* stack_ptr_type;
|
||||
llvm::PointerType* gmachine_ptr_type;
|
||||
llvm::PointerType* node_ptr_type;
|
||||
llvm::IntegerType* tag_type;
|
||||
llvm::FunctionType* function_type;
|
||||
|
@ -50,18 +53,20 @@ struct llvm_context {
|
|||
void create_split(llvm::Function*, llvm::Value*);
|
||||
void create_slide(llvm::Function*, llvm::Value*);
|
||||
void create_alloc(llvm::Function*, llvm::Value*);
|
||||
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
||||
|
||||
llvm::Value* create_eval(llvm::Value*);
|
||||
void create_unwind(llvm::Function*);
|
||||
|
||||
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
||||
|
||||
llvm::Value* unwrap_num(llvm::Value*);
|
||||
llvm::Value* create_num(llvm::Value*);
|
||||
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
||||
|
||||
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||
|
||||
llvm::Value* create_global(llvm::Value*, llvm::Value*);
|
||||
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
|
||||
llvm::Value* create_app(llvm::Value*, llvm::Value*);
|
||||
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||
|
||||
llvm::Function* create_custom_function(std::string name, int32_t arity);
|
||||
};
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
struct node_base* alloc_node() {
|
||||
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||
new_node->gc_next = NULL;
|
||||
new_node->gc_reachable = 0;
|
||||
assert(new_node != NULL);
|
||||
return new_node;
|
||||
}
|
||||
|
@ -25,7 +27,7 @@ struct node_num* alloc_num(int32_t n) {
|
|||
return node;
|
||||
}
|
||||
|
||||
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a) {
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||
struct node_global* node = (struct node_global*) alloc_node();
|
||||
node->base.tag = NODE_GLOBAL;
|
||||
node->arity = a;
|
||||
|
@ -40,6 +42,33 @@ struct node_ind* alloc_ind(struct node_base* n) {
|
|||
return node;
|
||||
}
|
||||
|
||||
void free_node_direct(struct node_base* n) {
|
||||
if(n->tag == NODE_DATA) {
|
||||
free(((struct node_data*) n)->array);
|
||||
}
|
||||
}
|
||||
|
||||
void gc_visit_node(struct node_base* n) {
|
||||
if(n->gc_reachable) return;
|
||||
n->gc_reachable = 1;
|
||||
|
||||
if(n->tag == NODE_APP) {
|
||||
struct node_app* app = (struct node_app*) n;
|
||||
gc_visit_node(app->left);
|
||||
gc_visit_node(app->right);
|
||||
} if(n->tag == NODE_IND) {
|
||||
struct node_ind* ind = (struct node_ind*) n;
|
||||
gc_visit_node(ind->next);
|
||||
} if(n->tag == NODE_DATA) {
|
||||
struct node_data* data = (struct node_data*) n;
|
||||
struct node_base** to_visit = data->array;
|
||||
while(*to_visit) {
|
||||
gc_visit_node(*to_visit);
|
||||
to_visit++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void stack_init(struct stack* s) {
|
||||
s->size = 4;
|
||||
s->count = 0;
|
||||
|
@ -74,49 +103,109 @@ void stack_popn(struct stack* s, size_t n) {
|
|||
s->count -= n;
|
||||
}
|
||||
|
||||
void stack_slide(struct stack* s, size_t n) {
|
||||
assert(s->count > n);
|
||||
s->data[s->count - n - 1] = s->data[s->count - 1];
|
||||
s->count -= n;
|
||||
void gmachine_init(struct gmachine* g) {
|
||||
stack_init(&g->stack);
|
||||
g->gc_nodes = NULL;
|
||||
g->gc_node_count = 0;
|
||||
g->gc_node_threshold = 128;
|
||||
}
|
||||
|
||||
void stack_update(struct stack* s, size_t o) {
|
||||
assert(s->count > o + 1);
|
||||
struct node_ind* ind = (struct node_ind*) s->data[s->count - o - 2];
|
||||
ind->base.tag = NODE_IND;
|
||||
ind->next = s->data[s->count -= 1];
|
||||
}
|
||||
void gmachine_free(struct gmachine* g) {
|
||||
stack_free(&g->stack);
|
||||
struct node_base* to_free = g->gc_nodes;
|
||||
struct node_base* next;
|
||||
|
||||
void stack_alloc(struct stack* s, size_t o) {
|
||||
while(o--) {
|
||||
stack_push(s, (struct node_base*) alloc_ind(NULL));
|
||||
while(to_free) {
|
||||
next = to_free->gc_next;
|
||||
free_node_direct(to_free);
|
||||
free(to_free);
|
||||
to_free = next;
|
||||
}
|
||||
}
|
||||
|
||||
void stack_pack(struct stack* s, size_t n, int8_t t) {
|
||||
assert(s->count >= n);
|
||||
void gmachine_slide(struct gmachine* g, size_t n) {
|
||||
assert(g->stack.count > n);
|
||||
g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
|
||||
g->stack.count -= n;
|
||||
}
|
||||
|
||||
struct node_base** data = malloc(sizeof(*data) * n);
|
||||
void gmachine_update(struct gmachine* g, size_t o) {
|
||||
assert(g->stack.count > o + 1);
|
||||
struct node_ind* ind =
|
||||
(struct node_ind*) g->stack.data[g->stack.count - o - 2];
|
||||
ind->base.tag = NODE_IND;
|
||||
ind->next = g->stack.data[g->stack.count -= 1];
|
||||
}
|
||||
|
||||
void gmachine_alloc(struct gmachine* g, size_t o) {
|
||||
while(o--) {
|
||||
stack_push(&g->stack,
|
||||
gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
|
||||
}
|
||||
}
|
||||
|
||||
void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
|
||||
assert(g->stack.count >= n);
|
||||
|
||||
struct node_base** data = malloc(sizeof(*data) * (n + 1));
|
||||
assert(data != NULL);
|
||||
memcpy(data, &s->data[s->count - n], n * sizeof(*data));
|
||||
memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
|
||||
data[n] = NULL;
|
||||
|
||||
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||
new_node->array = data;
|
||||
new_node->base.tag = NODE_DATA;
|
||||
new_node->tag = t;
|
||||
|
||||
stack_popn(s, n);
|
||||
stack_push(s, (struct node_base*) new_node);
|
||||
stack_popn(&g->stack, n);
|
||||
stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
|
||||
}
|
||||
|
||||
void stack_split(struct stack* s, size_t n) {
|
||||
struct node_data* node = (struct node_data*) stack_pop(s);
|
||||
void gmachine_split(struct gmachine* g, size_t n) {
|
||||
struct node_data* node = (struct node_data*) stack_pop(&g->stack);
|
||||
for(size_t i = 0; i < n; i++) {
|
||||
stack_push(s, node->array[i]);
|
||||
stack_push(&g->stack, node->array[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void unwind(struct stack* s) {
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||
g->gc_node_count++;
|
||||
b->gc_next = g->gc_nodes;
|
||||
g->gc_nodes = b;
|
||||
|
||||
if(g->gc_node_count >= g->gc_node_threshold) {
|
||||
uint64_t nodes_before = g->gc_node_count;
|
||||
gc_visit_node(b);
|
||||
gmachine_gc(g);
|
||||
g->gc_node_threshold = g->gc_node_count * 2;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
void gmachine_gc(struct gmachine* g) {
|
||||
for(size_t i = 0; i < g->stack.count; i++) {
|
||||
gc_visit_node(g->stack.data[i]);
|
||||
}
|
||||
|
||||
struct node_base** head_ptr = &g->gc_nodes;
|
||||
while(*head_ptr) {
|
||||
if((*head_ptr)->gc_reachable) {
|
||||
(*head_ptr)->gc_reachable = 0;
|
||||
head_ptr = &(*head_ptr)->gc_next;
|
||||
} else {
|
||||
struct node_base* to_free = *head_ptr;
|
||||
*head_ptr = to_free->gc_next;
|
||||
free_node_direct(to_free);
|
||||
free(to_free);
|
||||
g->gc_node_count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unwind(struct gmachine* g) {
|
||||
struct stack* s = &g->stack;
|
||||
|
||||
while(1) {
|
||||
struct node_base* peek = stack_peek(s, 0);
|
||||
if(peek->tag == NODE_APP) {
|
||||
|
@ -131,7 +220,7 @@ void unwind(struct stack* s) {
|
|||
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||
}
|
||||
|
||||
n->function(s);
|
||||
n->function(g);
|
||||
} else if(peek->tag == NODE_IND) {
|
||||
struct node_ind* n = (struct node_ind*) peek;
|
||||
stack_pop(s);
|
||||
|
@ -142,17 +231,7 @@ void unwind(struct stack* s) {
|
|||
}
|
||||
}
|
||||
|
||||
struct node_base* eval(struct node_base* n) {
|
||||
struct stack program_stack;
|
||||
stack_init(&program_stack);
|
||||
stack_push(&program_stack, n);
|
||||
unwind(&program_stack);
|
||||
struct node_base* result = stack_pop(&program_stack);
|
||||
stack_free(&program_stack);
|
||||
return result;
|
||||
}
|
||||
|
||||
extern void f_main(struct stack* s);
|
||||
extern void f_main(struct gmachine* s);
|
||||
|
||||
void print_node(struct node_base* n) {
|
||||
if(n->tag == NODE_APP) {
|
||||
|
@ -174,10 +253,17 @@ void print_node(struct node_base* n) {
|
|||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
struct gmachine gmachine;
|
||||
struct node_global* first_node = alloc_global(f_main, 0);
|
||||
struct node_base* result = eval((struct node_base*) first_node);
|
||||
struct node_base* result;
|
||||
|
||||
gmachine_init(&gmachine);
|
||||
gmachine_track(&gmachine, (struct node_base*) first_node);
|
||||
stack_push(&gmachine.stack, (struct node_base*) first_node);
|
||||
unwind(&gmachine);
|
||||
result = stack_pop(&gmachine.stack);
|
||||
printf("Result: ");
|
||||
print_node(result);
|
||||
putchar('\n');
|
||||
gmachine_free(&gmachine);
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
#include <stdlib.h>
|
||||
|
||||
struct stack;
|
||||
struct gmachine;
|
||||
|
||||
enum node_tag {
|
||||
NODE_APP,
|
||||
|
@ -13,6 +13,8 @@ enum node_tag {
|
|||
|
||||
/* Header shared by every graph node; concrete node structs embed it as `base`. */
struct node_base {
|
||||
/* Which kind of node this is (one of enum node_tag). */
enum node_tag tag;
|
||||
/* Mark flag: set to 1 by gc_visit_node, reset to 0 for survivors by gmachine_gc. */
int8_t gc_reachable;
|
||||
/* Next node in the owning gmachine's gc_nodes list (linked by gmachine_track). */
struct node_base* gc_next;
|
||||
};
|
||||
|
||||
struct node_app {
|
||||
|
@ -29,7 +31,7 @@ struct node_num {
|
|||
struct node_global {
|
||||
struct node_base base;
|
||||
int32_t arity;
|
||||
void (*function)(struct stack*);
|
||||
void (*function)(struct gmachine*);
|
||||
};
|
||||
|
||||
struct node_ind {
|
||||
|
@ -46,8 +48,10 @@ struct node_data {
|
|||
struct node_base* alloc_node();
|
||||
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
|
||||
struct node_num* alloc_num(int32_t n);
|
||||
struct node_global* alloc_global(void (*f)(struct stack*), int32_t a);
|
||||
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
|
||||
struct node_ind* alloc_ind(struct node_base* n);
|
||||
void free_node_direct(struct node_base*);
|
||||
void gc_visit_node(struct node_base*);
|
||||
|
||||
struct stack {
|
||||
size_t size;
|
||||
|
@ -61,10 +65,20 @@ void stack_push(struct stack* s, struct node_base* n);
|
|||
struct node_base* stack_pop(struct stack* s);
|
||||
struct node_base* stack_peek(struct stack* s, size_t o);
|
||||
void stack_popn(struct stack* s, size_t n);
|
||||
void stack_slide(struct stack* s, size_t n);
|
||||
void stack_update(struct stack* s, size_t o);
|
||||
void stack_alloc(struct stack* s, size_t o);
|
||||
void stack_pack(struct stack* s, size_t n, int8_t t);
|
||||
void stack_split(struct stack* s, size_t n);
|
||||
|
||||
struct node_base* eval(struct node_base* n);
|
||||
/* State handed to every compiled function: the stack plus GC bookkeeping. */
struct gmachine {
|
||||
/* The evaluation stack, stored by value inside the machine. */
struct stack stack;
|
||||
/* Head of the singly linked list (via gc_next) of all tracked nodes. */
struct node_base* gc_nodes;
|
||||
/* Number of nodes currently in gc_nodes; incremented by gmachine_track, decremented per freed node by gmachine_gc. */
int64_t gc_node_count;
|
||||
/* gmachine_track runs a collection once gc_node_count reaches this value; starts at 128 and is reset to twice the surviving count after each collection. */
int64_t gc_node_threshold;
|
||||
};
|
||||
|
||||
void gmachine_init(struct gmachine* g);
|
||||
void gmachine_free(struct gmachine* g);
|
||||
void gmachine_slide(struct gmachine* g, size_t n);
|
||||
void gmachine_update(struct gmachine* g, size_t o);
|
||||
void gmachine_alloc(struct gmachine* g, size_t o);
|
||||
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
|
||||
void gmachine_split(struct gmachine* g, size_t n);
|
||||
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
|
||||
void gmachine_gc(struct gmachine* g);
|
||||
|
|
|
@ -574,5 +574,6 @@ In the next several posts, we will improve
|
|||
our compiler to properly free unused memory
|
||||
using a __garbage collector__, implement
|
||||
lambda functions using __lambda lifting__,
|
||||
and use our Alloc instruction to implement `let/in` expressions. See
|
||||
you there!
|
||||
and use our Alloc instruction to implement `let/in` expressions.
|
||||
We get started on the first of these tasks in
|
||||
[Part 9 - Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}}).
|
||||
|
|
559
content/blog/09_compiler_garbage_collection.md
Normal file
559
content/blog/09_compiler_garbage_collection.md
Normal file
|
@ -0,0 +1,559 @@
|
|||
---
|
||||
title: Compiling a Functional Language Using C++, Part 9 - Garbage Collection
|
||||
date: 2020-01-27T20:35:15-08:00
|
||||
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||
draft: true
|
||||
---
|
||||
|
||||
> "When will you learn? When will you learn that __your actions have consequences?__"
|
||||
|
||||
So far, we've entirely ignored the problem of memory management. Every time
|
||||
that we need a new node for our growing graph, we simply ask for more memory
|
||||
from the runtime with `malloc`. But selfishly, even when we no longer require
|
||||
the memory allocated for a particular node, when that node is no longer in use,
|
||||
we do not `free` it. In fact, our runtime currently has no idea about
|
||||
which nodes are needed and which ones are ready to be discarded.
|
||||
|
||||
To convince ourselves that this is a problem, let's first assess the extent of the damage.
|
||||
Consider the program from `works3.txt`:
|
||||
|
||||
{{< rawblock "compiler/09/examples/works3.txt" >}}
|
||||
|
||||
Compiling and running this program through `valgrind`, we get the following output:
|
||||
|
||||
```
|
||||
==XXXX== LEAK SUMMARY:
|
||||
==XXXX== definitely lost: 288 bytes in 12 blocks
|
||||
==XXXX== indirectly lost: 768 bytes in 34 blocks
|
||||
==XXXX== possibly lost: 0 bytes in 0 blocks
|
||||
==XXXX== still reachable: 0 bytes in 0 blocks
|
||||
==XXXX== suppressed: 0 bytes in 0 blocks
|
||||
```
|
||||
|
||||
We lost 1056 bytes of memory, just to return the length of a list
|
||||
with 3 elements. The problem of leaking memory is very real.
|
||||
|
||||
How do we solve this issue? We can't embed memory management into our language;
|
||||
we want to keep it pure, and managing memory is typically pretty far from
|
||||
that goal. Instead, we will make our runtime do the work of freeing memory.
|
||||
Even then, this is a nontrivial goal: our runtime manipulates graphs, each
|
||||
of which can be combined with others in arbitrary ways. In general, there
|
||||
will not always be a _single_ node that, when freed, will guarantee that
|
||||
another node can be freed as well. Instead, it's very possible in our
|
||||
graphs that two parent nodes both refer to a third, and only when both
|
||||
parents are freed can we free that third node itself. Consider,
|
||||
for instance, the function `square` as follows:
|
||||
|
||||
```
|
||||
defn square x = {
|
||||
x * x
|
||||
}
|
||||
```
|
||||
|
||||
This function will receive, on top of the stack, a single graph representing `x`.
|
||||
It will then create two applications of a global `(*)` function, each time
|
||||
to the graph of `x`. Thus, it will construct a tree with two `App` nodes, both
|
||||
of which
|
||||
{{< sidenote "right" "lazy-note" "must keep track of a reference to x.">}}
|
||||
We later take advantage of this, by replacing the graph of <code>x</code> with the
|
||||
result of evaluating it. Since both <code>App</code> nodes point to the same
|
||||
graph, when we evaluate it once, each node observes this update, and is not
|
||||
required to evaluate <code>x</code> again. With this, we achieve lazy evaluation.
|
||||
{{< /sidenote >}} The runtime will have to wait until both `App` nodes
|
||||
are freed before it can free the graph of `x`.
|
||||
|
||||
This seems simple enough! If there are multiple things that may reference a node
|
||||
in the graph, why don't we just keep track of how many there are? Once we know
|
||||
that no more things are still referencing a node, we can free it. This is
|
||||
called [reference counting](https://en.wikipedia.org/wiki/Reference_counting).
|
||||
Reference counting is a valid technique, but unfortunately, it will not suit us.
|
||||
The reason for this is that our language may produce
|
||||
[cyclic graphs](https://en.wikipedia.org/wiki/Cycle_(graph_theory)). Consider,
|
||||
for example, this definition of an infinite list of the number 1:
|
||||
|
||||
```
|
||||
defn ones = { Cons 1 ones }
|
||||
```
|
||||
|
||||
Envisioning the graph of the tree, we can see `ones` as an application
|
||||
of the constructor `Cons` to two arguments, one of which is `ones` again.
|
||||
{{< sidenote "right" "recursive-note" "It refers to itself!" >}}
|
||||
Things are actually more complicated than this. In our current language,
|
||||
recursive definitions are only possible in function definitions (like
|
||||
<code>ones</code>). In our runtime, each time there is a reference
|
||||
to a function, this is done through a <em>new node</em>, which
|
||||
means that functions with recursive definitions are <em>not</em> represented cyclically.
|
||||
Therefore, reference counting would work. However, in the future,
|
||||
our language will have more ways of creating circular definitions,
|
||||
some of which will indeed create cycles in our graphs. So, to
|
||||
prepare for this, we will avoid the use of reference counting.
|
||||
{{< /sidenote >}} In this case, when we compute the number of nodes
|
||||
that require `ones`, we will always find the number to be at least 1: `ones`
|
||||
needs `ones`, which needs `ones`, and so on. It will not be possible for
|
||||
us to free `ones`, then, by simply counting the number of references to it.
|
||||
|
||||
There's a more powerful technique than reference counting for freeing
|
||||
unused memory: __mark-and-sweep garbage collection__. This technique
|
||||
is conceptually pretty simple to grasp, yet will allow us to handle
|
||||
cycles in our graphs. Unsurprisingly, we implement this type
|
||||
of garbage collection in two stages:
|
||||
|
||||
1. __Mark__: We go through every node that is still needed by
|
||||
the runtime, and recursively mark it, its children, and so on as "to keep".
|
||||
2. __Sweep__: We go through every node we haven't yet freed, and,
|
||||
if it hasn't been marked as "to keep", we free it.
|
||||
|
||||
This also seems simple enough. There are two main things for us
|
||||
to figure out:
|
||||
|
||||
1. For __Mark__, what are the "nodes still needed by the runtime"?
|
||||
These are just the nodes on the various G-machine stacks. If
|
||||
a node is not on the stack, nor is it a child of a node
|
||||
that is on the stack, why should we keep it around?
|
||||
2. For __Sweep__, how do we keep track of all the nodes we haven't
|
||||
yet freed? In our case, the solution is a global list of allocated
|
||||
nodes, which is updated every time that a node is allocated.
|
||||
|
||||
Wait a minute, though. Inside of `unwind` in C, we only have
|
||||
a reference to the most recent stack. Our execution model allows
|
||||
for an arbitrary number of stacks: we can keep using `Eval`,
|
||||
placing the current stack on the dump, and starting a new stack
|
||||
from scratch to evaluate a node. How can we traverse these stacks
|
||||
from inside unwind? One solution could be to have each stack
|
||||
point to the "parent" stack. To find all the nodes on the
|
||||
stack, then, we'd start with the current stack, mark all the
|
||||
nodes on it as "required", then move on to the parent stack,
|
||||
rinse and repeat. This is plausible and pretty simple, but
|
||||
there's another way.
|
||||
|
||||
We clean up after ourselves.
|
||||
|
||||
### Towards a Cleaner Stack
|
||||
Simon Peyton Jones wrote his G-machine semantics in a particular way. Every time
|
||||
that a function is called, all it leaves behind on the stack is the graph node
|
||||
that represents the function's output. Our own internal functions, however, have been less
|
||||
careful. Consider, for instance, the "binary operator" function I showed you.
|
||||
Its body is given by the following G-machine instructions:
|
||||
|
||||
```C++
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
||||
```
|
||||
|
||||
When the function is called, there are at least 3 things on the stack:
|
||||
|
||||
1. The "outermost" application node, to be replaced with an indirection (to enable laziness).
|
||||
2. The second argument to the binary operator.
|
||||
3. The first argument to the binary operator.
|
||||
|
||||
Then, __Push__ adds another node to the stack, and __Eval__ forces
|
||||
its evaluation (and leaves it on the stack). This happens again with the second argument.
|
||||
Finally, we call __BinOp__, popping two values off the stack and combining them
|
||||
according to the binary operator. This leaves the stack with 4 things: the 3 I described
|
||||
above, and the newly computed value. This is fine as far as `eval` is concerned: its
|
||||
implementation only asks for the top value on the stack after `unwind` finishes. But
|
||||
for anything more complicated, this is a very bad side effect. We want to leave the
|
||||
stack as clean as we found it - with one node and no garbage.
|
||||
|
||||
Fortunately, the way we compile functions is a good guide for how we should
|
||||
compile internal operators and constructors. The idea is captured
|
||||
by the two instructions we insert at the end of a user-defined
|
||||
function:
|
||||
|
||||
{{< codelines "C++" "compiler/09/definition.cpp" 56 57 >}}
|
||||
|
||||
Once a result is computed, we turn the node that represented the application
|
||||
into an indirection, and point it to the computed result (as I said before,
|
||||
this enables lazy evaluation). We also pop the arguments given to the function
|
||||
off the stack. Let's add these two things to the `gen_llvm_internal_op` function:
|
||||
|
||||
{{< codelines "C++" "compiler/09/main.cpp" 70 85 >}}
|
||||
|
||||
Notice, in particular, the `instruction_update(2)` and `instruction_pop(2)`
|
||||
instructions that were recently added. A similar thing has to be done for data
|
||||
type constructors. The difference, though, is that __Pack__ removes the data
|
||||
it packs from the stack, and thus, __Pop__ is not needed:
|
||||
|
||||
{{< codelines "C++" "compiler/09/definition.cpp" 102 117 >}}
|
||||
|
||||
With this done, let's run a quick test: let's print the number of things
|
||||
on the stack at the end of an `eval` call (before the stack is freed,
|
||||
of course). We can compare the output of the runtime without the fix (`old`)
|
||||
and with the fix (`current`):
|
||||
|
||||
```
|
||||
current old
|
||||
|
||||
Current stack size is 0 | Current stack size: 1
|
||||
Current stack size is 0 | Current stack size: 1
|
||||
Current stack size is 0 | Current stack size: 1
|
||||
Current stack size is 0 | Current stack size: 1
|
||||
Current stack size is 0 | Current stack size: 0
|
||||
Current stack size is 0 | Current stack size: 0
|
||||
Current stack size is 0 | Current stack size: 3
|
||||
Current stack size is 0 | Current stack size: 0
|
||||
Current stack size is 0 | Current stack size: 3
|
||||
Current stack size is 0 | Current stack size: 0
|
||||
Current stack size is 0 | Current stack size: 3
|
||||
Result: 3 | Result: 3
|
||||
```
|
||||
|
||||
The stack is now much cleaner! Every time `eval` is called, it starts
|
||||
with one node, and ends with one node (which is then popped).
|
||||
|
||||
### One Stack to Rule Them All
|
||||
|
||||
Wait a minute. If the stack is really always empty at the end, do we really need to construct
|
||||
a new stack every time?
|
||||
{{< sidenote "right" "arity-note" "I think not" >}}
|
||||
There's some nuance to this. While it is true that for the most
|
||||
part, we can get rid of the new stacks in favor of a single
|
||||
one, our runtime will experience a change. The change lies
|
||||
in the Unwind-Global rule, which <em>requires that the
|
||||
stack has as many children as the function needs
|
||||
arguments</em>. Until now, there was no way
|
||||
for this condition to be accidentally satisfied: the function
|
||||
we were unwinding was the only thing on the stack. Now,
|
||||
though, things are different: the function being
|
||||
unwound may share a stack with something else,
|
||||
and just checking the stack size will not be sufficient.
|
||||
<em>I believe</em> that this is not a problem for us,
|
||||
since the compiler will only emit <strong>Eval</strong>
|
||||
instructions for things it knows are data types or numbers,
|
||||
meaning their type is not a partially applied function
|
||||
that is missing arguments. However, this is a nontrivial
|
||||
observation.
|
||||
{{< /sidenote >}}, and Simon Peyton Jones seems to
|
||||
agree. In _Implementing Functional Languages: a tutorial_, he mentions
|
||||
that the dump does not need to be implemented as a real stack of stacks.
|
||||
So let's try this out: instead of starting a new stack using `eval`,
|
||||
let's use an existing one, by just calling `unwind` again. To do so,
|
||||
all we have to do is change our `instruction_eval` instruction. When
|
||||
the G-machine wants something evaluated now, it should just call
|
||||
`unwind` directly!
|
||||
|
||||
To make this change, we have to make `unwind` available to the
|
||||
compiler. We thus declare it in the `llvm_context.cpp` file:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 158 163 >}}
|
||||
|
||||
And even create a function to construct a call to `unwind`
|
||||
with the following signature:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.hpp" 58 58 >}}
|
||||
|
||||
We implement it like so:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 217 220 >}}
|
||||
|
||||
Finally, the `instruction_eval::gen_llvm` method simply calls
|
||||
`unwind`:
|
||||
|
||||
{{< codelines "C++" "compiler/09/instruction.cpp" 157 159 >}}
|
||||
|
||||
After this change, we only call `eval` from `main`. Furthermore,
|
||||
since `eval` releases all the resources it allocates before
|
||||
returning, we won't be able to
|
||||
{{< sidenote "right" "retrieve-note" "easily retrieve" >}}
|
||||
We were able to do this before, but that's because our
|
||||
runtime didn't free the nodes, <em>ever</em>. Now that
|
||||
it does, returning a node violates that node's lifetime.
|
||||
{{< /sidenote >}} the result of the evaluation from it.
|
||||
Thus, we simply merge `eval` with `main` - combining
|
||||
the printing and the initialization / freeing
|
||||
code.
|
||||
|
||||
With this, only one stack will be allocated for the entirety of
|
||||
program execution. This doesn't just help us save on memory
|
||||
allocations, but also __solves the problem of marking
|
||||
valid nodes during garbage collection__! Instead of traversing
|
||||
a dump of stacks, we can now simply traverse a single stack;
|
||||
all that we need is in one place.
|
||||
|
||||
So this takes care, more or less, of the "mark" portion of mark-and-sweep.
|
||||
Using the stack, we can recursively mark the nodes that we need. But
|
||||
what about "sweeping"? How can we possibly know of every node that
|
||||
we've allocated? There's some more bookkeeping for us to do.
|
||||
|
||||
### It's All Connected
|
||||
There exists a simple technique I've previously seen (and used)
|
||||
for keeping track of all the allocated memory. The technique is
|
||||
to __turn all the allocated nodes into elements of a linked list__.
|
||||
The general process of implementing this proceeds as follows:
|
||||
|
||||
1. To each node, add a "next" pointer.
|
||||
2. Keep a handle to the whole node chain somewhere.
|
||||
3. Add each newly allocated node to the front of the whole chain.
|
||||
|
||||
This "somewhere" could be a global variable. However,
|
||||
since we already pass a stack to almost all of our
|
||||
functions, it makes more sense to make the list handle
|
||||
a part of some data structure that will also contain the stack,
|
||||
and pass that around, instead. This keeps all of the G-machine
|
||||
data in one place, and in principle could allow for concurrent
|
||||
execution of more than one G-machine in a single program. Let's
|
||||
call our new data structure `gmachine`:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.h" 69 74 >}}
|
||||
|
||||
Here, the `stack` field holds the G-machine stack,
|
||||
and the `gc_nodes` field is the handle to the list of all the nodes
|
||||
we've allocated and not yet freed. Don't worry about the `gc_node_count`
|
||||
and `gc_threshold` fields - we'll get to them a little later.
|
||||
|
||||
This is going to be a significant change. First of all, since
|
||||
the handle won't be global, it can't be accessed from inside the
|
||||
`alloc_*` functions. Instead, we have to make sure to add
|
||||
nodes allocated through `alloc_*` to a G-machine
|
||||
wherever we call the allocators. To make it easier to add nodes to a G-machine
|
||||
GC handle, let's make a new function, `gmachine_track`:
|
||||
|
||||
```C
|
||||
struct node_base* gmachine_track(struct gmachine*, struct node_base*);
|
||||
```
|
||||
|
||||
This function will add the given node to the G-machine's handle,
|
||||
and return that same node. This way, we can wrap nodes in
|
||||
a call to `gmachine_track`. We will talk about this
|
||||
function's implementation later in the post.
|
||||
|
||||
This doesn't get us all the way to a working runtime, though:
|
||||
right now, we still pass around `struct stack*` instead of
|
||||
`struct gmachine*` everywhere. However, the whole point
|
||||
of adding the `gmachine` struct was to store more data in it!
|
||||
Surely we need that new data somewhere, and thus, we need to
|
||||
use the `gmachine` struct for _some_ functions. What functions
|
||||
_do_ need a whole `gmachine*`, and which ones only need
|
||||
a `stack*`?
|
||||
|
||||
1. {{< sidenote "right" "ownership-note" "Clearly," >}}
|
||||
This might not be clear. Maybe <em>pushing</em> onto a stack will
|
||||
add a node to our GC handle, and so, we need to have access
|
||||
to the handle in <code>stack_push</code>. The underlying
|
||||
question is that of <em>ownership</em>: when we allocate
|
||||
a node, which part of the program does it "belong" to?
|
||||
The "owner" of the node should do the work of managing
|
||||
when to free it or keep it. Since we already agreed to
|
||||
create a <code>gmachine</code> struct to house the GC
|
||||
handle, it makes sense that nodes are owned by the
|
||||
G-machine. Thus, the assumption in functions like
|
||||
<code>stack_push</code> is that the "owner" of the node
|
||||
already took care of allocating and tracking it, and
|
||||
<code>stack_push</code> itself shouldn't bother.
|
||||
{{< /sidenote >}} `stack_push`, `stack_pop`, and similar functions
|
||||
do not require a G-machine.
|
||||
2. `stack_alloc` and `stack_pack` __do__ need a G-machine,
|
||||
because they must allocate new nodes. Where the nodes
|
||||
are allocated, we should add them to the GC handle.
|
||||
3. Since they use `stack_alloc` and `stack_pack`,
|
||||
generated functions also need a G-machine.
|
||||
4. Since `unwind` calls the generated functions,
|
||||
it must also receive a G-machine.
|
||||
|
||||
As far as stack functions go, we only _need_ to update
|
||||
`stack_alloc` and `stack_pack`. Everything else
|
||||
doesn't require new node allocations, and thus,
|
||||
does not require the GC handle. However, this makes
|
||||
our code rather ugly: we have a set of mostly `stack_*`
|
||||
functions, followed suddenly by two `gmachine_*` functions.
|
||||
In the interest of cleanliness, let's instead do the following:
|
||||
|
||||
1. Make all functions associated with G-machine rules (like
|
||||
__Alloc__, __Update__, and so on) require a `gmachine*`. This
|
||||
way, there's a correspondence between our code and the theory.
|
||||
2. Leave the rest of the functions (`stack_push`, `stack_pop`,
|
||||
etc.) as is. They are not G-machine specific, and don't
|
||||
require a GC handle, so there's no need to touch them.
|
||||
|
||||
Let's make this change. We end up with the following
|
||||
functions:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.h" 56 84 >}}
|
||||
|
||||
For the majority of the changed functions, the
|
||||
updates are
|
||||
{{< sidenote "right" "cosmetic-note" "cosmetic." >}}
|
||||
We must also update the LLVM/C++ declarations of
|
||||
the affected functions: many of them now take a
|
||||
<code>gmachine_ptr_type</code> instead of <code>stack_ptr_type</code>.
|
||||
This change is not shown explicitly here (it is hard to do with our
|
||||
growing code base), but it is nonetheless significant.
|
||||
{{< /sidenote >}} The functions
|
||||
that require more significant modifications are `gmachine_alloc`
|
||||
and `gmachine_pack`. In both, we must now make a call to `gmachine_track`
|
||||
to ensure that a newly allocated node will be garbage collected in the future.
|
||||
The updated code for `gmachine_alloc` is:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.c" 140 145 >}}
|
||||
|
||||
Correspondingly, the updated code for `gmachine_pack` is:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.c" 147 162 >}}
|
||||
|
||||
Note that we've secretly made one more change. Instead of
|
||||
allocating `sizeof(*data) * n` bytes of memory for
|
||||
the array of packed nodes, we allocate `sizeof(*data) * (n + 1)`,
|
||||
and set the last element to `NULL`. This will allow other
|
||||
functions (which we will soon write) to know how many elements are packed inside
|
||||
a `node_data` (effectively, we've added a `NULL` terminator).
|
||||
|
||||
We must change our compiler to keep it up to date with this change. Importantly,
|
||||
it must know that a G-machine struct exists. To give it
|
||||
this information, we add a new
|
||||
`llvm::StructType*` called `gmachine_type` to the `llvm_context` class,
|
||||
initialize it in the constructor, and set its body as follows:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 21 26 >}}
|
||||
|
||||
The compiler must also know that generated functions now use the G-machine
|
||||
struct rather than a stack struct:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 19 19 >}}
|
||||
|
||||
Since we still use some functions that require a stack and not a G-machine,
|
||||
we must have a way to get the stack from a G-machine. To do this,
|
||||
we create a new `unwrap` function, which uses LLVM's GEP instruction
|
||||
to get a pointer to the G-machine's stack field:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 222 225 >}}
|
||||
|
||||
We use this function elsewhere, such as in `llvm_context::create_pop`:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 176 179 >}}
|
||||
|
||||
Finally, we want to make sure our generated functions don't allocate
|
||||
nodes without tracking them with the G-machine. To do so, we modify
|
||||
all the `create_*` methods to require the G-machine function argument,
|
||||
and update the functions themselves to call `gmachine_track`. For
|
||||
example, here's `llvm_context::create_num`:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 235 239 >}}
|
||||
|
||||
Of course, this requires us to add a new `create_track` method
|
||||
to the `llvm_context`:
|
||||
|
||||
{{< codelines "C++" "compiler/09/llvm_context.cpp" 212 215 >}}
|
||||
|
||||
This is good. Let's now implement the actual mark-and-sweep algorithm
|
||||
in `gmachine_gc`:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.c" 186 204 >}}
|
||||
|
||||
In the code above, we first iterate through the stack,
|
||||
calling `gc_visit_node` on every node that we encounter. The
|
||||
assumption is that once `gc_visit_node` is done, every node
|
||||
that _can_ be reached has its `gc_reachable` field set to 1,
|
||||
and all the others have it set to 0.
|
||||
|
||||
Once we reach the end of the stack, we continue to the "sweep" phase,
|
||||
iterating through the linked list of nodes (held in the G-machine
|
||||
GC handle `gc_nodes`). For each node, if its `gc_reachable` flag
|
||||
is not set, we remove it from the linked list, and call `free_node_direct`
|
||||
on it. Otherwise (that is, if the flag __is__ set), we clear it,
|
||||
so that the node can potentially be garbage collected in a future
|
||||
invocation of `gmachine_gc`.
|
||||
|
||||
`gc_visit_node` recursively marks a node and its children as reachable:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.c" 51 70 >}}
|
||||
|
||||
This is possible with the `node_data` nodes because of the change we
|
||||
made to the `gmachine_pack` instruction earlier: now, the last element
|
||||
of the "packed" array is `NULL`, telling `gc_visit_node` that it has
|
||||
reached the end of the list of children.
|
||||
|
||||
`free_node_direct` performs a non-recursive deallocation of all
|
||||
the resources held by a particular node. So far, this is only
|
||||
needed for `node_data` nodes, since the arrays holding their children
|
||||
are dynamically allocated. Thus, the code for the function is
|
||||
pretty simple:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.c" 45 49 >}}
|
||||
|
||||
### When to Collect
|
||||
When should we run garbage collection? Initially, I tried
|
||||
running it after every call to `unwind`. However, this
|
||||
quickly proved impractical: the performance of all
|
||||
the programs in the language decreased by a spectacular
|
||||
amount. Programs like `works1.txt` and `works2.txt`
|
||||
would take tens of seconds to complete.
|
||||
|
||||
Instead of this madness, let's settle for an approach
|
||||
common to many garbage collectors. Let's __perform
|
||||
garbage collection every time the amount of
|
||||
memory we've allocated doubles__. Tracking when the
|
||||
amount of allocated memory doubles is the purpose of
|
||||
the `gc_node_count` and `gc_threshold` fields in the
|
||||
`gmachine` struct. The former field tracks how many
|
||||
nodes have been tracked by the garbage collector, and the
|
||||
latter holds the number of nodes the G-machine must
|
||||
reach before triggering garbage collection.
|
||||
|
||||
Since the G-machine is made aware of allocations
|
||||
by a call to the `gmachine_track` function, this
|
||||
is where we will attempt to perform garbage collection.
|
||||
We end up with the following code:
|
||||
|
||||
{{< codelines "C" "compiler/09/runtime.c" 171 184 >}}
|
||||
|
||||
When a node is added to the GC handle, we increment the `gc_node_count`
|
||||
field. If the new value of this field exceeds the threshold,
|
||||
we perform garbage collection. There are cases in which
|
||||
this is fairly dangerous: for instance, `gmachine_pack` first
|
||||
moves all packed nodes into an array, then allocates a `node_data`
|
||||
node. This means that for a brief moment, the nodes stored
|
||||
into the new data node are inaccessible from the stack,
|
||||
and thus susceptible to garbage collection. To prevent
|
||||
situations like this, we run `gc_visit_node` on the node
|
||||
being tracked, marking it and its children as "reachable".
|
||||
Finally, we set the next "free" threshold to double
|
||||
the number of currently allocated nodes.
|
||||
|
||||
This is about as much as we need to do. The change in this
|
||||
post was a major one, and required updating multiple files.
|
||||
As always, you're welcome to check out [the compiler source
|
||||
code for this post](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/09).
|
||||
To wrap up, let's evaluate our change.
|
||||
|
||||
To especially stress the compiler, I came up with a prime number
|
||||
generator. Since booleans are not in the standard library, and
|
||||
since it isn't possible to pattern match on numbers, my
|
||||
only option was to use Peano encoding. This effectively
|
||||
means that numbers are represented as linked lists,
|
||||
which makes garbage collection all the more
|
||||
important. The program is quite long, but you can
|
||||
[find the entire code here](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/09/examples/primes.txt).
|
||||
|
||||
When I ran the `primes` program compiled using the
|
||||
previous version of the compiler using `time`, I
|
||||
got the following output:
|
||||
|
||||
```
|
||||
Maximum resident set size (kbytes): 935764
|
||||
Minor (reclaiming a frame) page faults: 233642
|
||||
```
|
||||
|
||||
In contrast, here is the output of `time` when running
|
||||
the same program compiled with the new version of
|
||||
the compiler:
|
||||
|
||||
```
|
||||
Maximum resident set size (kbytes): 7448
|
||||
Minor (reclaiming a frame) page faults: 1577
|
||||
```
|
||||
|
||||
We have reduced maximum memory usage by a factor of
|
||||
125, and the number of page faults by a factor of 148.
|
||||
That seems pretty good!
|
||||
|
||||
With this success, we end today's post. As I mentioned
|
||||
before, we're not done. The language is still clunky to use,
|
||||
and can benefit from `let/in` expressions and __lambda functions__.
|
||||
Furthermore, our language is currently monomorphic, and would
|
||||
be much better with __polymorphism__. Finally, to make our language
|
||||
capable of more-than-trivial work, we may want to implement
|
||||
__Input/Output__ and __strings__. I hope to see you in future posts,
|
||||
where we will implement these features!
|
Loading…
Reference in New Issue
Block a user