From c79b5a4120e0d185a83f994971c74f8579b6969d Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Tue, 5 Nov 2019 00:42:33 -0800 Subject: [PATCH] Start writing actual compilation code in compiler series --- code/compiler/08/ast.cpp | 3 +- code/compiler/08/instruction.cpp | 67 ++++++++++++++ code/compiler/08/instruction.hpp | 21 +++++ code/compiler/08/llvm_context.cpp | 141 +++++++++++++++++++++++++++++- code/compiler/08/llvm_context.hpp | 28 +++++- content/blog/08_compiler_llvm.md | 7 +- 6 files changed, 260 insertions(+), 7 deletions(-) diff --git a/code/compiler/08/ast.cpp b/code/compiler/08/ast.cpp index 00d92a3..c04eb62 100644 --- a/code/compiler/08/ast.cpp +++ b/code/compiler/08/ast.cpp @@ -206,7 +206,8 @@ void ast_case::compile(const env_ptr& env, std::vector& into) c new_env = env_ptr(new env_var(*it, new_env)); } - branch_instructions.push_back(instruction_ptr(new instruction_split())); + branch_instructions.push_back(instruction_ptr(new instruction_split( + cpat->params.size()))); branch->expr->compile(new_env, branch_instructions); branch_instructions.push_back(instruction_ptr(new instruction_slide( cpat->params.size()))); diff --git a/code/compiler/08/instruction.cpp b/code/compiler/08/instruction.cpp index 0272e20..a845da4 100644 --- a/code/compiler/08/instruction.cpp +++ b/code/compiler/08/instruction.cpp @@ -1,4 +1,8 @@ #include "instruction.hpp" +#include "llvm_context.hpp" +#include + +using namespace llvm; static void print_indent(int n, std::ostream& to) { while(n--) to << " "; @@ -9,36 +13,66 @@ void instruction_pushint::print(int indent, std::ostream& to) const { to << "PushInt(" << value << ")" << std::endl; } +void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_num(ctx.create_i32(value))); +} + void instruction_pushglobal::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "PushGlobal(" << name << ")" << std::endl; } +void 
instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const { + // TODO +} + void instruction_push::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Push(" << offset << ")" << std::endl; } +void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset))); +} + void instruction_mkapp::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "MkApp()" << std::endl; } +void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const { + auto left = ctx.create_pop(f); + auto right = ctx.create_pop(f); + ctx.create_push(f, ctx.create_app(left, right)); +} + void instruction_update::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Update(" << offset << ")" << std::endl; } +void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_update(f, ctx.create_size(offset)); +} + void instruction_pack::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Pack(" << tag << ", " << size << ")" << std::endl; } +void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag)); +} + void instruction_split::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Split()" << std::endl; } +void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_split(f, ctx.create_size(size)); +} + void instruction_jump::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Jump(" << std::endl; @@ -52,27 +86,60 @@ void instruction_jump::print(int indent, std::ostream& to) const { to << ")" << std::endl; } +void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const { + // TODO +} + void instruction_slide::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Slide(" << offset << ")" << std::endl; } +void 
instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_slide(f, ctx.create_size(offset)); +} + void instruction_binop::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "BinOp(" << op_action(op) << ")" << std::endl; } +void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const { + auto left_int = ctx.unwrap_num(ctx.create_pop(f)); + auto right_int = ctx.unwrap_num(ctx.create_pop(f)); + llvm::Value* result; + switch(op) { + case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break; + case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break; + case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break; + case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break; + } + ctx.create_push(f, ctx.create_num(result)); +} + void instruction_eval::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Eval()" << std::endl; } +void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_push(f, ctx.create_eval(ctx.create_pop(f))); +} + void instruction_alloc::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Alloc(" << amount << ")" << std::endl; } +void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const { + ctx.create_alloc(f, ctx.create_size(amount)); +} + void instruction_unwind::print(int indent, std::ostream& to) const { print_indent(indent, to); to << "Unwind()" << std::endl; } + +void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const { + // Nothing +} diff --git a/code/compiler/08/instruction.hpp b/code/compiler/08/instruction.hpp index 879cd05..fa31e18 100644 --- a/code/compiler/08/instruction.hpp +++ b/code/compiler/08/instruction.hpp @@ -1,15 +1,18 @@ #pragma once +#include #include #include #include #include #include #include "binop.hpp" +#include "llvm_context.hpp" struct instruction { virtual ~instruction() = default; virtual void print(int 
indent, std::ostream& to) const = 0; + virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0; }; using instruction_ptr = std::unique_ptr; @@ -21,6 +24,7 @@ struct instruction_pushint : public instruction { : value(v) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_pushglobal : public instruction { @@ -30,6 +34,7 @@ struct instruction_pushglobal : public instruction { : name(std::move(n)) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_push : public instruction { @@ -39,10 +44,12 @@ struct instruction_push : public instruction { : offset(o) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_mkapp : public instruction { void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_update : public instruction { @@ -52,6 +59,7 @@ struct instruction_update : public instruction { : offset(o) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_pack : public instruction { @@ -62,10 +70,17 @@ struct instruction_pack : public instruction { : tag(t), size(s) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_split : public instruction { + int size; + + instruction_split(int s) + : size(s) {} + void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_jump : public instruction { @@ -73,6 +88,7 @@ struct instruction_jump : public instruction { std::map tag_mappings; void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_slide : public 
instruction { @@ -82,6 +98,7 @@ struct instruction_slide : public instruction { : offset(o) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_binop : public instruction { @@ -91,10 +108,12 @@ struct instruction_binop : public instruction { : op(o) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_eval : public instruction { void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_alloc : public instruction { @@ -104,8 +123,10 @@ struct instruction_alloc : public instruction { : amount(a) {} void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; struct instruction_unwind : public instruction { void print(int indent, std::ostream& to) const; + void gen_llvm(llvm_context& ctx, llvm::Function* f) const; }; diff --git a/code/compiler/08/llvm_context.cpp b/code/compiler/08/llvm_context.cpp index 582b45c..5aadeef 100644 --- a/code/compiler/08/llvm_context.cpp +++ b/code/compiler/08/llvm_context.cpp @@ -3,7 +3,7 @@ using namespace llvm; -void llvm_state::create_types() { +void llvm_context::create_types() { stack_type = StructType::create(ctx, "stack"); stack_ptr_type = PointerType::getUnqual(stack_type); tag_type = IntegerType::getInt8Ty(ctx); @@ -14,11 +14,38 @@ void llvm_state::create_types() { struct_types["node_ind"] = StructType::create(ctx, "node_ind"); struct_types["node_data"] = StructType::create(ctx, "node_data"); node_ptr_type = PointerType::getUnqual(struct_types.at("node_base")); + function_type = FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false); + + struct_types.at("node_base")->setBody( + IntegerType::getInt32Ty(ctx) + ); + struct_types.at("node_app")->setBody( + struct_types.at("node_base"), + node_ptr_type, + node_ptr_type + ); + 
struct_types.at("node_num")->setBody( + struct_types.at("node_base"), + IntegerType::getInt32Ty(ctx) + ); + struct_types.at("node_global")->setBody( + struct_types.at("node_base"), + FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false) + ); + struct_types.at("node_ind")->setBody( + struct_types.at("node_base"), + node_ptr_type + ); + struct_types.at("node_data")->setBody( + struct_types.at("node_base"), + IntegerType::getInt8Ty(ctx), + PointerType::getUnqual(node_ptr_type) + ); } -void llvm_state::create_functions() { +void llvm_context::create_functions() { auto void_type = Type::getVoidTy(ctx); - auto sizet_type = IntegerType::getInt64Ty(ctx); + auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8); functions["stack_init"] = Function::Create( FunctionType::get(void_type, { stack_ptr_type }, false), Function::LinkageTypes::ExternalLinkage, @@ -85,4 +112,112 @@ void llvm_state::create_functions() { "stack_push", &module ); + + auto int32_type = IntegerType::getInt32Ty(ctx); + functions["alloc_app"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_app", + &module + ); + functions["alloc_num"] = Function::Create( + FunctionType::get(node_ptr_type, { int32_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_num", + &module + ); + functions["alloc_global"] = Function::Create( + FunctionType::get(node_ptr_type, { function_type, int32_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_global", + &module + ); + functions["alloc_ind"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "alloc_ind", + &module + ); + + functions["eval"] = Function::Create( + FunctionType::get(node_ptr_type, { node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "eval", + &module + ); +} + +Value* llvm_context::create_i8(int8_t i) { + return 
ConstantInt::get(ctx, APInt(8, i)); +} +Value* llvm_context::create_i32(int32_t i) { + return ConstantInt::get(ctx, APInt(32, i)); +} +Value* llvm_context::create_size(size_t i) { + return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i)); +} + +Value* llvm_context::create_pop(Function* f) { + auto pop_f = functions.at("stack_pop"); + return builder.CreateCall(pop_f, { f->arg_begin() }); +} +Value* llvm_context::create_peek(Function* f, Value* off) { + auto peek_f = functions.at("stack_peek"); + return builder.CreateCall(peek_f, { f->arg_begin(), off }); +} +void llvm_context::create_push(Function* f, Value* v) { + auto push_f = functions.at("stack_push"); + builder.CreateCall(push_f, { f->arg_begin(), v }); +} +void llvm_context::create_popn(Function* f, Value* off) { + auto popn_f = functions.at("stack_popn"); + builder.CreateCall(popn_f, { f->arg_begin(), off }); +} +void llvm_context::create_update(Function* f, Value* off) { + auto update_f = functions.at("stack_update"); + builder.CreateCall(update_f, { f->arg_begin(), off }); +} +void llvm_context::create_pack(Function* f, Value* c, Value* t) { + auto pack_f = functions.at("stack_pack"); + builder.CreateCall(pack_f, { f->arg_begin(), c, t }); +} +void llvm_context::create_split(Function* f, Value* c) { + auto split_f = functions.at("stack_split"); + builder.CreateCall(split_f, { f->arg_begin(), c }); +} +void llvm_context::create_slide(Function* f, Value* off) { + auto slide_f = functions.at("stack_slide"); + builder.CreateCall(slide_f, { f->arg_begin(), off }); +} +void llvm_context::create_alloc(Function* f, Value* n) { + auto alloc_f = functions.at("stack_alloc"); + builder.CreateCall(alloc_f, { f->arg_begin(), n }); +} + +Value* llvm_context::create_eval(Value* e) { + auto eval_f = functions.at("eval"); + return builder.CreateCall(eval_f, { e }); +} + +Value* llvm_context::unwrap_num(Value* v) { + auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num")); + auto cast = 
builder.CreatePointerCast(v, num_ptr_type); + auto offset_0 = create_size(0); + auto offset_1 = create_size(1); + auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 }); + return builder.CreateLoad(int_ptr); +} +Value* llvm_context::create_num(Value* v) { + auto alloc_num_f = functions.at("alloc_num"); + return builder.CreateCall(alloc_num_f, { v }); +} + +Value* llvm_context::create_global(Value* f, Value* a) { + auto alloc_global_f = functions.at("alloc_global"); + return builder.CreateCall(alloc_global_f, { f, a }); +} + +Value* llvm_context::create_app(Value* l, Value* r) { + auto alloc_app_f = functions.at("alloc_app"); + return builder.CreateCall(alloc_app_f, { l, r }); } diff --git a/code/compiler/08/llvm_context.hpp b/code/compiler/08/llvm_context.hpp index 5863a85..5067ba1 100644 --- a/code/compiler/08/llvm_context.hpp +++ b/code/compiler/08/llvm_context.hpp @@ -6,7 +6,7 @@ #include #include -struct llvm_state { +struct llvm_context { llvm::LLVMContext ctx; llvm::IRBuilder<> builder; llvm::Module module; @@ -18,8 +18,9 @@ struct llvm_state { llvm::PointerType* stack_ptr_type; llvm::PointerType* node_ptr_type; llvm::IntegerType* tag_type; + llvm::FunctionType* function_type; - llvm_state() + llvm_context() : builder(ctx), module("bloglang", ctx) { create_types(); create_functions(); @@ -27,4 +28,27 @@ struct llvm_state { void create_types(); void create_functions(); + + llvm::Value* create_i8(int8_t); + llvm::Value* create_i32(int32_t); + llvm::Value* create_size(size_t); + + llvm::Value* create_pop(llvm::Function*); + llvm::Value* create_peek(llvm::Function*, llvm::Value*); + void create_push(llvm::Function*, llvm::Value*); + void create_popn(llvm::Function*, llvm::Value*); + void create_update(llvm::Function*, llvm::Value*); + void create_pack(llvm::Function*, llvm::Value*, llvm::Value*); + void create_split(llvm::Function*, llvm::Value*); + void create_slide(llvm::Function*, llvm::Value*); + void create_alloc(llvm::Function*, llvm::Value*); + + 
llvm::Value* create_eval(llvm::Value*); + + llvm::Value* unwrap_num(llvm::Value*); + llvm::Value* create_num(llvm::Value*); + + llvm::Value* create_global(llvm::Value*, llvm::Value*); + + llvm::Value* create_app(llvm::Value*, llvm::Value*); }; diff --git a/content/blog/08_compiler_llvm.md b/content/blog/08_compiler_llvm.md index 81b0550..c0380e1 100644 --- a/content/blog/08_compiler_llvm.md +++ b/content/blog/08_compiler_llvm.md @@ -55,6 +55,7 @@ a `Module` object, which represents some collection of code and declarations {{< codeblock "C++" "compiler/08/llvm_context.hpp" >}} {{< todo >}} Consistently name context / state.{{< /todo >}} +{{< todo >}} Explain creation functions. {{< /todo >}} We include the LLVM context, builder, and module as members of the context struct. Since the builder and the module need @@ -97,6 +98,8 @@ which you can extrapolate the rest: {{< codelines "C++" "compiler/08/llvm_context.cpp" 7 11 >}} +{{< todo >}} Also show struct body setters. {{< /todo >}} + Similarly, here are a few lines from `create_functions()`, which give a very good idea of the rest of that method: @@ -180,5 +183,7 @@ methods. With these things in mind, here's the signature for `gen_llvm`: ```C++ -virtual void gen_llvm(const llvm_context&, llvm::Function*) const; +virtual void gen_llvm(llvm_context&, llvm::Function*) const; ``` + +{{< todo >}} Fix pointer type inconsistencies. {{< /todo >}}