From 81ee50d0d4910cdddde92cb21bf624e88d930732 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 4 Nov 2019 18:25:54 -0800 Subject: [PATCH] Implement function and type creation, add text to blog in compiler series --- code/compiler/08/CMakeLists.txt | 1 + code/compiler/08/llvm_context.cpp | 90 +++++++++++++++++--- code/compiler/08/llvm_context.hpp | 1 + content/blog/08_compiler_llvm.md | 133 +++++++++++++++++++++++++++++- 4 files changed, 214 insertions(+), 11 deletions(-) diff --git a/code/compiler/08/CMakeLists.txt b/code/compiler/08/CMakeLists.txt index bce62c8..f31664b 100644 --- a/code/compiler/08/CMakeLists.txt +++ b/code/compiler/08/CMakeLists.txt @@ -22,6 +22,7 @@ llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen) # Create compiler executable add_executable(compiler ast.cpp ast.hpp definition.cpp + llvm_context.cpp llvm_context.hpp type_env.cpp type_env.hpp env.cpp env.hpp type.cpp type.hpp diff --git a/code/compiler/08/llvm_context.cpp b/code/compiler/08/llvm_context.cpp index a019db5..582b45c 100644 --- a/code/compiler/08/llvm_context.cpp +++ b/code/compiler/08/llvm_context.cpp @@ -1,18 +1,88 @@ #include "llvm_context.hpp" #include +using namespace llvm; + void llvm_state::create_types() { - stack_type = llvm::StructType::create(ctx, "stack"); - tag_type = llvm::IntegerType::getInt8Ty(ctx); - struct_types["node_base"] = llvm::StructType::create(ctx, "node_base"); - struct_types["node_app"] = llvm::StructType::create(ctx, "node_app"); - struct_types["node_num"] = llvm::StructType::create(ctx, "node_num"); - struct_types["node_global"] = llvm::StructType::create(ctx, "node_global"); - struct_types["node_ind"] = llvm::StructType::create(ctx, "node_ind"); - struct_types["node_data"] = llvm::StructType::create(ctx, "node_data"); - node_ptr_type = llvm::PointerType::getUnqual(struct_types.at("node_base")); + stack_type = StructType::create(ctx, "stack"); + stack_ptr_type = PointerType::getUnqual(stack_type); + tag_type = 
IntegerType::getInt8Ty(ctx); + struct_types["node_base"] = StructType::create(ctx, "node_base"); + struct_types["node_app"] = StructType::create(ctx, "node_app"); + struct_types["node_num"] = StructType::create(ctx, "node_num"); + struct_types["node_global"] = StructType::create(ctx, "node_global"); + struct_types["node_ind"] = StructType::create(ctx, "node_ind"); + struct_types["node_data"] = StructType::create(ctx, "node_data"); + node_ptr_type = PointerType::getUnqual(struct_types.at("node_base")); } void llvm_state::create_functions() { - + auto void_type = Type::getVoidTy(ctx); + auto sizet_type = IntegerType::getInt64Ty(ctx); + functions["stack_init"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_init", + &module + ); + functions["stack_free"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_free", + &module + ); + functions["stack_push"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_push", + &module + ); + functions["stack_pop"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_pop", + &module + ); + functions["stack_peek"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_peek", + &module + ); + functions["stack_popn"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_popn", + &module + ); + functions["stack_slide"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_slide", + &module + ); + functions["stack_update"] = Function::Create( 
+ FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_update", + &module + ); + functions["stack_alloc"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_alloc", + &module + ); + functions["stack_pack"] = Function::Create( + FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_pack", + &module + ); + functions["stack_split"] = Function::Create( + FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false), + Function::LinkageTypes::ExternalLinkage, + "stack_split", + &module + ); } diff --git a/code/compiler/08/llvm_context.hpp b/code/compiler/08/llvm_context.hpp index 3e6ab90..5863a85 100644 --- a/code/compiler/08/llvm_context.hpp +++ b/code/compiler/08/llvm_context.hpp @@ -15,6 +15,7 @@ struct llvm_state { std::map struct_types; llvm::StructType* stack_type; + llvm::PointerType* stack_ptr_type; llvm::PointerType* node_ptr_type; llvm::IntegerType* tag_type; diff --git a/content/blog/08_compiler_llvm.md b/content/blog/08_compiler_llvm.md index 5e16916..81b0550 100644 --- a/content/blog/08_compiler_llvm.md +++ b/content/blog/08_compiler_llvm.md @@ -50,4 +50,135 @@ Additionally, we want an `IRBuilder`, which will help us generate IR instruction placing them into basic blocks (more on that in a bit). Also, we want a `Module` object, which represents some collection of code and declarations (perhaps like a C++ source file). Let's keep these things in our own -`llvm_state` class. +`llvm_context` class. Here's what that looks like: + +{{< codeblock "C++" "compiler/08/llvm_context.hpp" >}} + +{{< todo >}} Consistently name context / state.{{< /todo >}} + +We include the LLVM context, builder, and module as members +of the context struct. 
Since the builder and the module need +the context, we initialize them in the constructor, where they +can safely reference it. + +Besides these fields, we added +a few others, namely the `functions` and `struct_types` maps, +and the various `llvm::Type` subclasses such as `stack_type`. +We did this because we want to be able to call our runtime +functions (and use our runtime structs) from LLVM. To generate +a function call from LLVM, we need to have access to an +`llvm::Function` object. We thus want to have an `llvm::Function` +object for each runtime function we want to call. We could declare +a member variable in our `llvm_context` for each runtime function, +but it's easier to leave this to be an implementation +detail, and only have a dynamically created map between runtime +function names and their corresponding `llvm::Function` objects. + +We populate the maps and other type-related variables in the +two methods, `create_functions()` and `create_types()`. To +create an `llvm::Function`, we must provide an `llvm::FunctionType`, +an `llvm::LinkageType`, the name of the function, and the module +in which the function is declared. Since we only have one +module (the one we initialized in the constructor) that's +the module we pass in. The name of the function is the same +as its name in the runtime, and the linkage type is always +external. The only remaining parameter is +the `llvm::FunctionType`, which is created using code like: + +{{< todo >}} Why external? {{< /todo >}} + +```C++ +llvm::FunctionType::get(return_type, {param_type_1, param_type_2, ...}, is_variadic) +``` + +Declaring all the functions and types in our runtime is mostly +just tedious. 
Here are a few lines from `create_types()`, from +which you can extrapolate the rest: + +{{< codelines "C++" "compiler/08/llvm_context.cpp" 7 11 >}} + +Similarly, here are a few lines from `create_functions()`, which +give a very good idea of the rest of that method: + +{{< codelines "C++" "compiler/08/llvm_context.cpp" 20 27 >}} + +This completes our implementation of the context. + +### LLVM IR +It's now time to look at generating actual code for each G-machine instruction. +Before we do this, we need to get a little bit of an understanding of what LLVM +IR is like. An important property of LLVM IR is that it is in __Static Single Assignment__ +(SSA) form. This means that each variable can only be assigned to once. For instance, +if we use `<-` to represent assignment, the following program is valid: + +``` +x <- 1 +y <- 2 +z <- x + y +``` + +However, the following program is __not__ valid: + +``` +x <- 1 +x <- x + 1 +``` + +But what if we __do__ want to modify a variable `x`? +We can declare another "version" of `x` every time we modify it. +For instance, if we wanted to increment `x` twice, we'd do this: + +``` +x <- 1 +x1 <- x + 1 +x2 <- x1 + 1 +``` + +In practice, LLVM's C++ API can take care of versioning variables on its own, by +auto-incrementing numbers associated with each variable we use. + +We need not get too deep into the specifics of LLVM IR's textual +representation, since we will largely be working with the C++ +API to interact with it. We do, however, need to understand one more +concept from the world of compiler design: __basic blocks__. A basic +block is a sequence of instructions that are guaranteed to be executed +one after another. This means that a basic block cannot have +an if/else, jump, or any other type of control flow anywhere +except at the end. If control flow could appear inside the basic block, +there would be opportunity for execution of some, but not all, +instructions in the block, violating the definition. 
Every time +we add an IR instruction in LLVM, we add it to a basic block. +Writing control flow involves creating several blocks, with each +block serving as the destination of a potential jump. We will +see this used to compile the Jump instruction. + +### Generating LLVM +Let's envision a `gen_llvm` method on the `instruction` struct. +We need access to all the other functions from our runtime, +such as `stack_init`, and functions from our program such +as `f_custom_function`. Thus, we need access to our +`llvm_context`. The current basic block is part +of the builder, which is part of the context, so that's +also taken care of. There's only one more thing that we will +need, and that's access to the `llvm::Function` that's +currently being compiled. To understand why, consider +the signature of `f_main` from the previous post: + +```C +void f_main(struct stack*); +``` + +The function takes a stack as a parameter. What if +we want to try to use this stack in a method call, like +`stack_push(s, node)`? We need to have access to the +LLVM representation of the stack parameter. The easiest +way to do this is to use `llvm::Function::arg_begin()`, +which gives the first argument of the function. We thus +carry the function pointer throughout our code generation +methods. + +With these things in mind, here's the signature for `gen_llvm`: + +```C++ +virtual void gen_llvm(const llvm_context&, llvm::Function*) const; +```