Implement function and type creation, add text to blog in compiler series
This commit is contained in:
parent
43b140285f
commit
81ee50d0d4
@ -22,6 +22,7 @@ llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||
# Create compiler executable
|
||||
add_executable(compiler
|
||||
ast.cpp ast.hpp definition.cpp
|
||||
llvm_context.cpp llvm_context.hpp
|
||||
type_env.cpp type_env.hpp
|
||||
env.cpp env.hpp
|
||||
type.cpp type.hpp
|
||||
|
@ -1,18 +1,88 @@
|
||||
#include "llvm_context.hpp"
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Creates the LLVM types used to talk to the runtime and caches them in
// this object's members: `stack_type` / `stack_ptr_type`, `tag_type`
// (an 8-bit integer), one named struct per entry in `struct_types`, and
// `node_ptr_type` (a pointer to the "node_base" struct).
//
// Each type is created exactly once. Calling StructType::create() again
// with a name that already exists in the context does not return the
// existing type; it registers a second, renamed struct.
void llvm_state::create_types() {
    // No body is set on any of the structs here; they are only referred
    // to by name and through pointers.
    stack_type = StructType::create(ctx, "stack");
    stack_ptr_type = PointerType::getUnqual(stack_type);
    tag_type = IntegerType::getInt8Ty(ctx);

    struct_types["node_base"] = StructType::create(ctx, "node_base");
    struct_types["node_app"] = StructType::create(ctx, "node_app");
    struct_types["node_num"] = StructType::create(ctx, "node_num");
    struct_types["node_global"] = StructType::create(ctx, "node_global");
    struct_types["node_ind"] = StructType::create(ctx, "node_ind");
    struct_types["node_data"] = StructType::create(ctx, "node_data");

    // Must come after "node_base" has been inserted: at() throws if the
    // key is missing.
    node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
}
|
||||
|
||||
void llvm_state::create_functions() {
|
||||
|
||||
auto void_type = Type::getVoidTy(ctx);
|
||||
auto sizet_type = IntegerType::getInt64Ty(ctx);
|
||||
functions["stack_init"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_init",
|
||||
&module
|
||||
);
|
||||
functions["stack_free"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_free",
|
||||
&module
|
||||
);
|
||||
functions["stack_push"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_pop"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_peek"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_popn"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_slide"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_update"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_alloc"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_pack"] = Function::Create(
|
||||
FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
functions["stack_split"] = Function::Create(
|
||||
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||
Function::LinkageTypes::ExternalLinkage,
|
||||
"stack_push",
|
||||
&module
|
||||
);
|
||||
}
|
||||
|
@ -15,6 +15,7 @@ struct llvm_state {
|
||||
std::map<std::string, llvm::StructType*> struct_types;
|
||||
|
||||
llvm::StructType* stack_type;
|
||||
llvm::PointerType* stack_ptr_type;
|
||||
llvm::PointerType* node_ptr_type;
|
||||
llvm::IntegerType* tag_type;
|
||||
|
||||
|
@ -50,4 +50,135 @@ Additionally, we want an `IRBuilder`, which will help us generate IR instruction
|
||||
placing them into basic blocks (more on that in a bit). Also, we want
|
||||
a `Module` object, which represents some collection of code and declarations
|
||||
(perhaps like a C++ source file). Let's keep these things in our own
|
||||
`llvm_state` class.
|
||||
`llvm_context` class. Here's what that looks like:
|
||||
|
||||
{{< codeblock "C++" "compiler/08/llvm_context.hpp" >}}
|
||||
|
||||
{{< todo >}} Consistently name context / state.{{< /todo >}}
|
||||
|
||||
We include the LLVM context, builder, and module as members
|
||||
of the context struct. Since the builder and the module need
|
||||
the context, we initialize them in the constructor, where they
|
||||
can safely reference it.
|
||||
|
||||
Besides these fields, we added
|
||||
a few others, namely the `functions` and `struct_types` maps,
|
||||
and the various `llvm::Type` subclasses such as `stack_type`.
|
||||
We did this because we want to be able to call our runtime
|
||||
functions (and use our runtime structs) from LLVM. To generate
|
||||
a function call from LLVM, we need to have access to an
|
||||
`llvm::Function` object. We thus want to have an `llvm::Function`
|
||||
object for each runtime function we want to call. We could declare
|
||||
a member variable in our `llvm_context` for each runtime function,
|
||||
but it's easier to leave this to be an implementation
|
||||
detail, and only have a dynamically created map between runtime
|
||||
function names and their corresponding `llvm::Function` objects.
|
||||
|
||||
We populate the maps and other type-related variables in the
|
||||
two methods, `create_functions()` and `create_types()`. To
|
||||
create an `llvm::Function`, we must provide an `llvm::FunctionType`,
|
||||
a linkage type (a value of `llvm::Function::LinkageTypes`), the name of the function, and the module
|
||||
in which the function is declared. Since we only have one
|
||||
module (the one we initialized in the constructor) that's
|
||||
the module we pass in. The name of the function is the same
|
||||
as its name in the runtime, and the linkage type is always
|
||||
external. The only remaining parameter is
|
||||
the `llvm::FunctionType`, which is created using code like:
|
||||
|
||||
{{< todo >}} Why external? {{< /todo >}}
|
||||
|
||||
```C++
|
||||
llvm::FunctionType::get(return_type, {param_type_1, param_type_2, ...}, is_variadic)
|
||||
```
|
||||
|
||||
Declaring all the functions and types in our runtime is mostly
|
||||
just tedious. Here are a few lines from `create_types()`, from
|
||||
which you can extrapolate the rest:
|
||||
|
||||
{{< codelines "C++" "compiler/08/llvm_context.cpp" 7 11 >}}
|
||||
|
||||
Similarly, here are a few lines from `create_functions()`, which
|
||||
give a very good idea of the rest of that method:
|
||||
|
||||
{{< codelines "C++" "compiler/08/llvm_context.cpp" 20 27 >}}
|
||||
|
||||
This completes our implementation of the context.
|
||||
|
||||
### LLVM IR
|
||||
It's now time to look at generating actual code for each G-machine instruction.
|
||||
Before we do this, we need to get a little bit of an understanding of what LLVM
|
||||
IR is like. An important property of LLVM IR is that it is in __Static Single Assignment__
|
||||
(SSA) form. This means that each variable can only be assigned to once. For instance,
|
||||
if we use `<-` to represent assignment, the following program is valid:
|
||||
|
||||
```
|
||||
x <- 1
|
||||
y <- 2
|
||||
z <- x + y
|
||||
```
|
||||
|
||||
However, the following program is __not__ valid:
|
||||
|
||||
```
|
||||
x <- 1
|
||||
x <- x + 1
|
||||
```
|
||||
|
||||
But what if we __do__ want to modify a variable `x`?
|
||||
We can declare another "version" of `x` every time we modify it.
|
||||
For instance, if we wanted to increment `x` twice, we'd do this:
|
||||
|
||||
```
|
||||
x <- 1
|
||||
x1 <- x + 1
|
||||
x2 <- x1 + 1
|
||||
```
|
||||
|
||||
In practice, LLVM's C++ API can take care of versioning variables on its own, by
|
||||
auto-incrementing numbers associated with each variable we use.
|
||||
|
||||
We need not get too deep into the specifics of LLVM IR's textual
|
||||
representation, since we will largely be working with the C++
|
||||
API to interact with it. We do, however, need to understand one more
|
||||
concept from the world of compiler design: __basic blocks__. A basic
|
||||
block is a sequence of instructions that are guaranteed to be executed
|
||||
one after another. This means that a basic block cannot have
|
||||
an if/else, jump, or any other type of control flow anywhere
|
||||
except at the end. If control flow could appear inside the basic block,
|
||||
there would be opportunity for execution of some, but not all,
|
||||
instructions in the block, violating the definition. Every time
|
||||
we add an IR instruction in LLVM, we add it to a basic block.
|
||||
Writing control flow involves creating several blocks, with each
|
||||
block serving as the destination of a potential jump. We will
|
||||
see this used to compile the Jump instruction.
|
||||
|
||||
### Generating LLVM
|
||||
Let's envision a `gen_llvm` method on the `instruction` struct.
|
||||
We need access to all the other functions from our runtime,
|
||||
such as `stack_init`, and functions from our program such
|
||||
as `f_custom_function`. Thus, we need access to our
|
||||
`llvm_context`. The current basic block is part
|
||||
of the builder, which is part of the context, so that's
|
||||
also taken care of. There's only one more thing that we will
|
||||
need, and that's access to the `llvm::Function` that's
|
||||
currently being compiled. To understand why, consider
|
||||
the signature of `f_main` from the previous post:
|
||||
|
||||
```C
|
||||
void f_main(struct stack*);
|
||||
```
|
||||
|
||||
The function takes a stack as a parameter. What if
|
||||
we want to try to use this stack in a method call, like
|
||||
`stack_push(s, node)`? We need to have access to the
|
||||
LLVM representation of the stack parameter. The easiest
|
||||
way to do this is to use `llvm::Function::arg_begin()`,
|
||||
which gives the first argument of the function. We thus
|
||||
carry the function pointer throughout our code generation
|
||||
methods.
|
||||
|
||||
With these things in mind, here's the signature for `gen_llvm`:
|
||||
|
||||
```C++
|
||||
virtual void gen_llvm(const llvm_context&, llvm::Function*) const;
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user