Implement function and type creation, add text to blog in compiler series
This commit is contained in:
parent
43b140285f
commit
81ee50d0d4
|
@ -22,6 +22,7 @@ llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||||
# Create compiler executable
|
# Create compiler executable
|
||||||
add_executable(compiler
|
add_executable(compiler
|
||||||
ast.cpp ast.hpp definition.cpp
|
ast.cpp ast.hpp definition.cpp
|
||||||
|
llvm_context.cpp llvm_context.hpp
|
||||||
type_env.cpp type_env.hpp
|
type_env.cpp type_env.hpp
|
||||||
env.cpp env.hpp
|
env.cpp env.hpp
|
||||||
type.cpp type.hpp
|
type.cpp type.hpp
|
||||||
|
|
|
@ -1,18 +1,88 @@
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
#include <llvm/IR/DerivedTypes.h>
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
void llvm_state::create_types() {
|
void llvm_state::create_types() {
|
||||||
stack_type = llvm::StructType::create(ctx, "stack");
|
stack_type = StructType::create(ctx, "stack");
|
||||||
tag_type = llvm::IntegerType::getInt8Ty(ctx);
|
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||||
struct_types["node_base"] = llvm::StructType::create(ctx, "node_base");
|
tag_type = IntegerType::getInt8Ty(ctx);
|
||||||
struct_types["node_app"] = llvm::StructType::create(ctx, "node_app");
|
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||||
struct_types["node_num"] = llvm::StructType::create(ctx, "node_num");
|
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||||
struct_types["node_global"] = llvm::StructType::create(ctx, "node_global");
|
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||||
struct_types["node_ind"] = llvm::StructType::create(ctx, "node_ind");
|
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||||
struct_types["node_data"] = llvm::StructType::create(ctx, "node_data");
|
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||||
node_ptr_type = llvm::PointerType::getUnqual(struct_types.at("node_base"));
|
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||||
|
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||||
}
|
}
|
||||||
|
|
||||||
void llvm_state::create_functions() {
|
void llvm_state::create_functions() {
|
||||||
|
auto void_type = Type::getVoidTy(ctx);
|
||||||
|
auto sizet_type = IntegerType::getInt64Ty(ctx);
|
||||||
|
functions["stack_init"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_init",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_free"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_free",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_push"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_pop"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_peek"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_popn"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_slide"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_update"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_alloc"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_pack"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type, tag_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_split"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,7 @@ struct llvm_state {
|
||||||
std::map<std::string, llvm::StructType*> struct_types;
|
std::map<std::string, llvm::StructType*> struct_types;
|
||||||
|
|
||||||
llvm::StructType* stack_type;
|
llvm::StructType* stack_type;
|
||||||
|
llvm::PointerType* stack_ptr_type;
|
||||||
llvm::PointerType* node_ptr_type;
|
llvm::PointerType* node_ptr_type;
|
||||||
llvm::IntegerType* tag_type;
|
llvm::IntegerType* tag_type;
|
||||||
|
|
||||||
|
|
|
@ -50,4 +50,135 @@ Additionally, we want an `IRBuilder`, which will help us generate IR instruction
|
||||||
placing them into basic blocks (more on that in a bit). Also, we want
|
placing them into basic blocks (more on that in a bit). Also, we want
|
||||||
a `Module` object, which represents some collection of code and declarations
|
a `Module` object, which represents some collection of code and declarations
|
||||||
(perhaps like a C++ source file). Let's keep these things in our own
|
(perhaps like a C++ source file). Let's keep these things in our own
|
||||||
`llvm_state` class.
|
`llvm_context` class. Here's what that looks like:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/08/llvm_context.hpp" >}}
|
||||||
|
|
||||||
|
{{< todo >}} Consistently name context / state.{{< /todo >}}
|
||||||
|
|
||||||
|
We include the LLVM context, builder, and module as members
|
||||||
|
of the context struct. Since the builder and the module need
|
||||||
|
the context, we initialize them in the constructor, where they
|
||||||
|
can safely reference it.
|
||||||
|
|
||||||
|
Besides these fields, we added
|
||||||
|
a few others, namely the `functions` and `struct_types` maps,
|
||||||
|
and the various `llvm::Type` subclasses such as `stack_type`.
|
||||||
|
We did this because we want to be able to call our runtime
|
||||||
|
functions (and use our runtime structs) from LLVM. To generate
|
||||||
|
a function call from LLVM, we need to have access to an
|
||||||
|
`llvm::Function` object. We thus want to have an `llvm::Function`
|
||||||
|
object for each runtime function we want to call. We could declare
|
||||||
|
a member variable in our `llvm_context` for each runtime function,
|
||||||
|
but it's easier to leave this to be an implementation
|
||||||
|
detail, and only have a dynamically created map between runtime
|
||||||
|
function names and their corresponding `llvm::Function` objects.
|
||||||
|
|
||||||
|
We populate the maps and other type-related variables in the
|
||||||
|
two methods, `create_functions()` and `create_types()`. To
|
||||||
|
create an `llvm::Function`, we must provide an `llvm::FunctionType`,
|
||||||
|
an `llvm::GlobalValue::LinkageTypes` value, the name of the function, and the module
|
||||||
|
in which the function is declared. Since we only have one
|
||||||
|
module (the one we initialized in the constructor) that's
|
||||||
|
the module we pass in. The name of the function is the same
|
||||||
|
as its name in the runtime, and the linkage type is always
|
||||||
|
external. The only remaining parameter is
|
||||||
|
the `llvm::FunctionType`, which is created using code like:
|
||||||
|
|
||||||
|
{{< todo >}} Why external? {{< /todo >}}
|
||||||
|
|
||||||
|
```C++
|
||||||
|
llvm::FunctionType::get(return_type, {param_type_1, param_type_2, ...}, is_variadic)
|
||||||
|
```
|
||||||
|
|
||||||
|
Declaring all the functions and types in our runtime is mostly
|
||||||
|
just tedious. Here are a few lines from `create_types()`, from
|
||||||
|
which you can extrapolate the rest:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 7 11 >}}
|
||||||
|
|
||||||
|
Similarly, here are a few lines from `create_functions()`, which
|
||||||
|
give a very good idea of the rest of that method:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/08/llvm_context.cpp" 20 27 >}}
|
||||||
|
|
||||||
|
This completes our implementation of the context.
|
||||||
|
|
||||||
|
### LLVM IR
|
||||||
|
It's now time to look at generating actual code for each G-machine instruction.
|
||||||
|
Before we do this, we need to get a little bit of an understanding of what LLVM
|
||||||
|
IR is like. An important property of LLVM IR is that it is in __Static Single Assignment__
|
||||||
|
(SSA) form. This means that each variable can only be assigned to once. For instance,
|
||||||
|
if we use `<-` to represent assignment, the following program is valid:
|
||||||
|
|
||||||
|
```
|
||||||
|
x <- 1
|
||||||
|
y <- 2
|
||||||
|
z <- x + y
|
||||||
|
```
|
||||||
|
|
||||||
|
However, the following program is __not__ valid:
|
||||||
|
|
||||||
|
```
|
||||||
|
x <- 1
|
||||||
|
x <- x + 1
|
||||||
|
```
|
||||||
|
|
||||||
|
But what if we __do__ want to modify a variable `x`?
|
||||||
|
We can declare another "version" of `x` every time we modify it.
|
||||||
|
For instance, if we wanted to increment `x` twice, we'd do this:
|
||||||
|
|
||||||
|
```
|
||||||
|
x <- 1
|
||||||
|
x1 <- x + 1
|
||||||
|
x2 <- x1 + 1
|
||||||
|
```
|
||||||
|
|
||||||
|
In practice, LLVM's C++ API can take care of versioning variables on its own, by
|
||||||
|
auto-incrementing numbers associated with each variable we use.
|
||||||
|
|
||||||
|
We need not get too deep into the specifics of LLVM IR's textual
|
||||||
|
representation, since we will largely be working with the C++
|
||||||
|
API to interact with it. We do, however, need to understand one more
|
||||||
|
concept from the world of compiler design: __basic blocks__. A basic
|
||||||
|
block is a sequence of instructions that are guaranteed to be executed
|
||||||
|
one after another. This means that a basic block cannot have
|
||||||
|
an if/else, jump, or any other type of control flow anywhere
|
||||||
|
except at the end. If control flow could appear inside the basic block,
|
||||||
|
there would be an opportunity for execution of some, but not all,
|
||||||
|
instructions in the block, violating the definition. Every time
|
||||||
|
we add an IR instruction in LLVM, we add it to a basic block.
|
||||||
|
Writing control flow involves creating several blocks, with each
|
||||||
|
block serving as the destination of a potential jump. We will
|
||||||
|
see this used to compile the Jump instruction.
|
||||||
|
|
||||||
|
### Generating LLVM
|
||||||
|
Let's envision a `gen_llvm` method on the `instruction` struct.
|
||||||
|
We need access to all the other functions from our runtime,
|
||||||
|
such as `stack_init`, and functions from our program such
|
||||||
|
as `f_custom_function`. Thus, we need access to our
|
||||||
|
`llvm_context`. The current basic block is part
|
||||||
|
of the builder, which is part of the context, so that's
|
||||||
|
also taken care of. There's only one more thing that we will
|
||||||
|
need, and that's access to the `llvm::Function` that's
|
||||||
|
currently being compiled. To understand why, consider
|
||||||
|
the signature of `f_main` from the previous post:
|
||||||
|
|
||||||
|
```C
|
||||||
|
void f_main(struct stack*);
|
||||||
|
```
|
||||||
|
|
||||||
|
The function takes a stack as a parameter. What if
|
||||||
|
we want to try to use this stack in a method call, like
|
||||||
|
`stack_push(s, node)`? We need to have access to the
|
||||||
|
LLVM representation of the stack parameter. The easiest
|
||||||
|
way to do this is to use `llvm::Function::arg_begin()`,
|
||||||
|
which gives the first argument of the function. We thus
|
||||||
|
carry the function pointer throughout our code generation
|
||||||
|
methods.
|
||||||
|
|
||||||
|
With these things in mind, here's the signature for `gen_llvm`:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
virtual void gen_llvm(const llvm_context&, llvm::Function*) const;
|
||||||
|
```
|
||||||
|
|
Loading…
Reference in New Issue
Block a user