diff --git a/code/compiler/08/CMakeLists.txt b/code/compiler/08/CMakeLists.txt index 5e5dab4..bce62c8 100644 --- a/code/compiler/08/CMakeLists.txt +++ b/code/compiler/08/CMakeLists.txt @@ -1,8 +1,12 @@ cmake_minimum_required(VERSION 3.1) project(compiler) +# Find all the required packages find_package(BISON) find_package(FLEX) +find_package(LLVM REQUIRED CONFIG) + +# Set up the flex and bison targets bison_target(parser ${CMAKE_CURRENT_SOURCE_DIR}/parser.y ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp @@ -12,6 +16,10 @@ flex_target(scanner ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp) add_flex_bison_dependency(scanner parser) +# Find all the relevant LLVM components +llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen) + +# Create compiler executable add_executable(compiler ast.cpp ast.hpp definition.cpp type_env.cpp type_env.hpp @@ -24,5 +32,10 @@ add_executable(compiler ${FLEX_scanner_OUTPUTS} main.cpp ) + +# Configure compiler executable target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS}) +target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS}) +target_link_libraries(compiler ${LLVM_LIBS}) diff --git a/code/compiler/08/llvm_context.cpp b/code/compiler/08/llvm_context.cpp new file mode 100644 index 0000000..a019db5 --- /dev/null +++ b/code/compiler/08/llvm_context.cpp @@ -0,0 +1,18 @@ +#include "llvm_context.hpp" +#include <llvm/IR/DerivedTypes.h> + +void llvm_state::create_types() { /* Declares the named LLVM types used by the compiler; no struct bodies are set here. */ + stack_type = llvm::StructType::create(ctx, "stack"); + tag_type = llvm::IntegerType::getInt8Ty(ctx); + struct_types["node_base"] = llvm::StructType::create(ctx, "node_base"); + struct_types["node_app"] = llvm::StructType::create(ctx, "node_app"); + struct_types["node_num"] = llvm::StructType::create(ctx, "node_num"); + struct_types["node_global"] = llvm::StructType::create(ctx, "node_global"); + struct_types["node_ind"] = 
llvm::StructType::create(ctx, "node_ind"); + struct_types["node_data"] = llvm::StructType::create(ctx, "node_data"); + node_ptr_type = llvm::PointerType::getUnqual(struct_types.at("node_base")); +} + +void llvm_state::create_functions() { + +} diff --git a/code/compiler/08/llvm_context.hpp b/code/compiler/08/llvm_context.hpp new file mode 100644 index 0000000..3e6ab90 --- /dev/null +++ b/code/compiler/08/llvm_context.hpp @@ -0,0 +1,29 @@ +#pragma once +#include <llvm/IR/DerivedTypes.h> +#include <llvm/IR/Function.h> +#include <llvm/IR/LLVMContext.h> +#include <llvm/IR/IRBuilder.h> +#include <llvm/IR/Module.h> +#include <map> + +struct llvm_state { /* Bundles the LLVM context, IR builder and module, plus name-indexed lookup tables. */ + llvm::LLVMContext ctx; + llvm::IRBuilder<> builder; + llvm::Module module; + + std::map<std::string, llvm::Function*> functions; + std::map<std::string, llvm::StructType*> struct_types; + + llvm::StructType* stack_type; + llvm::PointerType* node_ptr_type; + llvm::IntegerType* tag_type; + + llvm_state() + : builder(ctx), module("bloglang", ctx) { + create_types(); + create_functions(); + } + + void create_types(); + void create_functions(); +}; diff --git a/content/blog/08_compiler_llvm.md b/content/blog/08_compiler_llvm.md new file mode 100644 index 0000000..5e16916 --- /dev/null +++ b/content/blog/08_compiler_llvm.md @@ -0,0 +1,53 @@ +--- +title: Compiling a Functional Language Using C++, Part 8 - LLVM +date: 2019-10-30T22:16:22-07:00 +draft: true +tags: ["C and C++", "Functional Languages", "Compilers"] +--- + +We don't want a compiler that can only generate code for a single +platform. Our language should work on macOS, Windows, and Linux, +on x86\_64, ARM, and maybe some other architectures. We also +don't want to manually implement the compiler for each platform, +dealing with the specifics of each architecture and operating +system. + +This is where LLVM comes in. LLVM (which stands for __Low Level Virtual Machine__) +is a project which presents us with a kind of generic assembly language, +an __Intermediate Representation__ (IR). It also provides tooling to compile the +IR into platform-specific instructions, as well as to apply a host of various +optimizations. 
We can thus translate our G-machine instructions to LLVM, +and then use LLVM to generate machine code, which gets us to our ultimate +goal of compiling our language. + +We start by adding LLVM to our CMake project. +{{< codelines "CMake" "compiler/08/CMakeLists.txt" 7 7 >}} + +LLVM is a huge project, and has many components. We don't need +most of them. We do need the core libraries, the x86 assembly +generator, and the x86 assembly parser. I'm +not sure why we need the last one, but I ran into linking +errors without it. We find the required link targets +for these components using this CMake command: + +{{< codelines "CMake" "compiler/08/CMakeLists.txt" 19 20 >}} + +Finally, we add the new include directories, link targets, +and definitions to our compiler executable: + +{{< codelines "CMake" "compiler/08/CMakeLists.txt" 39 41 >}} + +Great, we have the infrastructure updated to work with LLVM. It's +now time to start using the LLVM API to compile our G-machine instructions +into assembly. We start with `LLVMContext`. The LLVM documentation states: + +> This is an important class for using LLVM in a threaded context. +> It (opaquely) owns and manages the core "global" data of LLVM's core infrastructure, including the type and constant uniquing tables. + +We will have exactly one instance of such a class in our program. + +Additionally, we want an `IRBuilder`, which will help us generate IR instructions, +placing them into basic blocks (more on that in a bit). Also, we want +a `Module` object, which represents some collection of code and declarations +(perhaps like a C++ source file). Let's keep these things in our own +`llvm_state` class.