Start work on chapter 8 code for compilers

This commit is contained in:
Danila Fedorin 2019-11-02 17:53:15 -07:00
parent 9531f4d8e3
commit 1f6032a30e
4 changed files with 113 additions and 0 deletions

View File

@ -1,8 +1,12 @@
cmake_minimum_required(VERSION 3.1) cmake_minimum_required(VERSION 3.1)
project(compiler) project(compiler)
# Find all the required packages
find_package(BISON) find_package(BISON)
find_package(FLEX) find_package(FLEX)
find_package(LLVM REQUIRED CONFIG)
# Set up the flex and bison targets
bison_target(parser bison_target(parser
${CMAKE_CURRENT_SOURCE_DIR}/parser.y ${CMAKE_CURRENT_SOURCE_DIR}/parser.y
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
@ -12,6 +16,10 @@ flex_target(scanner
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp) ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
add_flex_bison_dependency(scanner parser) add_flex_bison_dependency(scanner parser)
# Find all the relevant LLVM components
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
# Create compiler executable
add_executable(compiler add_executable(compiler
ast.cpp ast.hpp definition.cpp ast.cpp ast.hpp definition.cpp
type_env.cpp type_env.hpp type_env.cpp type_env.hpp
@ -24,5 +32,10 @@ add_executable(compiler
${FLEX_scanner_OUTPUTS} ${FLEX_scanner_OUTPUTS}
main.cpp main.cpp
) )
# Configure compiler executable
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS}) # LLVM header search paths (not LLVM_DEFINITIONS, which holds compile flags)
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS}) # definitions exported by the LLVM package config
target_link_libraries(compiler ${LLVM_LIBS}) # libraries resolved by llvm_map_components_to_libnames

View File

@ -0,0 +1,18 @@
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>
void llvm_state::create_types() {
stack_type = llvm::StructType::create(ctx, "stack");
tag_type = llvm::IntegerType::getInt8Ty(ctx);
struct_types["node_base"] = llvm::StructType::create(ctx, "node_base");
struct_types["node_app"] = llvm::StructType::create(ctx, "node_app");
struct_types["node_num"] = llvm::StructType::create(ctx, "node_num");
struct_types["node_global"] = llvm::StructType::create(ctx, "node_global");
struct_types["node_ind"] = llvm::StructType::create(ctx, "node_ind");
struct_types["node_data"] = llvm::StructType::create(ctx, "node_data");
node_ptr_type = llvm::PointerType::getUnqual(struct_types.at("node_base"));
}
// Called from the llvm_state constructor right after create_types().
// Currently a stub: it has no body yet and leaves `functions` untouched.
void llvm_state::create_functions() {
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>
#include <map>
// Bundles the LLVM objects used for code generation together with
// name-indexed tables of functions and struct types.
struct llvm_state {
    // Declaration order is significant: members are initialized top to
    // bottom, and both `builder` and `module` are constructed from `ctx`.
    llvm::LLVMContext ctx;
    llvm::IRBuilder<> builder;
    llvm::Module module;

    // Lookup tables keyed by name; struct_types is filled by create_types().
    std::map<std::string, llvm::Function*> functions;
    std::map<std::string, llvm::StructType*> struct_types;

    // Commonly needed types, assigned by create_types().
    llvm::StructType* stack_type;
    llvm::PointerType* node_ptr_type;
    llvm::IntegerType* tag_type;

    void create_types();
    void create_functions();

    // Sets up the builder and the "bloglang" module on top of ctx, then
    // registers the types followed by the functions.
    llvm_state() : builder(ctx), module("bloglang", ctx) {
        create_types();
        create_functions();
    }
};

View File

@ -0,0 +1,53 @@
---
title: Compiling a Functional Language Using C++, Part 8 - LLVM
date: 2019-10-30T22:16:22-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"]
---
We don't want a compiler that can only generate code for a single
platform. Our language should work on macOS, Windows, and Linux,
on x86\_64, ARM, and maybe some other architectures. We also
don't want to manually implement the compiler for each platform,
dealing with the specifics of each architecture and operating
system.
This is where LLVM comes in. LLVM (which stands for __Low Level Virtual Machine__) is a project which presents us with a kind of generic assembly language,
is a project which presents us with a kind of generic assembly language,
an __Intermediate Representation__ (IR). It also provides tooling to compile the
IR into platform-specific instructions, as well as to apply a host of
optimizations. We can thus translate our G-machine instructions to LLVM,
and then use LLVM to generate machine code, which gets us to our ultimate
goal of compiling our language.
We start with adding LLVM to our CMake project.
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 7 7 >}}
LLVM is a huge project, and has many components. We don't need
most of them. We do need the core libraries, the x86 assembly
generator, and the x86 assembly parser. I'm
not sure why we need the last one, but I ran into linking
errors without it. We find the required link targets
for these components using this CMake command:
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 19 20 >}}
Finally, we add the new include directories, link targets,
and definitions to our compiler executable:
{{< codelines "CMake" "compiler/08/CMakeLists.txt" 39 41 >}}
Great, we have the infrastructure updated to work with LLVM. It's
now time to start using the LLVM API to compile our G-machine instructions
into assembly. We start with `LLVMContext`. The LLVM documentation states:
> This is an important class for using LLVM in a threaded context.
> It (opaquely) owns and manages the core "global" data of LLVM's core infrastructure, including the type and constant uniquing tables.
We will have exactly one instance of such a class in our program.
Additionally, we want an `IRBuilder`, which will help us generate IR instructions,
placing them into basic blocks (more on that in a bit). Also, we want
a `Module` object, which represents some collection of code and declarations
(perhaps like a C++ source file). Let's keep these things in our own
`llvm_state` class.