Compare commits
109 Commits
d7846e0b32
...
colors
| Author | SHA1 | Date | |
|---|---|---|---|
| 49469bdf12 | |||
| 020417e971 | |||
| eff0de5330 | |||
| b219f6855e | |||
| 65215ccdd6 | |||
| 3e9f6a14f2 | |||
| 7623787b1c | |||
| e15daa8f6d | |||
| 298cf6599c | |||
| 841930a8ef | |||
| 9b37e496cb | |||
| 58e6ad9e79 | |||
| 3aa2a6783e | |||
| d64a0d1fcd | |||
| ba141031dd | |||
| ebdc63f5a0 | |||
| 5af0a09714 | |||
| 8a2bc2660c | |||
| e59b8cf403 | |||
| b078ef9a22 | |||
| fdaec6d5a9 | |||
| b631346379 | |||
| e9f2378b47 | |||
| 7d2f78d25c | |||
| 1f734a613c | |||
| a3c299b057 | |||
| 12aedfce92 | |||
| 65645346a2 | |||
| cb65e89e53 | |||
| 6a2fec8ef4 | |||
| aa59c90810 | |||
| 2b317930a0 | |||
| e7d56dd4bd | |||
| a4fedb276d | |||
| 277c0a2ce6 | |||
| ef3c61e9e6 | |||
| 1908126607 | |||
| 2d77f8489f | |||
| 0371651fdd | |||
| 01734d24f7 | |||
| 71fc0546e0 | |||
| 871a745702 | |||
| 3f0df8ae0d | |||
| 1746011c16 | |||
| 7c4cfbf3d4 | |||
| 8524e098a8 | |||
| 971f58da9b | |||
| c496be1031 | |||
| 21851e3a9c | |||
| 600d5b91ea | |||
| 09b90c3bbc | |||
| f6ca13d6dc | |||
| 9c4d7c514f | |||
| ad1946e9fb | |||
| 68910458e8 | |||
| 240e87eca4 | |||
| 6b5f7e25b7 | |||
| e7229e644f | |||
| 08c8aca144 | |||
| 7f8dae74ac | |||
| 08503116ff | |||
| a1d679a59d | |||
| 4586bd0188 | |||
| a97b50f497 | |||
| c84ff11d0d | |||
| e966e74487 | |||
| 3865abfb4d | |||
| 1905601aaa | |||
| aacb9e2874 | |||
| 78f3b18969 | |||
| 9f73ca2950 | |||
| 035b98a602 | |||
| 17f4ebc297 | |||
| 906e15674e | |||
| 85bd0b6c9c | |||
| b19e8713e0 | |||
| 68fb78e765 | |||
| be8a0a4a3a | |||
| e883e3c60e | |||
| 4ede62b39a | |||
| 7d9f487a78 | |||
| 9da584ded4 | |||
| 9452c90cf3 | |||
| a80064f40a | |||
| 49691803cc | |||
| ee4738b245 | |||
| b270fa78da | |||
| 18339d7e4d | |||
| 78563448fb | |||
| 144d5f3324 | |||
| 0fb315ec47 | |||
| 1ff67341a1 | |||
| a441280812 | |||
| eda9bbb191 | |||
| 2d9da2899f | |||
| a95490d9d4 | |||
| 44135b1824 | |||
| 4a0367b401 | |||
| c1f0104edb | |||
| c9a7fbf6dd | |||
| 1f00b6a3f8 | |||
| acb22c4119 | |||
| be2b855ffe | |||
| 88c9418350 | |||
| 2255543d94 | |||
| b4c91d2dd4 | |||
| 98c1b5a3b2 | |||
| 122a1d73d3 | |||
| 74e6dba914 |
30
.drone.yml
@@ -10,7 +10,7 @@ steps:
|
|||||||
- name: test-compiler
|
- name: test-compiler
|
||||||
image: archlinux
|
image: archlinux
|
||||||
commands:
|
commands:
|
||||||
- pacman -Sy cmake gcc make llvm bison flex gettext --noconfirm
|
- pacman -Sy cmake gcc make llvm bison flex gettext libffi --noconfirm
|
||||||
- cd code/compiler
|
- cd code/compiler
|
||||||
- ./test.sh
|
- ./test.sh
|
||||||
- name: build-live
|
- name: build-live
|
||||||
@@ -22,17 +22,17 @@ steps:
|
|||||||
path: /live-output
|
path: /live-output
|
||||||
environment:
|
environment:
|
||||||
HUGO_DESTINATION: /live-output
|
HUGO_DESTINATION: /live-output
|
||||||
- name: upload-live
|
# - name: upload-live
|
||||||
image: eeacms/rsync
|
# image: eeacms/rsync
|
||||||
commands:
|
# commands:
|
||||||
- eval `ssh-agent -s`
|
# - eval `ssh-agent -s`
|
||||||
- echo "$CUSTOM_KEY" | ssh-add -
|
# - echo "$CUSTOM_KEY" | ssh-add -
|
||||||
- mkdir -p ~/.ssh
|
# - mkdir -p ~/.ssh
|
||||||
- echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
|
# - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
|
||||||
- rsync -rv -e "ssh -p 22" /live-output/ blog-live@danilafe.com:/var/www/blog-live/ --checksum
|
# - rsync -rv -e "ssh -p 22" /live-output/ blog-live@danilafe.com:/var/www/blog-live/ --checksum
|
||||||
environment:
|
# environment:
|
||||||
CUSTOM_KEY:
|
# CUSTOM_KEY:
|
||||||
from_secret: live_ssh_key
|
# from_secret: live_ssh_key
|
||||||
volumes:
|
# volumes:
|
||||||
- name: live-output
|
# - name: live-output
|
||||||
path: /live-output
|
# path: /live-output
|
||||||
|
|||||||
@@ -21,7 +21,9 @@ llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
|||||||
|
|
||||||
# Create compiler executable
|
# Create compiler executable
|
||||||
add_executable(compiler
|
add_executable(compiler
|
||||||
ast.cpp ast.hpp definition.cpp
|
definition.cpp definition.hpp
|
||||||
|
parsed_type.cpp parsed_type.hpp
|
||||||
|
ast.cpp ast.hpp
|
||||||
llvm_context.cpp llvm_context.hpp
|
llvm_context.cpp llvm_context.hpp
|
||||||
type_env.cpp type_env.hpp
|
type_env.cpp type_env.hpp
|
||||||
env.cpp env.hpp
|
env.cpp env.hpp
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ void ast_int::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>&
|
|||||||
}
|
}
|
||||||
|
|
||||||
type_ptr ast_int::typecheck(type_mgr& mgr) {
|
type_ptr ast_int::typecheck(type_mgr& mgr) {
|
||||||
return type_ptr(new type_base("Int"));
|
return type_ptr(new type_app(env->lookup_type("Int")));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
@@ -161,7 +161,9 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
input_type = mgr.resolve(case_type, var);
|
input_type = mgr.resolve(case_type, var);
|
||||||
if(!dynamic_cast<type_data*>(input_type.get())) {
|
type_app* app_type;
|
||||||
|
if(!(app_type = dynamic_cast<type_app*>(input_type.get())) ||
|
||||||
|
!dynamic_cast<type_data*>(app_type->constructor.get())) {
|
||||||
throw type_error("attempting case analysis of non-data type");
|
throw type_error("attempting case analysis of non-data type");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -169,7 +171,8 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
type_data* type = dynamic_cast<type_data*>(input_type.get());
|
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||||
|
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
||||||
|
|
||||||
of->compile(env, into);
|
of->compile(env, into);
|
||||||
into.push_back(instruction_ptr(new instruction_eval()));
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|||||||
@@ -56,28 +56,38 @@ void definition_defn::generate_llvm(llvm_context& ctx) {
|
|||||||
ctx.builder.CreateRetVoid();
|
ctx.builder.CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_data::insert_types(type_mgr& mgr, type_env_ptr& env) {
|
void definition_data::insert_types(type_env_ptr& env) {
|
||||||
this->env = env;
|
this->env = env;
|
||||||
env->bind_type(name, type_ptr(new type_data(name)));
|
env->bind_type(name, type_ptr(new type_data(name, vars.size())));
|
||||||
}
|
}
|
||||||
|
|
||||||
void definition_data::insert_constructors() const {
|
void definition_data::insert_constructors() const {
|
||||||
type_ptr return_type = env->lookup_type(name);
|
type_ptr this_type_ptr = env->lookup_type(name);
|
||||||
type_data* this_type = static_cast<type_data*>(return_type.get());
|
type_data* this_type = static_cast<type_data*>(this_type_ptr.get());
|
||||||
int next_tag = 0;
|
int next_tag = 0;
|
||||||
|
|
||||||
|
std::set<std::string> var_set;
|
||||||
|
type_app* return_app = new type_app(std::move(this_type_ptr));
|
||||||
|
type_ptr return_type(return_app);
|
||||||
|
for(auto& var : vars) {
|
||||||
|
if(var_set.find(var) != var_set.end()) throw 0;
|
||||||
|
var_set.insert(var);
|
||||||
|
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||||
|
}
|
||||||
|
|
||||||
for(auto& constructor : constructors) {
|
for(auto& constructor : constructors) {
|
||||||
constructor->tag = next_tag;
|
constructor->tag = next_tag;
|
||||||
this_type->constructors[constructor->name] = { next_tag++ };
|
this_type->constructors[constructor->name] = { next_tag++ };
|
||||||
|
|
||||||
type_ptr full_type = return_type;
|
type_ptr full_type = return_type;
|
||||||
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||||
type_ptr type = env->lookup_type(*it);
|
type_ptr type = (*it)->to_type(var_set, env);
|
||||||
if(!type) throw 0;
|
|
||||||
full_type = type_ptr(new type_arr(type, full_type));
|
full_type = type_ptr(new type_arr(type, full_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
env->bind(constructor->name, full_type);
|
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
||||||
|
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
||||||
|
env->bind(constructor->name, full_scheme);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include "instruction.hpp"
|
#include "instruction.hpp"
|
||||||
#include "llvm_context.hpp"
|
#include "llvm_context.hpp"
|
||||||
|
#include "parsed_type.hpp"
|
||||||
#include "type_env.hpp"
|
#include "type_env.hpp"
|
||||||
|
|
||||||
struct ast;
|
struct ast;
|
||||||
@@ -11,10 +12,10 @@ using ast_ptr = std::unique_ptr<ast>;
|
|||||||
|
|
||||||
struct constructor {
|
struct constructor {
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<std::string> types;
|
std::vector<parsed_type_ptr> types;
|
||||||
int8_t tag;
|
int8_t tag;
|
||||||
|
|
||||||
constructor(std::string n, std::vector<std::string> ts)
|
constructor(std::string n, std::vector<parsed_type_ptr> ts)
|
||||||
: name(std::move(n)), types(std::move(ts)) {}
|
: name(std::move(n)), types(std::move(ts)) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -52,14 +53,18 @@ using definition_defn_ptr = std::unique_ptr<definition_defn>;
|
|||||||
|
|
||||||
struct definition_data {
|
struct definition_data {
|
||||||
std::string name;
|
std::string name;
|
||||||
|
std::vector<std::string> vars;
|
||||||
std::vector<constructor_ptr> constructors;
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
type_env_ptr env;
|
type_env_ptr env;
|
||||||
|
|
||||||
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
definition_data(
|
||||||
: name(std::move(n)), constructors(std::move(cs)) {}
|
std::string n,
|
||||||
|
std::vector<std::string> vs,
|
||||||
|
std::vector<constructor_ptr> cs)
|
||||||
|
: name(std::move(n)), vars(std::move(vs)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
void insert_types(type_mgr& mgr, type_env_ptr& env);
|
void insert_types(type_env_ptr& env);
|
||||||
void insert_constructors() const;
|
void insert_constructors() const;
|
||||||
void generate_llvm(llvm_context& ctx);
|
void generate_llvm(llvm_context& ctx);
|
||||||
};
|
};
|
||||||
|
|||||||
32
code/compiler/11/examples/list.txt
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
data List a = { Nil, Cons a (List a) }
|
||||||
|
|
||||||
|
defn map f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { Cons (f x) (map f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn foldl f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { foldl f (f b x) xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn foldr f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { f x (foldr f b xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn list = { Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))) }
|
||||||
|
|
||||||
|
defn add x y = { x + y }
|
||||||
|
defn sum l = { foldr add 0 l }
|
||||||
|
|
||||||
|
defn skipAdd x y = { y + 1 }
|
||||||
|
defn length l = { foldr skipAdd 0 l }
|
||||||
|
|
||||||
|
defn main = { sum list + length list }
|
||||||
17
code/compiler/11/examples/pair.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
data Pair a b = { MkPair a b }
|
||||||
|
|
||||||
|
defn fst p = {
|
||||||
|
case p of {
|
||||||
|
MkPair a b -> { a }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn snd p = {
|
||||||
|
case p of {
|
||||||
|
MkPair a b -> { b }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn pair = { MkPair 1 (MkPair 2 3) }
|
||||||
|
|
||||||
|
defn main = { fst pair + snd (snd pair) }
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
data List = { Nil, Cons Int List }
|
data List a = { Nil, Cons a (List a) }
|
||||||
data Bool = { True, False }
|
data Bool = { True, False }
|
||||||
defn length l = {
|
defn length l = {
|
||||||
case l of {
|
case l of {
|
||||||
@@ -6,4 +6,4 @@ defn length l = {
|
|||||||
Cons x xs -> { 1 + length xs }
|
Cons x xs -> { 1 + length xs }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defn main = { length (Cons True (Cons False (Cons True Nil))) }
|
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) + length (Cons True (Cons False (Cons True Nil))) }
|
||||||
|
|||||||
@@ -30,17 +30,18 @@ void typecheck_program(
|
|||||||
type_mgr& mgr, type_env_ptr& env) {
|
type_mgr& mgr, type_env_ptr& env) {
|
||||||
type_ptr int_type = type_ptr(new type_base("Int"));
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
env->bind_type("Int", int_type);
|
env->bind_type("Int", int_type);
|
||||||
|
type_ptr int_type_app = type_ptr(new type_app(int_type));
|
||||||
|
|
||||||
type_ptr binop_type = type_ptr(new type_arr(
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
int_type,
|
int_type_app,
|
||||||
type_ptr(new type_arr(int_type, int_type))));
|
type_ptr(new type_arr(int_type_app, int_type_app))));
|
||||||
env->bind("+", binop_type);
|
env->bind("+", binop_type);
|
||||||
env->bind("-", binop_type);
|
env->bind("-", binop_type);
|
||||||
env->bind("*", binop_type);
|
env->bind("*", binop_type);
|
||||||
env->bind("/", binop_type);
|
env->bind("/", binop_type);
|
||||||
|
|
||||||
for(auto& def_data : defs_data) {
|
for(auto& def_data : defs_data) {
|
||||||
def_data.second->insert_types(mgr, env);
|
def_data.second->insert_types(env);
|
||||||
}
|
}
|
||||||
for(auto& def_data : defs_data) {
|
for(auto& def_data : defs_data) {
|
||||||
def_data.second->insert_constructors();
|
def_data.second->insert_constructors();
|
||||||
@@ -138,7 +139,7 @@ void output_llvm(llvm_context& ctx, const std::string& filename) {
|
|||||||
if (ec) {
|
if (ec) {
|
||||||
throw 0;
|
throw 0;
|
||||||
} else {
|
} else {
|
||||||
llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
|
llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
|
||||||
llvm::legacy::PassManager pm;
|
llvm::legacy::PassManager pm;
|
||||||
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
|
||||||
throw 0;
|
throw 0;
|
||||||
|
|||||||
36
code/compiler/11/parsed_type.cpp
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
#include "parsed_type.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
type_ptr parsed_type_app::to_type(
|
||||||
|
const std::set<std::string>& vars,
|
||||||
|
const type_env& e) const {
|
||||||
|
auto parent_type = e.lookup_type(name);
|
||||||
|
if(parent_type == nullptr) throw 0;
|
||||||
|
type_base* base_type;
|
||||||
|
if(!(base_type = dynamic_cast<type_base*>(parent_type.get()))) throw 0;
|
||||||
|
if(base_type->arity != arguments.size()) throw 0;
|
||||||
|
|
||||||
|
type_app* new_app = new type_app(std::move(parent_type));
|
||||||
|
type_ptr to_return(new_app);
|
||||||
|
for(auto& arg : arguments) {
|
||||||
|
new_app->arguments.push_back(arg->to_type(vars, e));
|
||||||
|
}
|
||||||
|
return to_return;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr parsed_type_var::to_type(
|
||||||
|
const std::set<std::string>& vars,
|
||||||
|
const type_env& e) const {
|
||||||
|
if(vars.find(var) == vars.end()) throw 0;
|
||||||
|
return type_ptr(new type_var(var));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
type_ptr parsed_type_arr::to_type(
|
||||||
|
const std::set<std::string>& vars,
|
||||||
|
const type_env& env) const {
|
||||||
|
auto new_left = left->to_type(vars, env);
|
||||||
|
auto new_right = right->to_type(vars, env);
|
||||||
|
return type_ptr(new type_arr(std::move(new_left), std::move(new_right)));
|
||||||
|
}
|
||||||
43
code/compiler/11/parsed_type.hpp
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
struct parsed_type {
|
||||||
|
virtual type_ptr to_type(
|
||||||
|
const std::set<std::string>& vars,
|
||||||
|
const type_env& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using parsed_type_ptr = std::unique_ptr<parsed_type>;
|
||||||
|
|
||||||
|
struct parsed_type_app : parsed_type {
|
||||||
|
std::string name;
|
||||||
|
std::vector<parsed_type_ptr> arguments;
|
||||||
|
|
||||||
|
parsed_type_app(
|
||||||
|
std::string n,
|
||||||
|
std::vector<parsed_type_ptr> as)
|
||||||
|
: name(std::move(n)), arguments(std::move(as)) {}
|
||||||
|
|
||||||
|
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct parsed_type_var : parsed_type {
|
||||||
|
std::string var;
|
||||||
|
|
||||||
|
parsed_type_var(std::string v) : var(std::move(v)) {}
|
||||||
|
|
||||||
|
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct parsed_type_arr : parsed_type {
|
||||||
|
parsed_type_ptr left;
|
||||||
|
parsed_type_ptr right;
|
||||||
|
|
||||||
|
parsed_type_arr(parsed_type_ptr l, parsed_type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
|
||||||
|
};
|
||||||
@@ -5,6 +5,7 @@
|
|||||||
#include "ast.hpp"
|
#include "ast.hpp"
|
||||||
#include "definition.hpp"
|
#include "definition.hpp"
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
#include "parsed_type.hpp"
|
||||||
|
|
||||||
std::map<std::string, definition_data_ptr> defs_data;
|
std::map<std::string, definition_data_ptr> defs_data;
|
||||||
std::map<std::string, definition_defn_ptr> defs_defn;
|
std::map<std::string, definition_defn_ptr> defs_defn;
|
||||||
@@ -36,9 +37,11 @@ extern yy::parser::symbol_type yylex();
|
|||||||
%define api.value.type variant
|
%define api.value.type variant
|
||||||
%define api.token.constructor
|
%define api.token.constructor
|
||||||
|
|
||||||
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
%type <std::vector<std::string>> lowercaseParams
|
||||||
%type <std::vector<branch_ptr>> branches
|
%type <std::vector<branch_ptr>> branches
|
||||||
%type <std::vector<constructor_ptr>> constructors
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
|
%type <std::vector<parsed_type_ptr>> typeList
|
||||||
|
%type <parsed_type_ptr> type nonArrowType typeListElement
|
||||||
%type <ast_ptr> aAdd aMul case app appBase
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
%type <definition_data_ptr> data
|
%type <definition_data_ptr> data
|
||||||
%type <definition_defn_ptr> defn
|
%type <definition_defn_ptr> defn
|
||||||
@@ -75,11 +78,6 @@ lowercaseParams
|
|||||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
;
|
;
|
||||||
|
|
||||||
uppercaseParams
|
|
||||||
: %empty { $$ = std::vector<std::string>(); }
|
|
||||||
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
|
||||||
;
|
|
||||||
|
|
||||||
aAdd
|
aAdd
|
||||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
||||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
||||||
@@ -127,8 +125,8 @@ pattern
|
|||||||
;
|
;
|
||||||
|
|
||||||
data
|
data
|
||||||
: DATA UID EQUAL OCURLY constructors CCURLY
|
: DATA UID lowercaseParams EQUAL OCURLY constructors CCURLY
|
||||||
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($5))); }
|
{ $$ = definition_data_ptr(new definition_data(std::move($2), std::move($3), std::move($6))); }
|
||||||
;
|
;
|
||||||
|
|
||||||
constructors
|
constructors
|
||||||
@@ -138,7 +136,28 @@ constructors
|
|||||||
;
|
;
|
||||||
|
|
||||||
constructor
|
constructor
|
||||||
: UID uppercaseParams
|
: UID typeList
|
||||||
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
type
|
||||||
|
: nonArrowType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
|
||||||
|
| nonArrowType { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
nonArrowType
|
||||||
|
: UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
|
||||||
|
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||||
|
| OPAREN type CPAREN { $$ = std::move($2); }
|
||||||
|
;
|
||||||
|
|
||||||
|
typeListElement
|
||||||
|
: OPAREN type CPAREN { $$ = std::move($2); }
|
||||||
|
| UID { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), {})); }
|
||||||
|
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
typeList
|
||||||
|
: %empty { $$ = std::vector<parsed_type_ptr>(); }
|
||||||
|
| typeList typeListElement { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
;
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
#include <ostream>
|
#include <ostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <vector>
|
||||||
#include "error.hpp"
|
#include "error.hpp"
|
||||||
|
|
||||||
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
@@ -15,29 +16,13 @@ void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
|
|||||||
monotype->print(mgr, to);
|
monotype->print(mgr, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
type_ptr substitute(const type_mgr& mgr, const std::map<std::string, type_ptr>& subst, const type_ptr& t) {
|
|
||||||
type_var* var;
|
|
||||||
type_ptr resolved = mgr.resolve(t, var);
|
|
||||||
if(var) {
|
|
||||||
auto subst_it = subst.find(var->name);
|
|
||||||
if(subst_it == subst.end()) return resolved;
|
|
||||||
return subst_it->second;
|
|
||||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(t.get())) {
|
|
||||||
auto left_result = substitute(mgr, subst, arr->left);
|
|
||||||
auto right_result = substitute(mgr, subst, arr->right);
|
|
||||||
if(left_result == arr->left && right_result == arr->right) return t;
|
|
||||||
return type_ptr(new type_arr(left_result, right_result));
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
type_ptr type_scheme::instantiate(type_mgr& mgr) const {
|
||||||
if(forall.size() == 0) return monotype;
|
if(forall.size() == 0) return monotype;
|
||||||
std::map<std::string, type_ptr> subst;
|
std::map<std::string, type_ptr> subst;
|
||||||
for(auto& var : forall) {
|
for(auto& var : forall) {
|
||||||
subst[var] = mgr.new_type();
|
subst[var] = mgr.new_type();
|
||||||
}
|
}
|
||||||
return substitute(mgr, subst, monotype);
|
return mgr.substitute(subst, monotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
@@ -60,6 +45,15 @@ void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
|||||||
to << ")";
|
to << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void type_app::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
constructor->print(mgr, to);
|
||||||
|
to << "* ";
|
||||||
|
for(auto& arg : arguments) {
|
||||||
|
to << " ";
|
||||||
|
arg->print(mgr, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string type_mgr::new_type_name() {
|
std::string type_mgr::new_type_name() {
|
||||||
int temp = last_id++;
|
int temp = last_id++;
|
||||||
std::string str = "";
|
std::string str = "";
|
||||||
@@ -99,12 +93,10 @@ type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void type_mgr::unify(type_ptr l, type_ptr r) {
|
void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||||
type_var* lvar;
|
type_var *lvar, *rvar;
|
||||||
type_var* rvar;
|
type_arr *larr, *rarr;
|
||||||
type_arr* larr;
|
type_base *lid, *rid;
|
||||||
type_arr* rarr;
|
type_app *lapp, *rapp;
|
||||||
type_base* lid;
|
|
||||||
type_base* rid;
|
|
||||||
|
|
||||||
l = resolve(l, lvar);
|
l = resolve(l, lvar);
|
||||||
r = resolve(r, rvar);
|
r = resolve(r, rvar);
|
||||||
@@ -122,12 +114,56 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
|
|||||||
return;
|
return;
|
||||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||||
if(lid->name == rid->name) return;
|
if(lid->name == rid->name && lid->arity == rid->arity) return;
|
||||||
|
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||||
|
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||||
|
unify(lapp->constructor, rapp->constructor);
|
||||||
|
auto left_it = lapp->arguments.begin();
|
||||||
|
auto right_it = rapp->arguments.begin();
|
||||||
|
while(left_it != lapp->arguments.end() &&
|
||||||
|
right_it != rapp->arguments.end()) {
|
||||||
|
unify(*left_it, *right_it);
|
||||||
|
left_it++, right_it++;
|
||||||
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw unification_error(l, r);
|
throw unification_error(l, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type_ptr type_mgr::substitute(const std::map<std::string, type_ptr>& subst, const type_ptr& t) const {
|
||||||
|
type_ptr temp = t;
|
||||||
|
while(type_var* var = dynamic_cast<type_var*>(temp.get())) {
|
||||||
|
auto subst_it = subst.find(var->name);
|
||||||
|
if(subst_it != subst.end()) return subst_it->second;
|
||||||
|
auto var_it = types.find(var->name);
|
||||||
|
if(var_it == types.end()) return t;
|
||||||
|
temp = var_it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(type_arr* arr = dynamic_cast<type_arr*>(temp.get())) {
|
||||||
|
auto left_result = substitute(subst, arr->left);
|
||||||
|
auto right_result = substitute(subst, arr->right);
|
||||||
|
if(left_result == arr->left && right_result == arr->right) return t;
|
||||||
|
return type_ptr(new type_arr(left_result, right_result));
|
||||||
|
} else if(type_app* app = dynamic_cast<type_app*>(temp.get())) {
|
||||||
|
auto constructor_result = substitute(subst, app->constructor);
|
||||||
|
bool arg_changed = false;
|
||||||
|
std::vector<type_ptr> new_args;
|
||||||
|
for(auto& arg : app->arguments) {
|
||||||
|
auto arg_result = substitute(subst, arg);
|
||||||
|
arg_changed |= arg_result != arg;
|
||||||
|
new_args.push_back(std::move(arg_result));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(constructor_result == app->constructor && !arg_changed) return t;
|
||||||
|
type_app* new_app = new type_app(std::move(constructor_result));
|
||||||
|
std::swap(new_app->arguments, new_args);
|
||||||
|
return type_ptr(new_app);
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
void type_mgr::bind(const std::string& s, type_ptr t) {
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
type_var* other = dynamic_cast<type_var*>(t.get());
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
@@ -144,5 +180,8 @@ void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
|
|||||||
} else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
|
} else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
|
||||||
find_free(arr->left, into);
|
find_free(arr->left, into);
|
||||||
find_free(arr->right, into);
|
find_free(arr->right, into);
|
||||||
|
} else if(type_app* app = dynamic_cast<type_app*>(resolved.get())) {
|
||||||
|
find_free(app->constructor, into);
|
||||||
|
for(auto& arg : app->arguments) find_free(arg, into);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,9 +38,10 @@ struct type_var : public type {
|
|||||||
|
|
||||||
struct type_base : public type {
|
struct type_base : public type {
|
||||||
std::string name;
|
std::string name;
|
||||||
|
int32_t arity;
|
||||||
|
|
||||||
type_base(std::string n)
|
type_base(std::string n, int32_t a = 0)
|
||||||
: name(std::move(n)) {}
|
: name(std::move(n)), arity(a) {}
|
||||||
|
|
||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
};
|
};
|
||||||
@@ -52,8 +53,8 @@ struct type_data : public type_base {
|
|||||||
|
|
||||||
std::map<std::string, constructor> constructors;
|
std::map<std::string, constructor> constructors;
|
||||||
|
|
||||||
type_data(std::string n)
|
type_data(std::string n, int32_t a = 0)
|
||||||
: type_base(std::move(n)) {}
|
: type_base(std::move(n), a) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_arr : public type {
|
struct type_arr : public type {
|
||||||
@@ -66,6 +67,16 @@ struct type_arr : public type {
|
|||||||
void print(const type_mgr& mgr, std::ostream& to) const;
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct type_app : public type {
|
||||||
|
type_ptr constructor;
|
||||||
|
std::vector<type_ptr> arguments;
|
||||||
|
|
||||||
|
type_app(type_ptr c)
|
||||||
|
: constructor(std::move(c)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
struct type_mgr {
|
struct type_mgr {
|
||||||
int last_id = 0;
|
int last_id = 0;
|
||||||
std::map<std::string, type_ptr> types;
|
std::map<std::string, type_ptr> types;
|
||||||
@@ -75,6 +86,9 @@ struct type_mgr {
|
|||||||
type_ptr new_arrow_type();
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
void unify(type_ptr l, type_ptr r);
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr substitute(
|
||||||
|
const std::map<std::string, type_ptr>& subst,
|
||||||
|
const type_ptr& t) const;
|
||||||
type_ptr resolve(type_ptr t, type_var*& var) const;
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
void bind(const std::string& s, type_ptr t);
|
void bind(const std::string& s, type_ptr t);
|
||||||
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
void find_free(const type_ptr& t, std::set<std::string>& into) const;
|
||||||
|
|||||||
46
code/compiler/12/CMakeLists.txt
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.1)
|
||||||
|
project(compiler)
|
||||||
|
|
||||||
|
# Find all the required packages
|
||||||
|
find_package(BISON)
|
||||||
|
find_package(FLEX)
|
||||||
|
find_package(LLVM REQUIRED CONFIG)
|
||||||
|
|
||||||
|
# Set up the flex and bison targets
|
||||||
|
bison_target(parser
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
|
||||||
|
COMPILE_FLAGS "-d")
|
||||||
|
flex_target(scanner
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
|
||||||
|
add_flex_bison_dependency(scanner parser)
|
||||||
|
|
||||||
|
# Find all the relevant LLVM components
|
||||||
|
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||||
|
|
||||||
|
# Create compiler executable
|
||||||
|
add_executable(compiler
|
||||||
|
definition.cpp definition.hpp
|
||||||
|
parsed_type.cpp parsed_type.hpp
|
||||||
|
ast.cpp ast.hpp
|
||||||
|
llvm_context.cpp llvm_context.hpp
|
||||||
|
type_env.cpp type_env.hpp
|
||||||
|
env.cpp env.hpp
|
||||||
|
type.cpp type.hpp
|
||||||
|
error.cpp error.hpp
|
||||||
|
binop.cpp binop.hpp
|
||||||
|
instruction.cpp instruction.hpp
|
||||||
|
graph.cpp graph.hpp
|
||||||
|
global_scope.cpp global_scope.hpp
|
||||||
|
${BISON_parser_OUTPUTS}
|
||||||
|
${FLEX_scanner_OUTPUTS}
|
||||||
|
main.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configure compiler executable
|
||||||
|
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||||
|
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
|
||||||
|
target_link_libraries(compiler ${LLVM_LIBS})
|
||||||
437
code/compiler/12/ast.cpp
Normal file
@@ -0,0 +1,437 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
static void print_indent(int n, std::ostream& to) {
|
||||||
|
while(n--) to << " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "INT: " << value << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::find_free(std::set<std::string>& into) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_int::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
return type_ptr(new type_app(env->lookup_type("Int")));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::translate(global_scope& scope) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "LID: " << id << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::find_free(std::set<std::string>& into) {
|
||||||
|
into.insert(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_lid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
return env->lookup(id)->instantiate(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::translate(global_scope& scope) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
auto mangled_name = this->env->get_mangled_name(id);
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
(env->has_variable(mangled_name) && !this->env->is_global(id)) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(mangled_name)) :
|
||||||
|
(instruction*) new instruction_pushglobal(mangled_name)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "UID: " << id << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::find_free(std::set<std::string>& into) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_uid::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
return env->lookup(id)->instantiate(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::translate(global_scope& scope) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
new instruction_pushglobal(this->env->get_mangled_name(id))));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "BINOP: " << op_name(op) << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::find_free(std::set<std::string>& into) {
|
||||||
|
left->find_free(into);
|
||||||
|
right->find_free(into);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_binop::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
|
type_ptr ftype = env->lookup(op_name(op))->instantiate(mgr);
|
||||||
|
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||||
|
|
||||||
|
type_ptr return_type = mgr.new_type();
|
||||||
|
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
|
||||||
|
type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one));
|
||||||
|
|
||||||
|
mgr.unify(arrow_two, ftype);
|
||||||
|
return return_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::translate(global_scope& scope) {
|
||||||
|
left->translate(scope);
|
||||||
|
right->translate(scope);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "APP:" << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::find_free(std::set<std::string>& into) {
|
||||||
|
left->find_free(into);
|
||||||
|
right->find_free(into);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_app::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
type_ptr ltype = left->typecheck(mgr, env);
|
||||||
|
type_ptr rtype = right->typecheck(mgr, env);
|
||||||
|
|
||||||
|
type_ptr return_type = mgr.new_type();
|
||||||
|
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||||
|
mgr.unify(arrow, ltype);
|
||||||
|
return return_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::translate(global_scope& scope) {
|
||||||
|
left->translate(scope);
|
||||||
|
right->translate(scope);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "CASE: " << std::endl;
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
print_indent(indent + 1, to);
|
||||||
|
branch->pat->print(to);
|
||||||
|
to << std::endl;
|
||||||
|
branch->expr->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::find_free(std::set<std::string>& into) {
|
||||||
|
of->find_free(into);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::set<std::string> free_in_branch;
|
||||||
|
std::set<std::string> pattern_variables;
|
||||||
|
branch->pat->find_variables(pattern_variables);
|
||||||
|
branch->expr->find_free(free_in_branch);
|
||||||
|
for(auto& free : free_in_branch) {
|
||||||
|
if(pattern_variables.find(free) == pattern_variables.end())
|
||||||
|
into.insert(free);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_case::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
type_var* var;
|
||||||
|
type_ptr case_type = mgr.resolve(of->typecheck(mgr, env), var);
|
||||||
|
type_ptr branch_type = mgr.new_type();
|
||||||
|
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
type_env_ptr new_env = type_scope(env);
|
||||||
|
branch->pat->typecheck(case_type, mgr, new_env);
|
||||||
|
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
|
||||||
|
mgr.unify(branch_type, curr_branch_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
input_type = mgr.resolve(case_type, var);
|
||||||
|
type_app* app_type;
|
||||||
|
if(!(app_type = dynamic_cast<type_app*>(input_type.get())) ||
|
||||||
|
!dynamic_cast<type_data*>(app_type->constructor.get())) {
|
||||||
|
throw type_error("attempting case analysis of non-data type");
|
||||||
|
}
|
||||||
|
|
||||||
|
return branch_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::translate(global_scope& scope) {
|
||||||
|
of->translate(scope);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
branch->expr->translate(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_app* app_type = dynamic_cast<type_app*>(input_type.get());
|
||||||
|
type_data* type = dynamic_cast<type_data*>(app_type->constructor.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(branch->expr->env->get_mangled_name(*it), new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||||
|
cpat->params.size())));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "LET: " << std::endl;
|
||||||
|
in->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::find_free(std::set<std::string>& into) {
|
||||||
|
definitions.find_free(into);
|
||||||
|
std::set<std::string> all_free;
|
||||||
|
in->find_free(all_free);
|
||||||
|
for(auto& free_var : all_free) {
|
||||||
|
if(definitions.defs_defn.find(free_var) == definitions.defs_defn.end())
|
||||||
|
into.insert(free_var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_let::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
definitions.typecheck(mgr, env);
|
||||||
|
return in->typecheck(mgr, definitions.env);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::translate(global_scope& scope) {
|
||||||
|
for(auto& def : definitions.defs_data) {
|
||||||
|
def.second->into_globals(scope);
|
||||||
|
}
|
||||||
|
for(auto& def : definitions.defs_defn) {
|
||||||
|
size_t original_params = def.second->params.size();
|
||||||
|
std::string original_name = def.second->name;
|
||||||
|
auto& global_definition = def.second->into_global(scope);
|
||||||
|
size_t captured = global_definition.params.size() - original_params;
|
||||||
|
|
||||||
|
type_env_ptr mangled_env = type_scope(env);
|
||||||
|
mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
|
||||||
|
mangled_env->set_mangled_name(def.first, global_definition.name);
|
||||||
|
|
||||||
|
ast_ptr global_app(new ast_lid(original_name));
|
||||||
|
global_app->env = mangled_env;
|
||||||
|
for(auto& param : global_definition.params) {
|
||||||
|
if(!(captured--)) break;
|
||||||
|
ast_ptr new_arg(new ast_lid(param));
|
||||||
|
new_arg->env = env;
|
||||||
|
global_app = ast_ptr(new ast_app(std::move(global_app), std::move(new_arg)));
|
||||||
|
global_app->env = env;
|
||||||
|
}
|
||||||
|
translated_definitions.push_back({ def.first, std::move(global_app) });
|
||||||
|
}
|
||||||
|
in->translate(scope);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_let::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_alloc(translated_definitions.size())));
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto& def : translated_definitions) {
|
||||||
|
new_env = env_ptr(new env_var(definitions.env->get_mangled_name(def.first), std::move(new_env)));
|
||||||
|
}
|
||||||
|
int offset = translated_definitions.size() - 1;
|
||||||
|
for(auto& def : translated_definitions) {
|
||||||
|
def.second->compile(new_env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_update(offset--)));
|
||||||
|
}
|
||||||
|
in->compile(new_env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_slide(translated_definitions.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "LAMBDA";
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
body->print(indent+1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::find_free(std::set<std::string>& into) {
|
||||||
|
body->find_free(free_variables);
|
||||||
|
for(auto& param : params) {
|
||||||
|
free_variables.erase(param);
|
||||||
|
}
|
||||||
|
into.insert(free_variables.begin(), free_variables.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_lambda::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
var_env = type_scope(env);
|
||||||
|
type_ptr return_type = mgr.new_type();
|
||||||
|
type_ptr full_type = return_type;
|
||||||
|
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
type_ptr param_type = mgr.new_type();
|
||||||
|
var_env->bind(*it, param_type);
|
||||||
|
full_type = type_ptr(new type_arr(std::move(param_type), full_type));
|
||||||
|
}
|
||||||
|
|
||||||
|
mgr.unify(return_type, body->typecheck(mgr, var_env));
|
||||||
|
return full_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::translate(global_scope& scope) {
|
||||||
|
std::vector<std::string> function_params;
|
||||||
|
for(auto& free_variable : free_variables) {
|
||||||
|
if(env->is_global(free_variable)) continue;
|
||||||
|
function_params.push_back(free_variable);
|
||||||
|
}
|
||||||
|
size_t captured_count = function_params.size();
|
||||||
|
function_params.insert(function_params.end(), params.begin(), params.end());
|
||||||
|
|
||||||
|
auto& new_function = scope.add_function("lambda", std::move(function_params), std::move(body));
|
||||||
|
type_env_ptr mangled_env = type_scope(env);
|
||||||
|
mangled_env->bind("lambda", type_scheme_ptr(nullptr), visibility::global);
|
||||||
|
mangled_env->set_mangled_name("lambda", new_function.name);
|
||||||
|
ast_ptr new_application = ast_ptr(new ast_lid("lambda"));
|
||||||
|
new_application->env = mangled_env;
|
||||||
|
|
||||||
|
for(auto& param : new_function.params) {
|
||||||
|
if(!(captured_count--)) break;
|
||||||
|
ast_ptr new_arg = ast_ptr(new ast_lid(param));
|
||||||
|
new_arg->env = env;
|
||||||
|
new_application = ast_ptr(new ast_app(std::move(new_application), std::move(new_arg)));
|
||||||
|
new_application->env = env;
|
||||||
|
}
|
||||||
|
translated = std::move(new_application);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lambda::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
translated->compile(env, into);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_var::print(std::ostream& to) const {
|
||||||
|
to << var;
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_var::find_variables(std::set<std::string>& into) const {
|
||||||
|
into.insert(var);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_var::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||||
|
env->bind(var, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
to << constr;
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_constr::find_variables(std::set<std::string>& into) const {
|
||||||
|
into.insert(params.begin(), params.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_constr::typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const {
|
||||||
|
type_scheme_ptr constructor_type_scheme = env->lookup(constr);
|
||||||
|
if(!constructor_type_scheme) {
|
||||||
|
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
||||||
|
}
|
||||||
|
type_ptr constructor_type = constructor_type_scheme->instantiate(mgr);
|
||||||
|
|
||||||
|
for(auto& param : params) {
|
||||||
|
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||||
|
if(!arr) throw type_error("too many parameters in constructor pattern");
|
||||||
|
|
||||||
|
env->bind(param, arr->left);
|
||||||
|
constructor_type = arr->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgr.unify(t, constructor_type);
|
||||||
|
}
|
||||||
189
code/compiler/12/ast.hpp
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "global_scope.hpp"
|
||||||
|
|
||||||
|
struct ast {
|
||||||
|
type_env_ptr env;
|
||||||
|
|
||||||
|
virtual ~ast() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual void find_free(std::set<std::string>& into) = 0;
|
||||||
|
virtual type_ptr typecheck(type_mgr& mgr, type_env_ptr& env) = 0;
|
||||||
|
virtual void translate(global_scope& scope) = 0;
|
||||||
|
virtual void compile(const env_ptr& env,
|
||||||
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
|
virtual void print(std::ostream& to) const = 0;
|
||||||
|
virtual void find_variables(std::set<std::string>& into) const = 0;
|
||||||
|
virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
|
|
||||||
|
struct branch {
|
||||||
|
pattern_ptr pat;
|
||||||
|
ast_ptr expr;
|
||||||
|
|
||||||
|
branch(pattern_ptr p, ast_ptr a)
|
||||||
|
: pat(std::move(p)), expr(std::move(a)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using branch_ptr = std::unique_ptr<branch>;
|
||||||
|
|
||||||
|
struct ast_int : public ast {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
explicit ast_int(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void find_free(std::set<std::string>& into);
|
||||||
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_lid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void find_free(std::set<std::string>& into);
|
||||||
|
type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
void translate(global_scope& scope);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_uid : public ast {
|
||||||
|
    std::string id;

    explicit ast_uid(std::string i)
        : id(std::move(i)) {}

    void print(int indent, std::ostream& to) const;
    void find_free(std::set<std::string>& into);
    type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
    void translate(global_scope& scope);
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_binop : public ast {
    binop op;
    ast_ptr left;
    ast_ptr right;

    ast_binop(binop o, ast_ptr l, ast_ptr r)
        : op(o), left(std::move(l)), right(std::move(r)) {}

    void print(int indent, std::ostream& to) const;
    void find_free(std::set<std::string>& into);
    type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
    void translate(global_scope& scope);
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_app : public ast {
    ast_ptr left;
    ast_ptr right;

    ast_app(ast_ptr l, ast_ptr r)
        : left(std::move(l)), right(std::move(r)) {}

    void print(int indent, std::ostream& to) const;
    void find_free(std::set<std::string>& into);
    type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
    void translate(global_scope& scope);
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_case : public ast {
    ast_ptr of;
    type_ptr input_type;
    std::vector<branch_ptr> branches;

    ast_case(ast_ptr o, std::vector<branch_ptr> b)
        : of(std::move(o)), branches(std::move(b)) {}

    void print(int indent, std::ostream& to) const;
    void find_free(std::set<std::string>& into);
    type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
    void translate(global_scope& scope);
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_let : public ast {
    using basic_definition = std::pair<std::string, ast_ptr>;

    definition_group definitions;
    ast_ptr in;

    std::vector<basic_definition> translated_definitions;

    ast_let(definition_group g, ast_ptr i)
        : definitions(std::move(g)), in(std::move(i)) {}

    void print(int indent, std::ostream& to) const;
    void find_free(std::set<std::string>& into);
    type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
    void translate(global_scope& scope);
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_lambda : public ast {
    std::vector<std::string> params;
    ast_ptr body;

    type_env_ptr var_env;

    std::set<std::string> free_variables;
    ast_ptr translated;

    ast_lambda(std::vector<std::string> ps, ast_ptr b)
        : params(std::move(ps)), body(std::move(b)) {}

    void print(int indent, std::ostream& to) const;
    void find_free(std::set<std::string>& into);
    type_ptr typecheck(type_mgr& mgr, type_env_ptr& env);
    void translate(global_scope& scope);
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct pattern_var : public pattern {
    std::string var;

    pattern_var(std::string v)
        : var(std::move(v)) {}

    void print(std::ostream &to) const;
    void find_variables(std::set<std::string>& into) const;
    void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
};

struct pattern_constr : public pattern {
    std::string constr;
    std::vector<std::string> params;

    pattern_constr(std::string c, std::vector<std::string> p)
        : constr(std::move(c)), params(std::move(p)) {}

    void print(std::ostream &to) const;
    void find_variables(std::set<std::string>& into) const;
    virtual void typecheck(type_ptr t, type_mgr& mgr, type_env_ptr& env) const;
};
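As a quick orientation aid (not part of this commit), the snippet below shows how these node types compose into a tree for the expression 1 + 2. It assumes an integer-literal node, called ast_int here, declared earlier in ast.hpp; the actual literal node's name may differ.

#include "ast.hpp"

// Hand-built AST for "1 + 2"; ast_int is an assumed literal node.
ast_ptr make_one_plus_two() {
    ast_ptr left(new ast_int(1));
    ast_ptr right(new ast_int(2));
    return ast_ptr(new ast_binop(PLUS, std::move(left), std::move(right)));
}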
21
code/compiler/12/binop.cpp
Normal file
@@ -0,0 +1,21 @@
#include "binop.hpp"

std::string op_name(binop op) {
    switch(op) {
        case PLUS: return "+";
        case MINUS: return "-";
        case TIMES: return "*";
        case DIVIDE: return "/";
    }
    return "??";
}

std::string op_action(binop op) {
    switch(op) {
        case PLUS: return "plus";
        case MINUS: return "minus";
        case TIMES: return "times";
        case DIVIDE: return "divide";
    }
    return "??";
}
12
code/compiler/12/binop.hpp
Normal file
@@ -0,0 +1,12 @@
#pragma once
#include <string>

enum binop {
    PLUS,
    MINUS,
    TIMES,
    DIVIDE
};

std::string op_name(binop op);
std::string op_action(binop op);
145
code/compiler/12/definition.cpp
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
#include "definition.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "graph.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/Type.h>
|
||||||
|
|
||||||
|
void definition_defn::find_free() {
|
||||||
|
body->find_free(free_variables);
|
||||||
|
for(auto& param : params) {
|
||||||
|
free_variables.erase(param);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v) {
|
||||||
|
this->env = env;
|
||||||
|
var_env = type_scope(env);
|
||||||
|
return_type = mgr.new_type();
|
||||||
|
full_type = return_type;
|
||||||
|
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
type_ptr param_type = mgr.new_type();
|
||||||
|
full_type = type_ptr(new type_arr(param_type, full_type));
|
||||||
|
var_env->bind(*it, param_type);
|
||||||
|
}
|
||||||
|
env->bind(name, full_type, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::typecheck(type_mgr& mgr) {
|
||||||
|
type_ptr body_type = body->typecheck(mgr, var_env);
|
||||||
|
mgr.unify(return_type, body_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
global_function& definition_defn::into_global(global_scope& scope) {
|
||||||
|
std::vector<std::string> all_params;
|
||||||
|
for(auto& free : free_variables) {
|
||||||
|
if(env->is_global(free)) continue;
|
||||||
|
all_params.push_back(free);
|
||||||
|
}
|
||||||
|
all_params.insert(all_params.end(), params.begin(), params.end());
|
||||||
|
body->translate(scope);
|
||||||
|
return scope.add_function(name, std::move(all_params), std::move(body));
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::insert_types(type_env_ptr& env) {
|
||||||
|
this->env = env;
|
||||||
|
env->bind_type(name, type_ptr(new type_data(name, vars.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::insert_constructors() const {
|
||||||
|
type_ptr this_type_ptr = env->lookup_type(name);
|
||||||
|
type_data* this_type = static_cast<type_data*>(this_type_ptr.get());
|
||||||
|
int next_tag = 0;
|
||||||
|
|
||||||
|
std::set<std::string> var_set;
|
||||||
|
type_app* return_app = new type_app(std::move(this_type_ptr));
|
||||||
|
type_ptr return_type(return_app);
|
||||||
|
for(auto& var : vars) {
|
||||||
|
if(var_set.find(var) != var_set.end()) throw 0;
|
||||||
|
var_set.insert(var);
|
||||||
|
return_app->arguments.push_back(type_ptr(new type_var(var)));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constructor : constructors) {
|
||||||
|
constructor->tag = next_tag;
|
||||||
|
this_type->constructors[constructor->name] = { next_tag++ };
|
||||||
|
|
||||||
|
type_ptr full_type = return_type;
|
||||||
|
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||||
|
type_ptr type = (*it)->to_type(var_set, env);
|
||||||
|
full_type = type_ptr(new type_arr(type, full_type));
|
||||||
|
}
|
||||||
|
|
||||||
|
type_scheme_ptr full_scheme(new type_scheme(std::move(full_type)));
|
||||||
|
full_scheme->forall.insert(full_scheme->forall.begin(), vars.begin(), vars.end());
|
||||||
|
env->bind(constructor->name, full_scheme);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::into_globals(global_scope& scope) {
|
||||||
|
for(auto& constructor : constructors) {
|
||||||
|
global_constructor& c = scope.add_constructor(
|
||||||
|
constructor->name, constructor->tag, constructor->types.size());
|
||||||
|
env->set_mangled_name(constructor->name, c.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_group::find_free(std::set<std::string>& into) {
|
||||||
|
for(auto& def_pair : defs_defn) {
|
||||||
|
def_pair.second->find_free();
|
||||||
|
for(auto& free_var : def_pair.second->free_variables) {
|
||||||
|
if(defs_defn.find(free_var) == defs_defn.end()) {
|
||||||
|
into.insert(free_var);
|
||||||
|
} else {
|
||||||
|
def_pair.second->nearby_variables.insert(free_var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
|
||||||
|
this->env = type_scope(env);
|
||||||
|
|
||||||
|
for(auto& def_data : defs_data) {
|
||||||
|
def_data.second->insert_types(this->env);
|
||||||
|
}
|
||||||
|
for(auto& def_data : defs_data) {
|
||||||
|
def_data.second->insert_constructors();
|
||||||
|
}
|
||||||
|
|
||||||
|
function_graph dependency_graph;
|
||||||
|
|
||||||
|
for(auto& def_defn : defs_defn) {
|
||||||
|
def_defn.second->find_free();
|
||||||
|
dependency_graph.add_function(def_defn.second->name);
|
||||||
|
|
||||||
|
for(auto& dependency : def_defn.second->nearby_variables) {
|
||||||
|
if(defs_defn.find(dependency) == defs_defn.end())
|
||||||
|
throw 0;
|
||||||
|
dependency_graph.add_edge(def_defn.second->name, dependency);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<group_ptr> groups = dependency_graph.compute_order();
|
||||||
|
for(auto it = groups.rbegin(); it != groups.rend(); it++) {
|
||||||
|
auto& group = *it;
|
||||||
|
for(auto& def_defnn_name : group->members) {
|
||||||
|
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||||
|
def_defn->insert_types(mgr, this->env, vis);
|
||||||
|
}
|
||||||
|
for(auto& def_defnn_name : group->members) {
|
||||||
|
auto& def_defn = defs_defn.find(def_defnn_name)->second;
|
||||||
|
def_defn->typecheck(mgr);
|
||||||
|
}
|
||||||
|
for(auto& def_defnn_name : group->members) {
|
||||||
|
this->env->generalize(def_defnn_name, *group, mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
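The loops in definition_defn::insert_types and definition_data::insert_constructors above both build a function type by folding from the last argument backwards. The standalone sketch below (not from the compiler) shows the same right-to-left fold on plain strings, so the resulting shape is easy to see.

#include <iostream>
#include <string>
#include <vector>

int main() {
    // For params {"x", "y"} and return type "r" this prints "x -> y -> r",
    // mirroring how full_type is wrapped in one type_arr per parameter,
    // innermost (last parameter) first.
    std::vector<std::string> params = { "x", "y" };
    std::string full_type = "r";
    for(auto it = params.rbegin(); it != params.rend(); it++) {
        full_type = *it + " -> " + full_type;
    }
    std::cout << full_type << std::endl;
    return 0;
}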
83
code/compiler/12/definition.hpp
Normal file
@@ -0,0 +1,83 @@
#pragma once
#include <memory>
#include <vector>
#include <map>
#include <set>
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parsed_type.hpp"
#include "type_env.hpp"
#include "global_scope.hpp"

struct ast;
using ast_ptr = std::unique_ptr<ast>;

struct constructor {
    std::string name;
    std::vector<parsed_type_ptr> types;
    int8_t tag;

    constructor(std::string n, std::vector<parsed_type_ptr> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};

using constructor_ptr = std::unique_ptr<constructor>;

struct definition_defn {
    std::string name;
    std::vector<std::string> params;
    ast_ptr body;

    type_env_ptr env;
    type_env_ptr var_env;
    std::set<std::string> free_variables;
    std::set<std::string> nearby_variables;
    type_ptr full_type;
    type_ptr return_type;

    definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
        : name(std::move(n)), params(std::move(p)), body(std::move(b)) {}

    void find_free();
    void insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
    void typecheck(type_mgr& mgr);

    global_function& into_global(global_scope& scope);
};

using definition_defn_ptr = std::unique_ptr<definition_defn>;

struct definition_data {
    std::string name;
    std::vector<std::string> vars;
    std::vector<constructor_ptr> constructors;

    type_env_ptr env;

    definition_data(
            std::string n,
            std::vector<std::string> vs,
            std::vector<constructor_ptr> cs)
        : name(std::move(n)), vars(std::move(vs)), constructors(std::move(cs)) {}

    void insert_types(type_env_ptr& env);
    void insert_constructors() const;

    void into_globals(global_scope& scope);
};

using definition_data_ptr = std::unique_ptr<definition_data>;

struct definition_group {
    std::map<std::string, definition_data_ptr> defs_data;
    std::map<std::string, definition_defn_ptr> defs_defn;
    visibility vis;
    type_env_ptr env;

    definition_group(visibility v = visibility::local) : vis(v) {}

    void find_free(std::set<std::string>& into);
    void typecheck(type_mgr& mgr, type_env_ptr& env);
};
23
code/compiler/12/env.cpp
Normal file
@@ -0,0 +1,23 @@
#include "env.hpp"

int env_var::get_offset(const std::string& name) const {
    if(name == this->name) return 0;
    if(parent) return parent->get_offset(name) + 1;
    throw 0;
}

bool env_var::has_variable(const std::string& name) const {
    if(name == this->name) return true;
    if(parent) return parent->has_variable(name);
    return false;
}

int env_offset::get_offset(const std::string& name) const {
    if(parent) return parent->get_offset(name) + offset;
    throw 0;
}

bool env_offset::has_variable(const std::string& name) const {
    if(parent) return parent->has_variable(name);
    return false;
}
34
code/compiler/12/env.hpp
Normal file
@@ -0,0 +1,34 @@
#pragma once
#include <memory>
#include <string>

struct env {
    virtual ~env() = default;

    virtual int get_offset(const std::string& name) const = 0;
    virtual bool has_variable(const std::string& name) const = 0;
};

using env_ptr = std::shared_ptr<env>;

struct env_var : public env {
    std::string name;
    env_ptr parent;

    env_var(std::string n, env_ptr p)
        : name(std::move(n)), parent(std::move(p)) {}

    int get_offset(const std::string& name) const;
    bool has_variable(const std::string& name) const;
};

struct env_offset : public env {
    int offset;
    env_ptr parent;

    env_offset(int o, env_ptr p)
        : offset(o), parent(std::move(p)) {}

    int get_offset(const std::string& name) const;
    bool has_variable(const std::string& name) const;
};
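A minimal sketch (not part of the commit) of how these classes answer the compiler's question "how deep on the stack is this variable?": parameters are layered with env_var, and env_offset shifts everything beneath it once extra nodes have been pushed.

#include <cassert>
#include "env.hpp"

int main() {
    // Environment for a function with parameters x and y, x on top.
    env_ptr base(new env_offset(0, nullptr));
    env_ptr with_y(new env_var("y", base));
    env_ptr with_x(new env_var("x", with_y));

    assert(with_x->get_offset("x") == 0);
    assert(with_x->get_offset("y") == 1);
    assert(with_x->has_variable("y"));

    // After something extra is pushed onto the stack, wrap the environment
    // in an env_offset so previously bound variables appear one slot deeper.
    env_ptr shifted(new env_offset(1, with_x));
    assert(shifted->get_offset("y") == 2);
    return 0;
}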
5
code/compiler/12/error.cpp
Normal file
@@ -0,0 +1,5 @@
#include "error.hpp"

const char* type_error::what() const noexcept {
    return "an error occurred while checking the types of the program";
}
21
code/compiler/12/error.hpp
Normal file
@@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"

struct type_error : std::exception {
    std::string description;

    type_error(std::string d)
        : description(std::move(d)) {}

    const char* what() const noexcept override;
};

struct unification_error : public type_error {
    type_ptr left;
    type_ptr right;

    unification_error(type_ptr l, type_ptr r)
        : left(std::move(l)), right(std::move(r)),
        type_error("failed to unify types") {}
};
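Not part of the commit: a small sketch of how a driver might surface these exceptions, using only what error.hpp declares (the base class carries a human-readable description, and unification_error additionally carries the two offending types).

#include <iostream>
#include "error.hpp"

// Hypothetical reporting helper; the real main.cpp may do this differently.
void report_error(const type_error& e) {
    std::cerr << e.what() << ": " << e.description << std::endl;
}

// Assumed usage in a driver:
//   try { /* parse, typecheck, compile */ }
//   catch(const unification_error& e) { report_error(e); /* also has e.left, e.right */ }
//   catch(const type_error& e) { report_error(e); }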
2
code/compiler/12/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
data Bool = { True, False }
defn main = { 3 + True }
1
code/compiler/12/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
defn main = { 1 2 3 4 5 }
8
code/compiler/12/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }

defn head l = {
    case l of {
        Nil -> { 0 }
        Cons x y z -> { x }
    }
}
17
code/compiler/12/examples/fixpoint.txt
Normal file
@@ -0,0 +1,17 @@
data List a = { Nil, Cons a (List a) }

defn fix f = { let { defn x = { f x } } in { x } }
defn fixpointOnes fo = { Cons 1 fo }
defn sumTwo l = {
    case l of {
        Nil -> { 0 }
        Cons x xs -> {
            x + case xs of {
                Nil -> { 0 }
                Cons y ys -> { y }
            }
        }
    }
}

defn main = { sumTwo (fix fixpointOnes) }
8
code/compiler/12/examples/if.txt
Normal file
@@ -0,0 +1,8 @@
data Bool = { True, False }
defn if c t e = {
    case c of {
        True -> { t }
        False -> { e }
    }
}
defn main = { if (if True False True) 11 3 }
19
code/compiler/12/examples/lambda.txt
Normal file
@@ -0,0 +1,19 @@
data List a = { Nil, Cons a (List a) }

defn sum l = {
    case l of {
        Nil -> { 0 }
        Cons x xs -> { x + sum xs }
    }
}

defn map f l = {
    case l of {
        Nil -> { Nil }
        Cons x xs -> { Cons (f x) (map f xs) }
    }
}

defn main = {
    sum (map \x -> { x * x } (map (\x -> { x + x }) (Cons 1 (Cons 2 (Cons 3 Nil)))))
}
47
code/compiler/12/examples/letin.txt
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
|
||||||
|
data List a = { Nil, Cons a (List a) }
|
||||||
|
|
||||||
|
defn if c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn mergeUntil l r p = {
|
||||||
|
let {
|
||||||
|
defn mergeLeft nl nr = {
|
||||||
|
case nl of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { if (p x) (Cons x (mergeRight xs nr)) Nil }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn mergeRight nl nr = {
|
||||||
|
case nr of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { if (p x) (Cons x (mergeLeft nl xs)) Nil }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} in {
|
||||||
|
mergeLeft l r
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn const x y = { x }
|
||||||
|
|
||||||
|
defn sum l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> { x + sum xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
let {
|
||||||
|
defn firstList = { Cons 1 (Cons 3 (Cons 5 Nil)) }
|
||||||
|
defn secondList = { Cons 2 (Cons 4 (Cons 6 Nil)) }
|
||||||
|
} in {
|
||||||
|
sum (mergeUntil firstList secondList (const True))
|
||||||
|
}
|
||||||
|
}
|
||||||
32
code/compiler/12/examples/list.txt
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
data List a = { Nil, Cons a (List a) }
|
||||||
|
|
||||||
|
defn map f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { Cons (f x) (map f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn foldl f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { foldl f (f b x) xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn foldr f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { f x (foldr f b xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn list = { Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))) }
|
||||||
|
|
||||||
|
defn add x y = { x + y }
|
||||||
|
defn sum l = { foldr add 0 l }
|
||||||
|
|
||||||
|
defn skipAdd x y = { y + 1 }
|
||||||
|
defn length l = { foldr skipAdd 0 l }
|
||||||
|
|
||||||
|
defn main = { sum list + length list }
|
||||||
25
code/compiler/12/examples/mutual_recursion.txt
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn if c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn oddEven l e = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { e }
|
||||||
|
Cons x xs -> { evenOdd xs e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn evenOdd l e = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { e }
|
||||||
|
Cons x xs -> { oddEven xs e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = { if (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) True) (oddEven (Cons 1 (Cons 2 (Cons 3 Nil))) 1) 3 }
|
||||||
23
code/compiler/12/examples/packed.txt
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
data Pair a b = { Pair a b }
|
||||||
|
|
||||||
|
defn packer = {
|
||||||
|
let {
|
||||||
|
data Packed a = { Packed a }
|
||||||
|
defn pack a = { Packed a }
|
||||||
|
defn unpack p = {
|
||||||
|
case p of {
|
||||||
|
Packed a -> { a }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} in {
|
||||||
|
Pair pack unpack
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
case packer of {
|
||||||
|
Pair pack unpack -> {
|
||||||
|
unpack (pack 3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
17
code/compiler/12/examples/pair.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
data Pair a b = { MkPair a b }
|
||||||
|
|
||||||
|
defn fst p = {
|
||||||
|
case p of {
|
||||||
|
MkPair a b -> { a }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn snd p = {
|
||||||
|
case p of {
|
||||||
|
MkPair a b -> { b }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn pair = { MkPair 1 (MkPair 2 3) }
|
||||||
|
|
||||||
|
defn main = { fst pair + snd (snd pair) }
|
||||||
122
code/compiler/12/examples/primes.txt
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
data List = { Nil, Cons Nat List }
|
||||||
|
data Bool = { True, False }
|
||||||
|
data Nat = { O, S Nat }
|
||||||
|
|
||||||
|
defn if c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn toInt n = {
|
||||||
|
case n of {
|
||||||
|
O -> { 0 }
|
||||||
|
S np -> { 1 + toInt np }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn lte n m = {
|
||||||
|
case m of {
|
||||||
|
O -> {
|
||||||
|
case n of {
|
||||||
|
O -> { True }
|
||||||
|
S np -> { False }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
S mp -> {
|
||||||
|
case n of {
|
||||||
|
O -> { True }
|
||||||
|
S np -> { lte np mp }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn minus n m = {
|
||||||
|
case m of {
|
||||||
|
O -> { n }
|
||||||
|
S mp -> {
|
||||||
|
case n of {
|
||||||
|
O -> { O }
|
||||||
|
S np -> {
|
||||||
|
minus np mp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn mod n m = {
|
||||||
|
if (lte m n) (mod (minus n m) m) n
|
||||||
|
}
|
||||||
|
|
||||||
|
defn notDivisibleBy n m = {
|
||||||
|
case (mod m n) of {
|
||||||
|
O -> { False }
|
||||||
|
S mp -> { True }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn filter f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { if (f x) (Cons x (filter f xs)) (filter f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn map f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { Cons (f x) (map f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn nats = {
|
||||||
|
Cons (S (S O)) (map S nats)
|
||||||
|
}
|
||||||
|
|
||||||
|
defn primesRec l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn primes = {
|
||||||
|
primesRec nats
|
||||||
|
}
|
||||||
|
|
||||||
|
defn take n l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> {
|
||||||
|
case n of {
|
||||||
|
O -> { Nil }
|
||||||
|
S np -> { Cons x (take np xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { O }
|
||||||
|
Cons x xs -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn reverseAcc a l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { a }
|
||||||
|
Cons x xs -> { reverseAcc (Cons x a) xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn reverse l = {
|
||||||
|
reverseAcc Nil l
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
|
||||||
|
}
|
||||||
31
code/compiler/12/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
#include "../runtime.h"

void f_add(struct stack* s) {
    struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
    struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
    stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
}

void f_main(struct stack* s) {
    // PushInt 320
    stack_push(s, (struct node_base*) alloc_num(320));

    // PushInt 6
    stack_push(s, (struct node_base*) alloc_num(6));

    // PushGlobal f_add (the function for +)
    stack_push(s, (struct node_base*) alloc_global(f_add, 2));

    struct node_base* left;
    struct node_base* right;

    // MkApp
    left = stack_pop(s);
    right = stack_pop(s);
    stack_push(s, (struct node_base*) alloc_app(left, right));

    // MkApp
    left = stack_pop(s);
    right = stack_pop(s);
    stack_push(s, (struct node_base*) alloc_app(left, right));
}
2
code/compiler/12/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
defn main = { sum 320 6 }
defn sum x y = { x + y }
3
code/compiler/12/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
defn add x y = { x + y }
defn double x = { add x x }
defn main = { double 163 }
9
code/compiler/12/examples/works3.txt
Normal file
@@ -0,0 +1,9 @@
data List a = { Nil, Cons a (List a) }
data Bool = { True, False }
defn length l = {
    case l of {
        Nil -> { 0 }
        Cons x xs -> { 1 + length xs }
    }
}
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) + length (Cons True (Cons False (Cons True Nil))) }
16
code/compiler/12/examples/works4.txt
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn add x y = { x + y }
|
||||||
|
defn mul x y = { x * y }
|
||||||
|
|
||||||
|
defn foldr f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { f x (foldr f b xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
|
||||||
|
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
|
||||||
|
}
|
||||||
17
code/compiler/12/examples/works5.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn sumZip l m = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> {
|
||||||
|
case m of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons y ys -> { x + y + sumZip xs ys }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn ones = { Cons 1 ones }
|
||||||
|
|
||||||
|
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
83
code/compiler/12/global_scope.cpp
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
#include "global_scope.hpp"
|
||||||
|
#include "ast.hpp"
|
||||||
|
|
||||||
|
void global_function::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_function::declare_llvm(llvm_context& ctx) {
|
||||||
|
generated_function = ctx.create_custom_function(name, params.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_function::generate_llvm(llvm_context& ctx) {
|
||||||
|
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
||||||
|
for(auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, generated_function);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateRetVoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_constructor::generate_llvm(llvm_context& ctx) {
|
||||||
|
auto new_function =
|
||||||
|
ctx.create_custom_function(name, arity);
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pack(tag, arity)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||||
|
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||||
|
for (auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, new_function);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateRetVoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
global_function& global_scope::add_function(std::string n, std::vector<std::string> ps, ast_ptr b) {
|
||||||
|
global_function* new_function = new global_function(mangle_name(n), std::move(ps), std::move(b));
|
||||||
|
functions.push_back(global_function_ptr(new_function));
|
||||||
|
return *new_function;
|
||||||
|
}
|
||||||
|
|
||||||
|
global_constructor& global_scope::add_constructor(std::string n, int8_t t, size_t a) {
|
||||||
|
global_constructor* new_constructor = new global_constructor(mangle_name(n), t, a);
|
||||||
|
constructors.push_back(global_constructor_ptr(new_constructor));
|
||||||
|
return *new_constructor;
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_scope::compile() {
|
||||||
|
for(auto& function : functions) {
|
||||||
|
function->compile();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void global_scope::generate_llvm(llvm_context& ctx) {
|
||||||
|
for(auto& constructor : constructors) {
|
||||||
|
constructor->generate_llvm(ctx);
|
||||||
|
}
|
||||||
|
for(auto& function : functions) {
|
||||||
|
function->declare_llvm(ctx);
|
||||||
|
}
|
||||||
|
for(auto& function : functions) {
|
||||||
|
function->generate_llvm(ctx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string global_scope::mangle_name(const std::string& n) {
|
||||||
|
auto occurence_it = occurence_count.find(n);
|
||||||
|
int occurence = 0;
|
||||||
|
if(occurence_it != occurence_count.end()) {
|
||||||
|
occurence = occurence_it->second + 1;
|
||||||
|
}
|
||||||
|
occurence_count[n] = occurence;
|
||||||
|
|
||||||
|
std::string final_name = n;
|
||||||
|
if (occurence != 0) {
|
||||||
|
final_name += "_";
|
||||||
|
final_name += std::to_string(occurence);
|
||||||
|
}
|
||||||
|
return final_name;
|
||||||
|
}
|
||||||
55
code/compiler/12/global_scope.hpp
Normal file
@@ -0,0 +1,55 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <llvm/IR/Function.h>
#include "instruction.hpp"

struct ast;
using ast_ptr = std::unique_ptr<ast>;

struct global_function {
    std::string name;
    std::vector<std::string> params;
    ast_ptr body;

    std::vector<instruction_ptr> instructions;
    llvm::Function* generated_function;

    global_function(std::string n, std::vector<std::string> ps, ast_ptr b)
        : name(std::move(n)), params(std::move(ps)), body(std::move(b)) {}

    void compile();
    void declare_llvm(llvm_context& ctx);
    void generate_llvm(llvm_context& ctx);
};

using global_function_ptr = std::unique_ptr<global_function>;

struct global_constructor {
    std::string name;
    int8_t tag;
    size_t arity;

    global_constructor(std::string n, int8_t t, size_t a)
        : name(std::move(n)), tag(t), arity(a) {}

    void generate_llvm(llvm_context& ctx);
};

using global_constructor_ptr = std::unique_ptr<global_constructor>;

struct global_scope {
    std::map<std::string, int> occurence_count;
    std::vector<global_function_ptr> functions;
    std::vector<global_constructor_ptr> constructors;

    global_function& add_function(std::string n, std::vector<std::string> ps, ast_ptr b);
    global_constructor& add_constructor(std::string n, int8_t t, size_t a);

    void compile();
    void generate_llvm(llvm_context& ctx);

    private:
        std::string mangle_name(const std::string& n);
};
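Standalone sketch (not from the compiler) of the counting scheme global_scope::mangle_name implements: the first definition keeps its plain name, and later definitions with the same source name get a numeric suffix, which is what keeps duplicate let-bound definitions apart as separate global functions.

#include <iostream>
#include <map>
#include <string>

// Re-implementation of the same logic, for illustration only.
std::string mangle(std::map<std::string, int>& counts, const std::string& n) {
    int occurence = 0;
    auto it = counts.find(n);
    if(it != counts.end()) occurence = it->second + 1;
    counts[n] = occurence;
    return occurence == 0 ? n : n + "_" + std::to_string(occurence);
}

int main() {
    std::map<std::string, int> counts;
    std::cout << mangle(counts, "x") << std::endl; // x
    std::cout << mangle(counts, "x") << std::endl; // x_1
    std::cout << mangle(counts, "y") << std::endl; // y
    std::cout << mangle(counts, "x") << std::endl; // x_2
    return 0;
}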
114
code/compiler/12/graph.cpp
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
#include "graph.hpp"
|
||||||
|
|
||||||
|
std::set<function_graph::edge> function_graph::compute_transitive_edges() {
|
||||||
|
std::set<edge> transitive_edges;
|
||||||
|
transitive_edges.insert(edges.begin(), edges.end());
|
||||||
|
for(auto& connector : adjacency_lists) {
|
||||||
|
for(auto& from : adjacency_lists) {
|
||||||
|
edge to_connector { from.first, connector.first };
|
||||||
|
for(auto& to : adjacency_lists) {
|
||||||
|
edge full_jump { from.first, to.first };
|
||||||
|
if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
|
||||||
|
|
||||||
|
edge from_connector { connector.first, to.first };
|
||||||
|
if(transitive_edges.find(to_connector) != transitive_edges.end() &&
|
||||||
|
transitive_edges.find(from_connector) != transitive_edges.end())
|
||||||
|
transitive_edges.insert(std::move(full_jump));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return transitive_edges;
|
||||||
|
}
|
||||||
|
|
||||||
|
void function_graph::create_groups(
|
||||||
|
const std::set<edge>& transitive_edges,
|
||||||
|
std::map<function, group_id>& group_ids,
|
||||||
|
std::map<group_id, data_ptr>& group_data_map) {
|
||||||
|
group_id id_counter = 0;
|
||||||
|
for(auto& vertex : adjacency_lists) {
|
||||||
|
if(group_ids.find(vertex.first) != group_ids.end())
|
||||||
|
continue;
|
||||||
|
data_ptr new_group(new group_data);
|
||||||
|
new_group->functions.insert(vertex.first);
|
||||||
|
group_data_map[id_counter] = new_group;
|
||||||
|
group_ids[vertex.first] = id_counter;
|
||||||
|
for(auto& other_vertex : adjacency_lists) {
|
||||||
|
if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
|
||||||
|
transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
|
||||||
|
group_ids[other_vertex.first] = id_counter;
|
||||||
|
new_group->functions.insert(other_vertex.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
id_counter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void function_graph::create_edges(
|
||||||
|
std::map<function, group_id>& group_ids,
|
||||||
|
std::map<group_id, data_ptr>& group_data_map) {
|
||||||
|
std::set<std::pair<group_id, group_id>> group_edges;
|
||||||
|
for(auto& vertex : adjacency_lists) {
|
||||||
|
auto vertex_id = group_ids[vertex.first];
|
||||||
|
auto& vertex_data = group_data_map[vertex_id];
|
||||||
|
for(auto& other_vertex : vertex.second) {
|
||||||
|
auto other_id = group_ids[other_vertex];
|
||||||
|
if(vertex_id == other_id) continue;
|
||||||
|
if(group_edges.find({vertex_id, other_id}) != group_edges.end())
|
||||||
|
continue;
|
||||||
|
group_edges.insert({vertex_id, other_id});
|
||||||
|
vertex_data->adjacency_list.insert(other_id);
|
||||||
|
group_data_map[other_id]->indegree++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<group_ptr> function_graph::generate_order(
|
||||||
|
std::map<function, group_id>& group_ids,
|
||||||
|
std::map<group_id, data_ptr>& group_data_map) {
|
||||||
|
std::queue<group_id> id_queue;
|
||||||
|
std::vector<group_ptr> output;
|
||||||
|
for(auto& group : group_data_map) {
|
||||||
|
if(group.second->indegree == 0) id_queue.push(group.first);
|
||||||
|
}
|
||||||
|
|
||||||
|
while(!id_queue.empty()) {
|
||||||
|
auto new_id = id_queue.front();
|
||||||
|
auto& group_data = group_data_map[new_id];
|
||||||
|
group_ptr output_group(new group);
|
||||||
|
output_group->members = std::move(group_data->functions);
|
||||||
|
id_queue.pop();
|
||||||
|
|
||||||
|
for(auto& adjacent_group : group_data->adjacency_list) {
|
||||||
|
if(--group_data_map[adjacent_group]->indegree == 0)
|
||||||
|
id_queue.push(adjacent_group);
|
||||||
|
}
|
||||||
|
|
||||||
|
output.push_back(std::move(output_group));
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::set<function>& function_graph::add_function(const function& f) {
|
||||||
|
auto adjacency_list_it = adjacency_lists.find(f);
|
||||||
|
if(adjacency_list_it != adjacency_lists.end()) {
|
||||||
|
return adjacency_list_it->second;
|
||||||
|
} else {
|
||||||
|
return adjacency_lists[f] = { };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void function_graph::add_edge(const function& from, const function& to) {
|
||||||
|
add_function(from).insert(to);
|
||||||
|
edges.insert({ from, to });
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<group_ptr> function_graph::compute_order() {
|
||||||
|
std::set<edge> transitive_edges = compute_transitive_edges();
|
||||||
|
std::map<function, group_id> group_ids;
|
||||||
|
std::map<group_id, data_ptr> group_data_map;
|
||||||
|
|
||||||
|
create_groups(transitive_edges, group_ids, group_data_map);
|
||||||
|
create_edges(group_ids, group_data_map);
|
||||||
|
return generate_order(group_ids, group_data_map);
|
||||||
|
}
|
||||||
51
code/compiler/12/graph.hpp
Normal file
@@ -0,0 +1,51 @@
#pragma once
#include <algorithm>
#include <cstddef>
#include <queue>
#include <set>
#include <string>
#include <map>
#include <memory>
#include <vector>

using function = std::string;

struct group {
    std::set<function> members;
};

using group_ptr = std::unique_ptr<group>;

class function_graph {
    using group_id = size_t;

    struct group_data {
        std::set<function> functions;
        std::set<group_id> adjacency_list;
        size_t indegree;
    };

    using data_ptr = std::shared_ptr<group_data>;
    using edge = std::pair<function, function>;
    using group_edge = std::pair<group_id, group_id>;

    std::map<function, std::set<function>> adjacency_lists;
    std::set<edge> edges;

    std::set<edge> compute_transitive_edges();
    void create_groups(
            const std::set<edge>&,
            std::map<function, group_id>&,
            std::map<group_id, data_ptr>&);
    void create_edges(
            std::map<function, group_id>&,
            std::map<group_id, data_ptr>&);
    std::vector<group_ptr> generate_order(
            std::map<function, group_id>&,
            std::map<group_id, data_ptr>&);

    public:
        std::set<function>& add_function(const function& f);
        void add_edge(const function& from, const function& to);
        std::vector<group_ptr> compute_order();
};
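A usage sketch (not part of the commit) of function_graph, fed with a reduced version of the mutual_recursion example: oddEven and evenOdd reference each other, so compute_order puts them into a single group, and definition_group::typecheck walks the returned list back to front so that group is typechecked before main.

#include <iostream>
#include "graph.hpp"

int main() {
    function_graph g;
    g.add_function("oddEven");
    g.add_function("evenOdd");
    g.add_function("main");
    g.add_edge("oddEven", "evenOdd");  // oddEven refers to evenOdd
    g.add_edge("evenOdd", "oddEven");  // and vice versa
    g.add_edge("main", "oddEven");     // main refers to oddEven

    for(auto& grp : g.compute_order()) {
        for(auto& member : grp->members) std::cout << member << " ";
        std::cout << std::endl;
    }
    // Expected: "main" on one line, then "evenOdd oddEven" on the next;
    // callers come first, which is why typecheck iterates the vector in reverse.
    return 0;
}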
177
code/compiler/12/instruction.cpp
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/BasicBlock.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
static void print_indent(int n, std::ostream& to) {
|
||||||
|
while(n--) to << " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pushint::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "PushInt(" << value << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pushglobal::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "PushGlobal(" << name << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
auto& global_f = ctx.custom_functions.at("f_" + name);
|
||||||
|
auto arity = ctx.create_i32(global_f->arity);
|
||||||
|
ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_push::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Push(" << offset << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Pop(" << count << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_popn(f, ctx.create_size(count));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_mkapp::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "MkApp()" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
auto left = ctx.create_pop(f);
|
||||||
|
auto right = ctx.create_pop(f);
|
||||||
|
ctx.create_push(f, ctx.create_app(f, left, right));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_update::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Update(" << offset << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_update(f, ctx.create_size(offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pack::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Pack(" << tag << ", " << size << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_split::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Split()" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_split(f, ctx.create_size(size));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Jump(" << std::endl;
|
||||||
|
for(auto& instruction_set : branches) {
|
||||||
|
for(auto& instruction : instruction_set) {
|
||||||
|
instruction->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
}
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
auto top_node = ctx.create_peek(f, ctx.create_size(0));
|
||||||
|
auto tag = ctx.unwrap_data_tag(top_node);
|
||||||
|
auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
|
||||||
|
auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());
|
||||||
|
std::vector<BasicBlock*> blocks;
|
||||||
|
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
|
||||||
|
ctx.builder.SetInsertPoint(branch_block);
|
||||||
|
for(auto& instruction : branch) {
|
||||||
|
instruction->gen_llvm(ctx, f);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateBr(safety_block);
|
||||||
|
blocks.push_back(branch_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& mapping : tag_mappings) {
|
||||||
|
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.builder.SetInsertPoint(safety_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_slide::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Slide(" << offset << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_slide(f, ctx.create_size(offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_binop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "BinOp(" << op_action(op) << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
auto left_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||||
|
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
|
||||||
|
llvm::Value* result;
|
||||||
|
switch(op) {
|
||||||
|
case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
|
||||||
|
case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
|
||||||
|
case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
|
||||||
|
case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
|
||||||
|
}
|
||||||
|
ctx.create_push(f, ctx.create_num(f, result));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_eval::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Eval()" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_unwind(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_alloc::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Alloc(" << amount << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
ctx.create_alloc(f, ctx.create_size(amount));
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_unwind::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Unwind()" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
|
||||||
|
// Nothing
|
||||||
|
}
|
||||||
142
code/compiler/12/instruction.hpp
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
|
||||||
|
struct instruction {
|
||||||
|
virtual ~instruction() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using instruction_ptr = std::unique_ptr<instruction>;
|
||||||
|
|
||||||
|
struct instruction_pushint : public instruction {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
instruction_pushint(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pushglobal : public instruction {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
instruction_pushglobal(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_push : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_push(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pop : public instruction {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
instruction_pop(int c)
|
||||||
|
: count(c) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_mkapp : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_update : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_update(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pack : public instruction {
|
||||||
|
int tag;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_pack(int t, int s)
|
||||||
|
: tag(t), size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_split : public instruction {
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_split(int s)
|
||||||
|
: size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_slide : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_slide(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_binop : public instruction {
|
||||||
|
binop op;
|
||||||
|
|
||||||
|
instruction_binop(binop o)
|
||||||
|
: op(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_eval : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_alloc : public instruction {
|
||||||
|
int amount;
|
||||||
|
|
||||||
|
instruction_alloc(int a)
|
||||||
|
: amount(a) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_unwind : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
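For a concrete picture of what these classes encode (not part of the commit), the sketch below writes out by hand the instruction sequence that runtime1.c spells out in C: PushInt 320, PushInt 6, PushGlobal, MkApp, MkApp, followed by the Update/Pop pair that global_function::compile appends for a definition with zero parameters. The global name "add" is only illustrative, and the operand order the real compiler emits may differ.

#include <iostream>
#include <vector>
#include "instruction.hpp"

int main() {
    std::vector<instruction_ptr> program;
    program.push_back(instruction_ptr(new instruction_pushint(320)));
    program.push_back(instruction_ptr(new instruction_pushint(6)));
    program.push_back(instruction_ptr(new instruction_pushglobal("add"))); // hypothetical global
    program.push_back(instruction_ptr(new instruction_mkapp()));
    program.push_back(instruction_ptr(new instruction_mkapp()));
    program.push_back(instruction_ptr(new instruction_update(0)));
    program.push_back(instruction_ptr(new instruction_pop(0)));

    // Dump a readable listing via the print methods declared above.
    for(auto& i : program) {
        i->print(0, std::cout);
    }
    return 0;
}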
278
code/compiler/12/llvm_context.cpp
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
void llvm_context::create_types() {
|
||||||
|
stack_type = StructType::create(ctx, "stack");
|
||||||
|
gmachine_type = StructType::create(ctx, "gmachine");
|
||||||
|
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||||
|
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
|
||||||
|
tag_type = IntegerType::getInt8Ty(ctx);
|
||||||
|
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||||
|
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||||
|
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||||
|
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||||
|
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||||
|
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||||
|
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||||
|
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
|
||||||
|
|
||||||
|
gmachine_type->setBody(
|
||||||
|
stack_ptr_type,
|
||||||
|
node_ptr_type,
|
||||||
|
IntegerType::getInt64Ty(ctx),
|
||||||
|
IntegerType::getInt64Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_base")->setBody(
|
||||||
|
IntegerType::getInt32Ty(ctx),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_app")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type,
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_num")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt32Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_global")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
|
||||||
|
);
|
||||||
|
struct_types.at("node_ind")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_data")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
PointerType::getUnqual(node_ptr_type)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void llvm_context::create_functions() {
|
||||||
|
auto void_type = Type::getVoidTy(ctx);
|
||||||
|
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
|
||||||
|
functions["stack_init"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
        "stack_init",
        &module
    );
    functions["stack_free"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_free",
        &module
    );
    functions["stack_push"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_push",
        &module
    );
    functions["stack_pop"] = Function::Create(
        FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_pop",
        &module
    );
    functions["stack_peek"] = Function::Create(
        FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_peek",
        &module
    );
    functions["stack_popn"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_popn",
        &module
    );
    functions["gmachine_slide"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_slide",
        &module
    );
    functions["gmachine_update"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_update",
        &module
    );
    functions["gmachine_alloc"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_alloc",
        &module
    );
    functions["gmachine_pack"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_pack",
        &module
    );
    functions["gmachine_split"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_split",
        &module
    );
    functions["gmachine_track"] = Function::Create(
        FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_track",
        &module
    );

    auto int32_type = IntegerType::getInt32Ty(ctx);
    functions["alloc_app"] = Function::Create(
        FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_app",
        &module
    );
    functions["alloc_num"] = Function::Create(
        FunctionType::get(node_ptr_type, { int32_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_num",
        &module
    );
    functions["alloc_global"] = Function::Create(
        FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_global",
        &module
    );
    functions["alloc_ind"] = Function::Create(
        FunctionType::get(node_ptr_type, { node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_ind",
        &module
    );

    functions["unwind"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "unwind",
        &module
    );
}

ConstantInt* llvm_context::create_i8(int8_t i) {
    return ConstantInt::get(ctx, APInt(8, i));
}
ConstantInt* llvm_context::create_i32(int32_t i) {
    return ConstantInt::get(ctx, APInt(32, i));
}
ConstantInt* llvm_context::create_size(size_t i) {
    return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i));
}

Value* llvm_context::create_pop(Function* f) {
    auto pop_f = functions.at("stack_pop");
    return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) });
}
Value* llvm_context::create_peek(Function* f, Value* off) {
    auto peek_f = functions.at("stack_peek");
    return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
}
void llvm_context::create_push(Function* f, Value* v) {
    auto push_f = functions.at("stack_push");
    builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v });
}
void llvm_context::create_popn(Function* f, Value* off) {
    auto popn_f = functions.at("stack_popn");
    builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
}
void llvm_context::create_update(Function* f, Value* off) {
    auto update_f = functions.at("gmachine_update");
    builder.CreateCall(update_f, { f->arg_begin(), off });
}
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
    auto pack_f = functions.at("gmachine_pack");
    builder.CreateCall(pack_f, { f->arg_begin(), c, t });
}
void llvm_context::create_split(Function* f, Value* c) {
    auto split_f = functions.at("gmachine_split");
    builder.CreateCall(split_f, { f->arg_begin(), c });
}
void llvm_context::create_slide(Function* f, Value* off) {
    auto slide_f = functions.at("gmachine_slide");
    builder.CreateCall(slide_f, { f->arg_begin(), off });
}
void llvm_context::create_alloc(Function* f, Value* n) {
    auto alloc_f = functions.at("gmachine_alloc");
    builder.CreateCall(alloc_f, { f->arg_begin(), n });
}
Value* llvm_context::create_track(Function* f, Value* v) {
    auto track_f = functions.at("gmachine_track");
    return builder.CreateCall(track_f, { f->arg_begin(), v });
}

void llvm_context::create_unwind(Function* f) {
    auto unwind_f = functions.at("unwind");
    builder.CreateCall(unwind_f, { f->args().begin() });
}

Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
    auto offset_0 = create_i32(0);
    return builder.CreateGEP(g, { offset_0, offset_0 });
}

Value* llvm_context::unwrap_num(Value* v) {
    auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
    auto cast = builder.CreatePointerCast(v, num_ptr_type);
    auto offset_0 = create_i32(0);
    auto offset_1 = create_i32(1);
    auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
    return builder.CreateLoad(int_ptr);
}
Value* llvm_context::create_num(Function* f, Value* v) {
    auto alloc_num_f = functions.at("alloc_num");
    auto alloc_num_call = builder.CreateCall(alloc_num_f, { v });
    return create_track(f, alloc_num_call);
}

Value* llvm_context::unwrap_data_tag(Value* v) {
    auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data"));
    auto cast = builder.CreatePointerCast(v, data_ptr_type);
    auto offset_0 = create_i32(0);
    auto offset_1 = create_i32(1);
    auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
    return builder.CreateLoad(tag_ptr);
}

Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
    auto alloc_global_f = functions.at("alloc_global");
    auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a });
    return create_track(f, alloc_global_call);
}

Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
    auto alloc_app_f = functions.at("alloc_app");
    auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r });
    return create_track(f, alloc_app_call);
}

llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
    auto void_type = llvm::Type::getVoidTy(ctx);
    auto new_function = llvm::Function::Create(
        function_type,
        llvm::Function::LinkageTypes::ExternalLinkage,
        "f_" + name,
        &module
    );
    auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function);

    auto new_custom_f = custom_function_ptr(new custom_function());
    new_custom_f->arity = arity;
    new_custom_f->function = new_function;
    custom_functions["f_" + name] = std::move(new_custom_f);

    return new_function;
}
72
code/compiler/12/llvm_context.hpp
Normal file
@@ -0,0 +1,72 @@
#pragma once
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Value.h>
#include <map>

struct llvm_context {
    struct custom_function {
        llvm::Function* function;
        int32_t arity;
    };

    using custom_function_ptr = std::unique_ptr<custom_function>;

    llvm::LLVMContext ctx;
    llvm::IRBuilder<> builder;
    llvm::Module module;

    std::map<std::string, custom_function_ptr> custom_functions;
    std::map<std::string, llvm::Function*> functions;
    std::map<std::string, llvm::StructType*> struct_types;

    llvm::StructType* stack_type;
    llvm::StructType* gmachine_type;
    llvm::PointerType* stack_ptr_type;
    llvm::PointerType* gmachine_ptr_type;
    llvm::PointerType* node_ptr_type;
    llvm::IntegerType* tag_type;
    llvm::FunctionType* function_type;

    llvm_context()
        : builder(ctx), module("bloglang", ctx) {
        create_types();
        create_functions();
    }

    void create_types();
    void create_functions();

    llvm::ConstantInt* create_i8(int8_t);
    llvm::ConstantInt* create_i32(int32_t);
    llvm::ConstantInt* create_size(size_t);

    llvm::Value* create_pop(llvm::Function*);
    llvm::Value* create_peek(llvm::Function*, llvm::Value*);
    void create_push(llvm::Function*, llvm::Value*);
    void create_popn(llvm::Function*, llvm::Value*);
    void create_update(llvm::Function*, llvm::Value*);
    void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
    void create_split(llvm::Function*, llvm::Value*);
    void create_slide(llvm::Function*, llvm::Value*);
    void create_alloc(llvm::Function*, llvm::Value*);
    llvm::Value* create_track(llvm::Function*, llvm::Value*);

    void create_unwind(llvm::Function*);

    llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);

    llvm::Value* unwrap_num(llvm::Value*);
    llvm::Value* create_num(llvm::Function*, llvm::Value*);

    llvm::Value* unwrap_data_tag(llvm::Value*);

    llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);

    llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);

    llvm::Function* create_custom_function(std::string name, int32_t arity);
};
163
code/compiler/12/main.cpp
Normal file
@@ -0,0 +1,163 @@
#include "ast.hpp"
#include <iostream>
#include "binop.hpp"
#include "definition.hpp"
#include "graph.hpp"
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetMachine.h"

void yy::parser::error(const std::string& msg) {
    std::cout << "An error occurred: " << msg << std::endl;
}

extern definition_group global_defs;

void typecheck_program(
        definition_group& defs,
        type_mgr& mgr, type_env_ptr& env) {
    type_ptr int_type = type_ptr(new type_base("Int"));
    env->bind_type("Int", int_type);
    type_ptr int_type_app = type_ptr(new type_app(int_type));

    type_ptr binop_type = type_ptr(new type_arr(
                int_type_app,
                type_ptr(new type_arr(int_type_app, int_type_app))));
    env->bind("+", binop_type, visibility::global);
    env->bind("-", binop_type, visibility::global);
    env->bind("*", binop_type, visibility::global);
    env->bind("/", binop_type, visibility::global);

    std::set<std::string> free;
    defs.find_free(free);
    defs.typecheck(mgr, env);

    for(auto& pair : defs.env->names) {
        std::cout << pair.first << ": ";
        pair.second.type->print(mgr, std::cout);
        std::cout << std::endl;
    }
}

global_scope translate_program(definition_group& group) {
    global_scope scope;
    for(auto& data : group.defs_data) {
        data.second->into_globals(scope);
    }
    for(auto& defn : group.defs_defn) {
        auto& function = defn.second->into_global(scope);
        function.body->env->parent->set_mangled_name(defn.first, function.name);
    }
    return scope;
}

void gen_llvm_internal_op(llvm_context& ctx, binop op) {
    auto new_function = ctx.create_custom_function(op_action(op), 2);
    std::vector<instruction_ptr> instructions;
    instructions.push_back(instruction_ptr(new instruction_push(1)));
    instructions.push_back(instruction_ptr(new instruction_eval()));
    instructions.push_back(instruction_ptr(new instruction_push(1)));
    instructions.push_back(instruction_ptr(new instruction_eval()));
    instructions.push_back(instruction_ptr(new instruction_binop(op)));
    instructions.push_back(instruction_ptr(new instruction_update(2)));
    instructions.push_back(instruction_ptr(new instruction_pop(2)));
    ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
    for(auto& instruction : instructions) {
        instruction->gen_llvm(ctx, new_function);
    }
    ctx.builder.CreateRetVoid();
}

void output_llvm(llvm_context& ctx, const std::string& filename) {
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();

    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmParser();
    llvm::InitializeNativeTargetAsmPrinter();

    std::string error;
    const llvm::Target* target =
        llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (!target) {
        std::cerr << error << std::endl;
    } else {
        std::string cpu = "generic";
        std::string features = "";
        llvm::TargetOptions options;
        llvm::TargetMachine* targetMachine =
            target->createTargetMachine(targetTriple, cpu, features,
                    options, llvm::Optional<llvm::Reloc::Model>());

        ctx.module.setDataLayout(targetMachine->createDataLayout());
        ctx.module.setTargetTriple(targetTriple);

        std::error_code ec;
        llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
        if (ec) {
            throw 0;
        } else {
            llvm::CodeGenFileType type = llvm::CGFT_ObjectFile;
            llvm::legacy::PassManager pm;
            if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
                throw 0;
            } else {
                pm.run(ctx.module);
                file.close();
            }
        }
    }
}

void gen_llvm(global_scope& scope) {
    llvm_context ctx;
    gen_llvm_internal_op(ctx, PLUS);
    gen_llvm_internal_op(ctx, MINUS);
    gen_llvm_internal_op(ctx, TIMES);
    gen_llvm_internal_op(ctx, DIVIDE);

    scope.generate_llvm(ctx);

    ctx.module.print(llvm::outs(), nullptr);
    output_llvm(ctx, "program.o");
}

int main() {
    yy::parser parser;
    type_mgr mgr;
    type_env_ptr env(new type_env);

    parser.parse();
    for(auto& def_defn : global_defs.defs_defn) {
        std::cout << def_defn.second->name;
        for(auto& param : def_defn.second->params) std::cout << " " << param;
        std::cout << ":" << std::endl;

        def_defn.second->body->print(1, std::cout);
    }
    try {
        typecheck_program(global_defs, mgr, env);
        global_scope scope = translate_program(global_defs);
        scope.compile();
        gen_llvm(scope);
    } catch(unification_error& err) {
        std::cout << "failed to unify types: " << std::endl;
        std::cout << "  (1) \033[34m";
        err.left->print(mgr, std::cout);
        std::cout << "\033[0m" << std::endl;
        std::cout << "  (2) \033[32m";
        err.right->print(mgr, std::cout);
        std::cout << "\033[0m" << std::endl;
    } catch(type_error& err) {
        std::cout << "failed to type check program: " << err.description << std::endl;
    }
}
36
code/compiler/12/parsed_type.cpp
Normal file
@@ -0,0 +1,36 @@
#include "parsed_type.hpp"
#include "type.hpp"
#include "type_env.hpp"

type_ptr parsed_type_app::to_type(
        const std::set<std::string>& vars,
        const type_env& e) const {
    auto parent_type = e.lookup_type(name);
    if(parent_type == nullptr) throw 0;
    type_base* base_type;
    if(!(base_type = dynamic_cast<type_base*>(parent_type.get()))) throw 0;
    if(base_type->arity != arguments.size()) throw 0;

    type_app* new_app = new type_app(std::move(parent_type));
    type_ptr to_return(new_app);
    for(auto& arg : arguments) {
        new_app->arguments.push_back(arg->to_type(vars, e));
    }
    return to_return;
}

type_ptr parsed_type_var::to_type(
        const std::set<std::string>& vars,
        const type_env& e) const {
    if(vars.find(var) == vars.end()) throw 0;
    return type_ptr(new type_var(var));
}

type_ptr parsed_type_arr::to_type(
        const std::set<std::string>& vars,
        const type_env& env) const {
    auto new_left = left->to_type(vars, env);
    auto new_right = right->to_type(vars, env);
    return type_ptr(new type_arr(std::move(new_left), std::move(new_right)));
}
43
code/compiler/12/parsed_type.hpp
Normal file
@@ -0,0 +1,43 @@
#pragma once
#include <memory>
#include <set>
#include <string>
#include "type_env.hpp"

struct parsed_type {
    virtual type_ptr to_type(
            const std::set<std::string>& vars,
            const type_env& env) const = 0;
};

using parsed_type_ptr = std::unique_ptr<parsed_type>;

struct parsed_type_app : parsed_type {
    std::string name;
    std::vector<parsed_type_ptr> arguments;

    parsed_type_app(
            std::string n,
            std::vector<parsed_type_ptr> as)
        : name(std::move(n)), arguments(std::move(as)) {}

    type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
};

struct parsed_type_var : parsed_type {
    std::string var;

    parsed_type_var(std::string v) : var(std::move(v)) {}

    type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
};

struct parsed_type_arr : parsed_type {
    parsed_type_ptr left;
    parsed_type_ptr right;

    parsed_type_arr(parsed_type_ptr l, parsed_type_ptr r)
        : left(std::move(l)), right(std::move(r)) {}

    type_ptr to_type(const std::set<std::string>& vars, const type_env& env) const;
};
174
code/compiler/12/parser.y
Normal file
@@ -0,0 +1,174 @@
%{
#include <string>
#include <iostream>
#include <map>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"
#include "parsed_type.hpp"

definition_group global_defs;

extern yy::parser::symbol_type yylex();

%}

%token BACKSLASH
%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
%token <int> INT
%token DEFN
%token DATA
%token CASE
%token OF
%token LET
%token IN
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
%token <std::string> LID
%token <std::string> UID

%language "c++"
%define api.value.type variant
%define api.token.constructor

%type <std::vector<std::string>> lowercaseParams
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <std::vector<parsed_type_ptr>> typeList
%type <definition_group> definitions
%type <parsed_type_ptr> type nonArrowType typeListElement
%type <ast_ptr> aAdd aMul case let lambda app appBase
%type <definition_data_ptr> data
%type <definition_defn_ptr> defn
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor

%start program

%%

program
    : definitions { global_defs = std::move($1); global_defs.vis = visibility::global; }
    ;

definitions
    : definitions defn { $$ = std::move($1); auto name = $2->name; $$.defs_defn[name] = std::move($2); }
    | definitions data { $$ = std::move($1); auto name = $2->name; $$.defs_data[name] = std::move($2); }
    | %empty { $$ = definition_group(); }
    ;

defn
    : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
        { $$ = definition_defn_ptr(
            new definition_defn(std::move($2), std::move($3), std::move($6))); }
    ;

lowercaseParams
    : %empty { $$ = std::vector<std::string>(); }
    | lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

aAdd
    : aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
    | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
    | aMul { $$ = std::move($1); }
    ;

aMul
    : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
    | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
    | app { $$ = std::move($1); }
    ;

app
    : app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
    | appBase { $$ = std::move($1); }
    ;

appBase
    : INT { $$ = ast_ptr(new ast_int($1)); }
    | LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
    | UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
    | OPAREN aAdd CPAREN { $$ = std::move($2); }
    | case { $$ = std::move($1); }
    | let { $$ = std::move($1); }
    | lambda { $$ = std::move($1); }
    ;

let
    : LET OCURLY definitions CCURLY IN OCURLY aAdd CCURLY
        { $$ = ast_ptr(new ast_let(std::move($3), std::move($7))); }
    ;

lambda
    : BACKSLASH lowercaseParams ARROW OCURLY aAdd CCURLY
        { $$ = ast_ptr(new ast_lambda(std::move($2), std::move($5))); }
    ;

case
    : CASE aAdd OF OCURLY branches CCURLY
        { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
    ;

branches
    : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
    | branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1)); }
    ;

branch
    : pattern ARROW OCURLY aAdd CCURLY
        { $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
    ;

pattern
    : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
    | UID lowercaseParams
        { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
    ;

data
    : DATA UID lowercaseParams EQUAL OCURLY constructors CCURLY
        { $$ = definition_data_ptr(new definition_data(std::move($2), std::move($3), std::move($6))); }
    ;

constructors
    : constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
    | constructor
        { $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
    ;

constructor
    : UID typeList
        { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
    ;

type
    : nonArrowType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
    | nonArrowType { $$ = std::move($1); }
    ;

nonArrowType
    : UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
    | LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
    | OPAREN type CPAREN { $$ = std::move($2); }
    ;

typeListElement
    : OPAREN type CPAREN { $$ = std::move($2); }
    | UID { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), {})); }
    | LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
    ;

typeList
    : %empty { $$ = std::vector<parsed_type_ptr>(); }
    | typeList typeListElement { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;
269
code/compiler/12/runtime.c
Normal file
@@ -0,0 +1,269 @@
#include <stdint.h>
#include <assert.h>
#include <memory.h>
#include <stdio.h>
#include "runtime.h"

struct node_base* alloc_node() {
    struct node_base* new_node = malloc(sizeof(struct node_app));
    assert(new_node != NULL);
    new_node->gc_next = NULL;
    new_node->gc_reachable = 0;
    return new_node;
}

struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
    struct node_app* node = (struct node_app*) alloc_node();
    node->base.tag = NODE_APP;
    node->left = l;
    node->right = r;
    return node;
}

struct node_num* alloc_num(int32_t n) {
    struct node_num* node = (struct node_num*) alloc_node();
    node->base.tag = NODE_NUM;
    node->value = n;
    return node;
}

struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
    struct node_global* node = (struct node_global*) alloc_node();
    node->base.tag = NODE_GLOBAL;
    node->arity = a;
    node->function = f;
    return node;
}

struct node_ind* alloc_ind(struct node_base* n) {
    struct node_ind* node = (struct node_ind*) alloc_node();
    node->base.tag = NODE_IND;
    node->next = n;
    return node;
}

void free_node_direct(struct node_base* n) {
    if(n->tag == NODE_DATA) {
        free(((struct node_data*) n)->array);
    }
}

void gc_visit_node(struct node_base* n) {
    if(n->gc_reachable) return;
    n->gc_reachable = 1;

    if(n->tag == NODE_APP) {
        struct node_app* app = (struct node_app*) n;
        gc_visit_node(app->left);
        gc_visit_node(app->right);
    } if(n->tag == NODE_IND) {
        struct node_ind* ind = (struct node_ind*) n;
        gc_visit_node(ind->next);
    } if(n->tag == NODE_DATA) {
        struct node_data* data = (struct node_data*) n;
        struct node_base** to_visit = data->array;
        while(*to_visit) {
            gc_visit_node(*to_visit);
            to_visit++;
        }
    }
}

void stack_init(struct stack* s) {
    s->size = 4;
    s->count = 0;
    s->data = malloc(sizeof(*s->data) * s->size);
    assert(s->data != NULL);
}

void stack_free(struct stack* s) {
    free(s->data);
}

void stack_push(struct stack* s, struct node_base* n) {
    while(s->count >= s->size) {
        s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
        assert(s->data != NULL);
    }
    s->data[s->count++] = n;
}

struct node_base* stack_pop(struct stack* s) {
    assert(s->count > 0);
    return s->data[--s->count];
}

struct node_base* stack_peek(struct stack* s, size_t o) {
    assert(s->count > o);
    return s->data[s->count - o - 1];
}

void stack_popn(struct stack* s, size_t n) {
    assert(s->count >= n);
    s->count -= n;
}

void gmachine_init(struct gmachine* g) {
    stack_init(&g->stack);
    g->gc_nodes = NULL;
    g->gc_node_count = 0;
    g->gc_node_threshold = 128;
}

void gmachine_free(struct gmachine* g) {
    stack_free(&g->stack);
    struct node_base* to_free = g->gc_nodes;
    struct node_base* next;

    while(to_free) {
        next = to_free->gc_next;
        free_node_direct(to_free);
        free(to_free);
        to_free = next;
    }
}

void gmachine_slide(struct gmachine* g, size_t n) {
    assert(g->stack.count > n);
    g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
    g->stack.count -= n;
}

void gmachine_update(struct gmachine* g, size_t o) {
    assert(g->stack.count > o + 1);
    struct node_ind* ind =
        (struct node_ind*) g->stack.data[g->stack.count - o - 2];
    ind->base.tag = NODE_IND;
    ind->next = g->stack.data[g->stack.count -= 1];
}

void gmachine_alloc(struct gmachine* g, size_t o) {
    while(o--) {
        stack_push(&g->stack,
                gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
    }
}

void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
    assert(g->stack.count >= n);

    struct node_base** data = malloc(sizeof(*data) * (n + 1));
    assert(data != NULL);
    memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
    data[n] = NULL;

    struct node_data* new_node = (struct node_data*) alloc_node();
    new_node->array = data;
    new_node->base.tag = NODE_DATA;
    new_node->tag = t;

    stack_popn(&g->stack, n);
    stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
}

void gmachine_split(struct gmachine* g, size_t n) {
    struct node_data* node = (struct node_data*) stack_pop(&g->stack);
    for(size_t i = 0; i < n; i++) {
        stack_push(&g->stack, node->array[i]);
    }
}

struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
    g->gc_node_count++;
    b->gc_next = g->gc_nodes;
    g->gc_nodes = b;

    if(g->gc_node_count >= g->gc_node_threshold) {
        uint64_t nodes_before = g->gc_node_count;
        gc_visit_node(b);
        gmachine_gc(g);
        g->gc_node_threshold = g->gc_node_count * 2;
    }

    return b;
}

void gmachine_gc(struct gmachine* g) {
    for(size_t i = 0; i < g->stack.count; i++) {
        gc_visit_node(g->stack.data[i]);
    }

    struct node_base** head_ptr = &g->gc_nodes;
    while(*head_ptr) {
        if((*head_ptr)->gc_reachable) {
            (*head_ptr)->gc_reachable = 0;
            head_ptr = &(*head_ptr)->gc_next;
        } else {
            struct node_base* to_free = *head_ptr;
            *head_ptr = to_free->gc_next;
            free_node_direct(to_free);
            free(to_free);
            g->gc_node_count--;
        }
    }
}

void unwind(struct gmachine* g) {
    struct stack* s = &g->stack;

    while(1) {
        struct node_base* peek = stack_peek(s, 0);
        if(peek->tag == NODE_APP) {
            struct node_app* n = (struct node_app*) peek;
            stack_push(s, n->left);
        } else if(peek->tag == NODE_GLOBAL) {
            struct node_global* n = (struct node_global*) peek;
            assert(s->count > n->arity);

            for(size_t i = 1; i <= n->arity; i++) {
                s->data[s->count - i]
                    = ((struct node_app*) s->data[s->count - i - 1])->right;
            }

            n->function(g);
        } else if(peek->tag == NODE_IND) {
            struct node_ind* n = (struct node_ind*) peek;
            stack_pop(s);
            stack_push(s, n->next);
        } else {
            break;
        }
    }
}

extern void f_main(struct gmachine* s);

void print_node(struct node_base* n) {
    if(n->tag == NODE_APP) {
        struct node_app* app = (struct node_app*) n;
        print_node(app->left);
        putchar(' ');
        print_node(app->right);
    } else if(n->tag == NODE_DATA) {
        printf("(Packed)");
    } else if(n->tag == NODE_GLOBAL) {
        struct node_global* global = (struct node_global*) n;
        printf("(Global: %p)", global->function);
    } else if(n->tag == NODE_IND) {
        print_node(((struct node_ind*) n)->next);
    } else if(n->tag == NODE_NUM) {
        struct node_num* num = (struct node_num*) n;
        printf("%d", num->value);
    }
}

int main(int argc, char** argv) {
    struct gmachine gmachine;
    struct node_global* first_node = alloc_global(f_main, 0);
    struct node_base* result;

    gmachine_init(&gmachine);
    gmachine_track(&gmachine, (struct node_base*) first_node);
    stack_push(&gmachine.stack, (struct node_base*) first_node);
    unwind(&gmachine);
    result = stack_pop(&gmachine.stack);
    printf("Result: ");
    print_node(result);
    putchar('\n');
    gmachine_free(&gmachine);
}
84
code/compiler/12/runtime.h
Normal file
@@ -0,0 +1,84 @@
#pragma once
#include <stdlib.h>

struct gmachine;

enum node_tag {
    NODE_APP,
    NODE_NUM,
    NODE_GLOBAL,
    NODE_IND,
    NODE_DATA
};

struct node_base {
    enum node_tag tag;
    int8_t gc_reachable;
    struct node_base* gc_next;
};

struct node_app {
    struct node_base base;
    struct node_base* left;
    struct node_base* right;
};

struct node_num {
    struct node_base base;
    int32_t value;
};

struct node_global {
    struct node_base base;
    int32_t arity;
    void (*function)(struct gmachine*);
};

struct node_ind {
    struct node_base base;
    struct node_base* next;
};

struct node_data {
    struct node_base base;
    int8_t tag;
    struct node_base** array;
};

struct node_base* alloc_node();
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
struct node_num* alloc_num(int32_t n);
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
struct node_ind* alloc_ind(struct node_base* n);
void free_node_direct(struct node_base*);
void gc_visit_node(struct node_base*);

struct stack {
    size_t size;
    size_t count;
    struct node_base** data;
};

void stack_init(struct stack* s);
void stack_free(struct stack* s);
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o);
void stack_popn(struct stack* s, size_t n);

struct gmachine {
    struct stack stack;
    struct node_base* gc_nodes;
    int64_t gc_node_count;
    int64_t gc_node_threshold;
};

void gmachine_init(struct gmachine* g);
void gmachine_free(struct gmachine* g);
void gmachine_slide(struct gmachine* g, size_t n);
void gmachine_update(struct gmachine* g, size_t o);
void gmachine_alloc(struct gmachine* g, size_t o);
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
void gmachine_split(struct gmachine* g, size_t n);
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
void gmachine_gc(struct gmachine* g);
38
code/compiler/12/scanner.l
Normal file
@@ -0,0 +1,38 @@
%option noyywrap

%{
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"

#define YY_DECL yy::parser::symbol_type yylex()

%}

%%

[ \n]+ {}
\\ { return yy::parser::make_BACKSLASH(); }
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
let { return yy::parser::make_LET(); }
in { return yy::parser::make_IN(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }

%%
23
code/compiler/12/test.cpp
Normal file
@@ -0,0 +1,23 @@
#include "graph.hpp"

int main() {
    function_graph graph;
    graph.add_edge("f", "g");
    graph.add_edge("g", "h");
    graph.add_edge("h", "f");

    graph.add_edge("i", "j");
    graph.add_edge("j", "i");

    graph.add_edge("j", "f");

    graph.add_edge("x", "f");
    graph.add_edge("x", "i");

    for(auto& group : graph.compute_order()) {
        std::cout << "Group: " << std::endl;
        for(auto& member : group->members) {
            std::cout << member << std::endl;
        }
    }
}
219
code/compiler/12/type.cpp
Normal file
@@ -0,0 +1,219 @@
#include "type.hpp"
#include <ostream>
#include <sstream>
#include <algorithm>
#include <vector>
#include "error.hpp"

bool type::is_arrow(const type_mgr& mgr) const { return false; }

void type_scheme::print(const type_mgr& mgr, std::ostream& to) const {
    if(forall.size() != 0) {
        to << "forall ";
        for(auto& var : forall) {
            to << var << " ";
        }
        to << ". ";
    }
    monotype->print(mgr, to);
}

type_ptr type_scheme::instantiate(type_mgr& mgr) const {
    if(forall.size() == 0) return monotype;
    std::map<std::string, type_ptr> subst;
    for(auto& var : forall) {
        subst[var] = mgr.new_type();
    }
    return mgr.substitute(subst, monotype);
}

void type_var::print(const type_mgr& mgr, std::ostream& to) const {
    auto it = mgr.types.find(name);
    if(it != mgr.types.end()) {
        it->second->print(mgr, to);
    } else {
        to << name;
    }
}

bool type_var::is_arrow(const type_mgr& mgr) const {
    auto it = mgr.types.find(name);
    if(it != mgr.types.end()) {
        return it->second->is_arrow(mgr);
    } else {
        return false;
    }
}

void type_base::print(const type_mgr& mgr, std::ostream& to) const {
    to << name;
}

void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
    bool print_parenths = left->is_arrow(mgr);
    if(print_parenths) to << "(";
    left->print(mgr, to);
    if(print_parenths) to << ")";
    to << " -> ";
    right->print(mgr, to);
}

bool type_arr::is_arrow(const type_mgr& mgr) const {
    return true;
}

void type_app::print(const type_mgr& mgr, std::ostream& to) const {
    constructor->print(mgr, to);
    to << "*";
    for(auto& arg : arguments) {
        to << " ";
        arg->print(mgr, to);
    }
}

std::string type_mgr::new_type_name() {
    int temp = last_id++;
    std::string str = "";

    while(temp != -1) {
        str += (char) ('a' + (temp % 26));
        temp = temp / 26 - 1;
    }

    std::reverse(str.begin(), str.end());
    return str;
}

type_ptr type_mgr::new_type() {
    return type_ptr(new type_var(new_type_name()));
}

type_ptr type_mgr::new_arrow_type() {
    return type_ptr(new type_arr(new_type(), new_type()));
}

type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    type_var* cast;

    var = nullptr;
    while((cast = dynamic_cast<type_var*>(t.get()))) {
        auto it = types.find(cast->name);

        if(it == types.end()) {
            var = cast;
            break;
        }
        t = it->second;
    }

    return t;
}

void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var *lvar, *rvar;
    type_arr *larr, *rarr;
    type_base *lid, *rid;
    type_app *lapp, *rapp;

    l = resolve(l, lvar);
    r = resolve(r, rvar);

    if(lvar) {
        bind(lvar->name, r);
        return;
    } else if(rvar) {
        bind(rvar->name, l);
        return;
    } else if((larr = dynamic_cast<type_arr*>(l.get())) &&
            (rarr = dynamic_cast<type_arr*>(r.get()))) {
        unify(larr->left, rarr->left);
        unify(larr->right, rarr->right);
        return;
    } else if((lid = dynamic_cast<type_base*>(l.get())) &&
            (rid = dynamic_cast<type_base*>(r.get()))) {
        if(lid->name == rid->name && lid->arity == rid->arity) return;
    } else if((lapp = dynamic_cast<type_app*>(l.get())) &&
            (rapp = dynamic_cast<type_app*>(r.get()))) {
        unify(lapp->constructor, rapp->constructor);
        auto left_it = lapp->arguments.begin();
        auto right_it = rapp->arguments.begin();
        while(left_it != lapp->arguments.end() &&
                right_it != rapp->arguments.end()) {
            unify(*left_it, *right_it);
            left_it++, right_it++;
        }
        return;
    }

    throw unification_error(l, r);
}

type_ptr type_mgr::substitute(const std::map<std::string, type_ptr>& subst, const type_ptr& t) const {
    type_ptr temp = t;
    while(type_var* var = dynamic_cast<type_var*>(temp.get())) {
        auto subst_it = subst.find(var->name);
        if(subst_it != subst.end()) return subst_it->second;
        auto var_it = types.find(var->name);
        if(var_it == types.end()) return t;
        temp = var_it->second;
    }

    if(type_arr* arr = dynamic_cast<type_arr*>(temp.get())) {
        auto left_result = substitute(subst, arr->left);
        auto right_result = substitute(subst, arr->right);
        if(left_result == arr->left && right_result == arr->right) return t;
        return type_ptr(new type_arr(left_result, right_result));
    } else if(type_app* app = dynamic_cast<type_app*>(temp.get())) {
        auto constructor_result = substitute(subst, app->constructor);
        bool arg_changed = false;
        std::vector<type_ptr> new_args;
        for(auto& arg : app->arguments) {
            auto arg_result = substitute(subst, arg);
            arg_changed |= arg_result != arg;
            new_args.push_back(std::move(arg_result));
        }

        if(constructor_result == app->constructor && !arg_changed) return t;
        type_app* new_app = new type_app(std::move(constructor_result));
        std::swap(new_app->arguments, new_args);
        return type_ptr(new_app);
    }
    return t;
}

void type_mgr::bind(const std::string& s, type_ptr t) {
    type_var* other = dynamic_cast<type_var*>(t.get());

    if(other && other->name == s) return;
    types[s] = t;
}

void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
    type_var* var;
    type_ptr resolved = resolve(t, var);

    if(var) {
        into.insert(var->name);
    } else if(type_arr* arr = dynamic_cast<type_arr*>(resolved.get())) {
        find_free(arr->left, into);
        find_free(arr->right, into);
    } else if(type_app* app = dynamic_cast<type_app*>(resolved.get())) {
        find_free(app->constructor, into);
        for(auto& arg : app->arguments) find_free(arg, into);
    }
}

void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
    std::set<std::string> monotype_free;
    type_mgr limited_mgr;
    for(auto& binding : types) {
        auto existing_position = std::find(t->forall.begin(), t->forall.end(), binding.first);
        if(existing_position != t->forall.end()) continue;
        limited_mgr.types[binding.first] = binding.second;
    }
    limited_mgr.find_free(t->monotype, monotype_free);
    for(auto& not_free : t->forall) {
        monotype_free.erase(not_free);
    }
    into.insert(monotype_free.begin(), monotype_free.end());
}
99
code/compiler/12/type.hpp
Normal file
@@ -0,0 +1,99 @@
#pragma once
#include <memory>
#include <map>
#include <string>
#include <vector>
#include <set>

struct type_mgr;

struct type {
    virtual ~type() = default;

    virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
    virtual bool is_arrow(const type_mgr& mgr) const;
};

using type_ptr = std::shared_ptr<type>;

struct type_scheme {
    std::vector<std::string> forall;
    type_ptr monotype;

    type_scheme(type_ptr type) : forall(), monotype(std::move(type)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
    type_ptr instantiate(type_mgr& mgr) const;
};

using type_scheme_ptr = std::shared_ptr<type_scheme>;

struct type_var : public type {
    std::string name;

    type_var(std::string n)
        : name(std::move(n)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
    bool is_arrow(const type_mgr& mgr) const;
};

struct type_base : public type {
    std::string name;
    int32_t arity;

    type_base(std::string n, int32_t a = 0)
        : name(std::move(n)), arity(a) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
};

struct type_data : public type_base {
    struct constructor {
        int tag;
    };

    std::map<std::string, constructor> constructors;

    type_data(std::string n, int32_t a = 0)
        : type_base(std::move(n), a) {}
};

struct type_arr : public type {
    type_ptr left;
    type_ptr right;

    type_arr(type_ptr l, type_ptr r)
        : left(std::move(l)), right(std::move(r)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
    bool is_arrow(const type_mgr& mgr) const;
};

struct type_app : public type {
    type_ptr constructor;
    std::vector<type_ptr> arguments;

    type_app(type_ptr c)
        : constructor(std::move(c)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
};

struct type_mgr {
    int last_id = 0;
    std::map<std::string, type_ptr> types;

    std::string new_type_name();
    type_ptr new_type();
    type_ptr new_arrow_type();

    void unify(type_ptr l, type_ptr r);
    type_ptr substitute(
            const std::map<std::string, type_ptr>& subst,
            const type_ptr& t) const;
    type_ptr resolve(type_ptr t, type_var*& var) const;
    void bind(const std::string& s, type_ptr t);
    void find_free(const type_ptr& t, std::set<std::string>& into) const;
    void find_free(const type_scheme_ptr& t, std::set<std::string>& into) const;
};
85
code/compiler/12/type_env.cpp
Normal file
@@ -0,0 +1,85 @@
#include "type_env.hpp"
#include "type.hpp"

void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const {
    if(parent != nullptr) parent->find_free(mgr, into);
    for(auto& binding : names) {
        mgr.find_free(binding.second.type, into);
    }
}

void type_env::find_free_except(const type_mgr& mgr, const group& avoid,
        std::set<std::string>& into) const {
    if(parent != nullptr) parent->find_free(mgr, into);
    for(auto& binding : names) {
        if(avoid.members.find(binding.first) != avoid.members.end()) continue;
        mgr.find_free(binding.second.type, into);
    }
}

type_scheme_ptr type_env::lookup(const std::string& name) const {
    auto it = names.find(name);
    if(it != names.end()) return it->second.type;
    if(parent) return parent->lookup(name);
    return nullptr;
}

bool type_env::is_global(const std::string& name) const {
    auto it = names.find(name);
    if(it != names.end()) return it->second.vis == visibility::global;
    if(parent) return parent->is_global(name);
    return false;
}

void type_env::set_mangled_name(const std::string& name, const std::string& mangled) {
    auto it = names.find(name);
    if(it != names.end()) it->second.mangled_name = mangled;
}

const std::string& type_env::get_mangled_name(const std::string& name) const {
    auto it = names.find(name);
    if(it != names.end())
        return (it->second.mangled_name != "") ? it->second.mangled_name : name;
    if(parent) return parent->get_mangled_name(name);
    return name;
}

type_ptr type_env::lookup_type(const std::string& name) const {
    auto it = type_names.find(name);
    if(it != type_names.end()) return it->second;
    if(parent) return parent->lookup_type(name);
    return nullptr;
}

void type_env::bind(const std::string& name, type_ptr t, visibility v) {
    type_scheme_ptr new_scheme(new type_scheme(std::move(t)));
    names[name] = variable_data(std::move(new_scheme), v, "");
}

void type_env::bind(const std::string& name, type_scheme_ptr t, visibility v) {
    names[name] = variable_data(std::move(t), v, "");
}

void type_env::bind_type(const std::string& type_name, type_ptr t) {
    if(lookup_type(type_name) != nullptr) throw 0;
    type_names[type_name] = t;
}

void type_env::generalize(const std::string& name, const group& grp, type_mgr& mgr) {
    auto names_it = names.find(name);
    if(names_it == names.end()) throw 0;
    if(names_it->second.type->forall.size() > 0) throw 0;

    std::set<std::string> free_in_type;
    std::set<std::string> free_in_env;
    mgr.find_free(names_it->second.type->monotype, free_in_type);
    find_free_except(mgr, grp, free_in_env);
    for(auto& free : free_in_type) {
        if(free_in_env.find(free) != free_in_env.end()) continue;
        names_it->second.type->forall.push_back(free);
    }
}

type_env_ptr type_scope(type_env_ptr parent) {
    return type_env_ptr(new type_env(std::move(parent)));
}
49
code/compiler/12/type_env.hpp
Normal file
@@ -0,0 +1,49 @@
#pragma once
#include <map>
#include <string>
#include <set>
#include "graph.hpp"
#include "type.hpp"

struct type_env;
using type_env_ptr = std::shared_ptr<type_env>;

enum class visibility { global, local };

struct type_env {
    struct variable_data {
        type_scheme_ptr type;
        visibility vis;
        std::string mangled_name;

        variable_data()
            : variable_data(nullptr, visibility::local, "") {}
        variable_data(type_scheme_ptr t, visibility v, std::string n)
            : type(std::move(t)), vis(v), mangled_name(std::move(n)) {}
    };

    type_env_ptr parent;
    std::map<std::string, variable_data> names;
    std::map<std::string, type_ptr> type_names;

    type_env(type_env_ptr p) : parent(std::move(p)) {}
    type_env() : type_env(nullptr) {}

    void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
    void find_free_except(const type_mgr& mgr, const group& avoid,
            std::set<std::string>& into) const;
    type_scheme_ptr lookup(const std::string& name) const;
    bool is_global(const std::string& name) const;
    void set_mangled_name(const std::string& name, const std::string& mangled);
    const std::string& get_mangled_name(const std::string& name) const;
    type_ptr lookup_type(const std::string& name) const;
    void bind(const std::string& name, type_ptr t,
            visibility v = visibility::local);
    void bind(const std::string& name, type_scheme_ptr t,
            visibility v = visibility::local);
    void bind_type(const std::string& type_name, type_ptr t);
    void generalize(const std::string& name, const group& grp, type_mgr& mgr);
};


type_env_ptr type_scope(type_env_ptr parent);
@@ -1,4 +1,4 @@
-cd 10
+cd 11
 mkdir -p build && cd build
 cmake ..
 make -j8
21
code/time-traveling/TakeMax.hs
Normal file
@@ -0,0 +1,21 @@
takeUntilMax :: [Int] -> Int -> (Int, [Int])
takeUntilMax [] m = (m, [])
takeUntilMax [x] _ = (x, [x])
takeUntilMax (x:xs) m
    | x == m = (x, [x])
    | otherwise =
        let (m', xs') = takeUntilMax xs m
        in (max m' x, x:xs')

doTakeUntilMax :: [Int] -> [Int]
doTakeUntilMax l = l'
    where (m, l') = takeUntilMax l m

takeUntilMax' :: [Int] -> Int -> (Int, [Int])
takeUntilMax' [] m = (m, [])
takeUntilMax' [x] _ = (x, [x])
takeUntilMax' (x:xs) m
    | x == m = (maximum (x:xs), [x])
    | otherwise =
        let (m', xs') = takeUntilMax' xs m
        in (max m' x, x:xs')
28
code/time-traveling/ValueScore.hs
Normal file
@@ -0,0 +1,28 @@
import Data.Map as Map
import Data.Maybe
import Control.Applicative

data Element = A | B | C | D
    deriving (Eq, Ord, Show)

addElement :: Element -> Map Element Int -> Map Element Int
addElement = alter ((<|> Just 1) . fmap (+1))

getScore :: Element -> Map Element Int -> Float
getScore e m = fromMaybe 1.0 $ ((1.0/) . fromIntegral) <$> Map.lookup e m

data BinaryTree a = Empty | Node a (BinaryTree a) (BinaryTree a) deriving Show
type ElementTree = BinaryTree Element
type ScoredElementTree = BinaryTree (Element, Float)

assignScores :: ElementTree -> Map Element Int -> (Map Element Int, ScoredElementTree)
assignScores Empty m = (Map.empty, Empty)
assignScores (Node e t1 t2) m = (m', Node (e, getScore e m) t1' t2')
    where
        (m1, t1') = assignScores t1 m
        (m2, t2') = assignScores t2 m
        m' = addElement e $ unionWith (+) m1 m2

doAssignScores :: ElementTree -> ScoredElementTree
doAssignScores t = t'
    where (m, t') = assignScores t m
99
code/typesafe-interpreter/TypesafeIntrV2.idr
Normal file
@@ -0,0 +1,99 @@
data ExprType
    = IntType
    | BoolType
    | StringType

repr : ExprType -> Type
repr IntType = Int
repr BoolType = Bool
repr StringType = String

intBoolImpossible : IntType = BoolType -> Void
intBoolImpossible Refl impossible

intStringImpossible : IntType = StringType -> Void
intStringImpossible Refl impossible

boolStringImpossible : BoolType = StringType -> Void
boolStringImpossible Refl impossible

decEq : (a : ExprType) -> (b : ExprType) -> Dec (a = b)
decEq IntType IntType = Yes Refl
decEq BoolType BoolType = Yes Refl
decEq StringType StringType = Yes Refl
decEq IntType BoolType = No intBoolImpossible
decEq BoolType IntType = No $ intBoolImpossible . sym
decEq IntType StringType = No intStringImpossible
decEq StringType IntType = No $ intStringImpossible . sym
decEq BoolType StringType = No boolStringImpossible
decEq StringType BoolType = No $ boolStringImpossible . sym

data Op
    = Add
    | Subtract
    | Multiply
    | Divide

data Expr
    = IntLit Int
    | BoolLit Bool
    | StringLit String
    | BinOp Op Expr Expr
    | IfElse Expr Expr Expr

data SafeExpr : ExprType -> Type where
    IntLiteral : Int -> SafeExpr IntType
    BoolLiteral : Bool -> SafeExpr BoolType
    StringLiteral : String -> SafeExpr StringType
    BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
    IfThenElse : SafeExpr BoolType -> SafeExpr t -> SafeExpr t -> SafeExpr t

typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
typecheckOp Add IntType IntType = Right (IntType ** (+))
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
typecheckOp Divide IntType IntType = Right (IntType ** div)
typecheckOp _ _ _ = Left "Invalid binary operator application"

requireBool : (n : ExprType ** SafeExpr n) -> Either String (SafeExpr BoolType)
requireBool (BoolType ** e) = Right e
requireBool _ = Left "Not a boolean."

typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
typecheck (IntLit i) = Right (_ ** IntLiteral i)
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
typecheck (StringLit s) = Right (_ ** StringLiteral s)
typecheck (BinOp o l r) = do
    (lt ** le) <- typecheck l
    (rt ** re) <- typecheck r
    (ot ** f) <- typecheckOp o lt rt
    pure (_ ** BinOperation f le re)
typecheck (IfElse c t e) =
    do
        ce <- typecheck c >>= requireBool
        (tt ** te) <- typecheck t
        (et ** ee) <- typecheck e
        case decEq tt et of
            Yes p => pure (_ ** IfThenElse ce (replace p te) ee)
            No _ => Left "Incompatible branch types."

eval : SafeExpr t -> repr t
eval (IntLiteral i) = i
eval (BoolLiteral b) = b
eval (StringLiteral s) = s
eval (BinOperation f l r) = f (eval l) (eval r)
eval (IfThenElse c t e) = if (eval c) then (eval t) else (eval e)

resultStr : {t : ExprType} -> repr t -> String
resultStr {t=IntType} i = show i
resultStr {t=BoolType} b = show b
resultStr {t=StringType} s = show s

tryEval : Expr -> String
tryEval ex =
    case typecheck ex of
        Left err => "Type error: " ++ err
        Right (t ** e) => resultStr $ eval {t} e

main : IO ()
main = putStrLn $ tryEval $ BinOp Add (IfElse (BoolLit True) (IntLit 6) (IntLit 7)) (BinOp Multiply (IntLit 160) (IntLit 2))
120
code/typesafe-interpreter/TypesafeIntrV3.idr
Normal file
@@ -0,0 +1,120 @@
data ExprType
    = IntType
    | BoolType
    | StringType
    | PairType ExprType ExprType

repr : ExprType -> Type
repr IntType = Int
repr BoolType = Bool
repr StringType = String
repr (PairType t1 t2) = Pair (repr t1) (repr t2)

decEq : (a : ExprType) -> (b : ExprType) -> Maybe (a = b)
decEq IntType IntType = Just Refl
decEq BoolType BoolType = Just Refl
decEq StringType StringType = Just Refl
decEq (PairType lt1 lt2) (PairType rt1 rt2) = do
    subEq1 <- decEq lt1 rt1
    subEq2 <- decEq lt2 rt2
    let firstEqual = replace {P = \t1 => PairType lt1 lt2 = PairType t1 lt2} subEq1 Refl
    let secondEqual = replace {P = \t2 => PairType lt1 lt2 = PairType rt1 t2} subEq2 firstEqual
    pure secondEqual
decEq _ _ = Nothing

data Op
    = Add
    | Subtract
    | Multiply
    | Divide

data Expr
    = IntLit Int
    | BoolLit Bool
    | StringLit String
    | BinOp Op Expr Expr
    | IfElse Expr Expr Expr
    | Pair Expr Expr
    | Fst Expr
    | Snd Expr

data SafeExpr : ExprType -> Type where
    IntLiteral : Int -> SafeExpr IntType
    BoolLiteral : Bool -> SafeExpr BoolType
    StringLiteral : String -> SafeExpr StringType
    BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
    IfThenElse : SafeExpr BoolType -> SafeExpr t -> SafeExpr t -> SafeExpr t
    NewPair : SafeExpr t1 -> SafeExpr t2 -> SafeExpr (PairType t1 t2)
    First : SafeExpr (PairType t1 t2) -> SafeExpr t1
    Second : SafeExpr (PairType t1 t2) -> SafeExpr t2

typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
typecheckOp Add IntType IntType = Right (IntType ** (+))
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
typecheckOp Divide IntType IntType = Right (IntType ** div)
typecheckOp _ _ _ = Left "Invalid binary operator application"

requireBool : (n : ExprType ** SafeExpr n) -> Either String (SafeExpr BoolType)
requireBool (BoolType ** e) = Right e
requireBool _ = Left "Not a boolean."

typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
typecheck (IntLit i) = Right (_ ** IntLiteral i)
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
typecheck (StringLit s) = Right (_ ** StringLiteral s)
typecheck (BinOp o l r) = do
    (lt ** le) <- typecheck l
    (rt ** re) <- typecheck r
    (ot ** f) <- typecheckOp o lt rt
    pure (_ ** BinOperation f le re)
typecheck (IfElse c t e) =
    do
        ce <- typecheck c >>= requireBool
        (tt ** te) <- typecheck t
        (et ** ee) <- typecheck e
        case decEq tt et of
            Just p => pure (_ ** IfThenElse ce (replace p te) ee)
            Nothing => Left "Incompatible branch types."
typecheck (Pair l r) =
    do
        (lt ** le) <- typecheck l
        (rt ** re) <- typecheck r
        pure (_ ** NewPair le re)
typecheck (Fst p) =
    do
        (pt ** pe) <- typecheck p
        case pt of
            PairType _ _ => pure $ (_ ** First pe)
            _ => Left "Applying fst to non-pair."
typecheck (Snd p) =
    do
        (pt ** pe) <- typecheck p
        case pt of
            PairType _ _ => pure $ (_ ** Second pe)
            _ => Left "Applying snd to non-pair."

eval : SafeExpr t -> repr t
eval (IntLiteral i) = i
eval (BoolLiteral b) = b
eval (StringLiteral s) = s
eval (BinOperation f l r) = f (eval l) (eval r)
eval (IfThenElse c t e) = if (eval c) then (eval t) else (eval e)
eval (NewPair l r) = (eval l, eval r)
eval (First p) = fst (eval p)
eval (Second p) = snd (eval p)

resultStr : {t : ExprType} -> repr t -> String
resultStr {t=IntType} i = show i
resultStr {t=BoolType} b = show b
resultStr {t=StringType} s = show s
resultStr {t=PairType t1 t2} (l,r) = "(" ++ resultStr l ++ ", " ++ resultStr r ++ ")"

tryEval : Expr -> String
tryEval ex =
    case typecheck ex of
        Left err => "Type error: " ++ err
        Right (t ** e) => resultStr $ eval {t} e

main : IO ()
main = putStrLn $ tryEval $ BinOp Add (Fst (IfElse (BoolLit True) (Pair (IntLit 6) (BoolLit True)) (Pair (IntLit 7) (BoolLit False)))) (BinOp Multiply (IntLit 160) (IntLit 2))
@@ -3,4 +3,11 @@ languageCode = "en-us"
title = "Daniel's Blog"
theme = "vanilla"
pygmentsCodeFences = true
pygmentsStyle = "github"
pygmentsUseClasses = true
summaryLength = 20

[markup]
  [markup.tableOfContents]
    endLevel = 4
    ordered = false
    startLevel = 3
@@ -1,5 +1,6 @@
---
title: Daniel's Blog
description: Daniel Fedorin's personal blog, covering topics such as functional programming, compiler development, and more!
---
## Hello!
Welcome to my blog. Here, I write about various subjects, including (but not limited to)
@@ -1,8 +1,8 @@
---
title: About
---
I'm Daniel, a Computer Science student currently in my third (and final) undergraduate year at Oregon State University.
I'm Daniel, a Computer Science student currently working towards my Master's Degree at Oregon State University.
Due my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
Due to my initial interest in calculators and compilers, I got involved in the Programming Language Theory research
group, gaining some experience in formal verification, domain specific languages, and explainable computing.

For work, school, and hobby projects, I use a variety of programming languages, most commonly C/C++,
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 0 - Intro
date: 2019-08-03T01:02:30-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this first post of a larger series, we embark on a journey of developing a compiler for a lazily evaluated functional language."
---
During my last academic term, I was enrolled in a compilers course.
We had a final project - develop a compiler for a basic Python subset,
@@ -142,3 +143,6 @@ Here are the posts that I've written so far for this series:
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 1 - Tokenizing
date: 2019-08-03T01:02:30-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we tackle the first component of our compiler: tokenizing."
---
It makes sense to build a compiler bit by bit, following the stages we outlined in
the first post of the series. This is because these stages are essentially a pipeline,
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 2 - Parsing
date: 2019-08-03T01:02:30-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we combine our compiler's tokenizer with a parser, allowing us to extract structure from input source code."
---
In the previous post, we covered tokenizing. We learned how to convert an input string into logical segments, and even wrote up a tokenizer to do it according to the rules of our language. Now, it's time to make sense of the tokens, and parse our language.
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 3 - Type Checking
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we allow our compiler to throw away invalid programs, detected using a monomorphic typechecking algorithm."
---
I think tokenizing and parsing are boring. The thing is, looking at syntax
is a pretty shallow measure of how interesting a language is. It's like
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 4 - Small Improvements
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we take a little break from pushing our compiler forward to make some improvements to the code we've written so far."
---
We've done quite a big push in the previous post. We defined
type rules for our language, implemented unification,
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 5 - Execution
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we define the rules for a G-machine, the abstract machine that we will target with our compiler."
---
{{< gmachine_css >}}
We now have trees representing valid programs in our language,
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 6 - Compilation
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we enable our compiler to convert programs written in our functional language to G-machine instructions."
---
In the previous post, we defined a machine for graph reduction,
called a G-machine. However, this machine is still not particularly
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 7 - Runtime
date: 2019-08-06T14:26:38-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we implement the supporting code that will be shared between all executables our compiler will create."
---
Wikipedia has the following definition for a __runtime__:
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 8 - LLVM
date: 2019-10-30T22:16:22-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we enable our compiler to convert G-machine instructions to LLVM IR, which finally allows us to generate working executables."
---

We don't want a compiler that can only generate code for a single
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 9 - Garbage Collection
date: 2020-02-10T19:22:41-08:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we implement a garbage collector that frees memory no longer used by the executables our compiler creates."
---

> "When will you learn? When will you learn that __your actions have consequences?__"
@@ -2,6 +2,7 @@
title: Compiling a Functional Language Using C++, Part 10 - Polymorphism
date: 2020-03-25T17:14:20-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we extend our compiler's typechecking algorithm to implement the Hindley-Milner type system, allowing for polymorphic functions."
---

[In part 8]({{< relref "08_compiler_llvm.md" >}}), we wrote some pretty interesting programs in our little language.
@@ -777,6 +778,6 @@ While this is a major success, we are not yet done. Although our functions can n
have polymorphic types, the same cannot be said for our data types! We want to
have lists of integers __and__ lists of booleans, without having to duplicate any code!
While this also falls into the category of polymorphism, this post has already gotten very long,
and we will return to it in the near future. Once we're done with that, I still intend
and we will return to it in [part 11]({{< relref "11_compiler_polymorphic_data_types.md" >}}). Once we're done with that, I still intend
to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with
__strings__. See you in these future posts!
__strings__.
@@ -1,8 +1,8 @@
---
title: Compiling a Functional Language Using C++, Part 11 - Polymorphic Data Types
date: 2020-03-28T20:10:35-07:00
date: 2020-04-14T19:05:42-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we enable our compiler to understand polymorphic data types."
---
[In part 10]({{< relref "10_compiler_polymorphism.md" >}}), we managed to get our
compiler to accept functions that were polymorphically typed. However, a piece
@@ -99,8 +99,302 @@ Let's now enumerate all the possible forms that (mono)types can take in our syst
It is convenient to treat regular types (like \(\text{Bool}\)) as
type constructors of arity 0 (that is, type constructors with kind \(*\)).
In effect, they take zero arguments and produce types (themselves).
{{< /sidenote >}} such as \\(\\text{List} \; \\text{Int}\\) or \\(\\text{Bool}\\).
{{< /sidenote >}} such as \\(\\text{List} \\; \\text{Int}\\) or \\(\\text{Bool}\\).
3. A function from one type to another, like \\(\\text{List} \\; a \\rightarrow \\text{Int}\\).

Polytypes (type schemes) in our system can be all of the above, but may also include a "forall"
quantifier at the front, generalizing the type (like \\(\\forall a \\; . \\; \\text{List} \\; a \\rightarrow \\text{Int}\\)).

Let's start implementing all of this. Why don't we start with the change to the syntax of our language?
We have complicated the situation quite a bit. Let's take a look at the _old_ grammar
for data type declarations (this is going back as far as [part 2]({{< relref "02_compiler_parsing.md" >}})).
Here, \\(L\_D\\) is the nonterminal for the things that go between the curly braces of a data type
declaration, \\(D\\) is the nonterminal representing a single constructor definition,
and \\(L\_U\\) is a list of zero or more uppercase variable names:

{{< latex >}}
\begin{aligned}
L_D & \rightarrow D \; , \; L_D \\
L_D & \rightarrow D \\
D & \rightarrow \text{upperVar} \; L_U \\
L_U & \rightarrow \text{upperVar} \; L_U \\
L_U & \rightarrow \epsilon
\end{aligned}
{{< /latex >}}

This grammar was actually too simple even for our monomorphically typed language!
Since functions are not represented using a single uppercase variable, it wasn't possible for us
to define constructors that accept as arguments anything other than integers and user-defined
data types. Now, we also need to modify this grammar to allow for constructor applications (which can be nested).
To do all of these things, we will define a new nonterminal, \\(Y\\), for types:

{{< latex >}}
\begin{aligned}
Y & \rightarrow N \; ``\rightarrow" Y \\
Y & \rightarrow N
\end{aligned}
{{< /latex >}}

We make it right-recursive (because the \\(\\rightarrow\\) operator is right-associative). Next, we define
a nonterminal for all types _except_ those constructed with the arrow, \\(N\\).

{{< latex >}}
\begin{aligned}
N & \rightarrow \text{upperVar} \; L_Y \\
N & \rightarrow \text{typeVar} \\
N & \rightarrow ( Y )
\end{aligned}
{{< /latex >}}

The first of the above rules allows a type to be a constructor applied to zero or more arguments
(generated by \\(L\_Y\\)). The second rule allows a type to be a placeholder type variable. Finally,
the third rule allows for any type (including functions, again) to occur between parentheses.
This is so that higher-order functions, like \\((a \rightarrow b) \rightarrow a \rightarrow a \\),
can be represented.

Unfortunately, the definition of \\(L\_Y\\) is not as straightforward as we imagine. We could define
it as just a list of \\(Y\\) nonterminals, but this would make the grammar ambiguous: something
like `List Maybe Int` could be interpreted as "`List`, applied to types `Maybe` and `Int`", and
"`List`, applied to type `Maybe Int`". To avoid this, we define a "type list element" \\(Y'\\),
which does not take arguments:

{{< latex >}}
\begin{aligned}
Y' & \rightarrow \text{upperVar} \\
Y' & \rightarrow \text{lowerVar} \\
Y' & \rightarrow ( Y )
\end{aligned}
{{< /latex >}}

We then make \\(L\_Y\\) a list of \\(Y'\\):

{{< latex >}}
\begin{aligned}
L_Y & \rightarrow Y' \; L_Y \\
L_Y & \rightarrow \epsilon
\end{aligned}
{{< /latex >}}

Finally, we update the rules for the data type declaration, as well as for a single
constructor. In these new rules, we use \\(L\_T\\) to mean a list of type variables.
The rules are as follows:

{{< latex >}}
\begin{aligned}
T & \rightarrow \text{data} \; \text{upperVar} \; L_T = \{ L_D \} \\
D & \rightarrow \text{upperVar} \; L_Y \\
\end{aligned}
{{< /latex >}}

Those are all the changes we have to make to our grammar. Let's now move on to implementing
the corresponding data structures. We define a new family of structs, which represent types as they are
received from the parser. These differ from regular types in that they
do not necessarily represent valid types; validating types requires two passes, whereas parsing is
done in a single pass. We can define our parsed types as follows:

{{< codeblock "C++" "compiler/11/parsed_type.hpp" >}}
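
The header itself isn't reproduced in this compare view, so here is a rough sketch of the shape these structs take. The class and member names below are assumptions inferred from the surrounding prose; `compiler/11/parsed_type.hpp` remains the authoritative definition.

```C++
// Hypothetical sketch only -- see compiler/11/parsed_type.hpp for the real file.
#pragma once
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "type.hpp"

struct type_env; // assumed: the environment used to look up constructor arities

// A type as it was written in the source, not yet validated.
struct parsed_type {
    virtual ~parsed_type() = default;
    // Convert to a real type, given the type variables allowed to occur
    // and the environment in which constructors are looked up.
    virtual type_ptr to_type(
        const std::set<std::string>& vars, const type_env& env) const = 0;
};
using parsed_type_ptr = std::unique_ptr<parsed_type>;

// A constructor applied to zero or more arguments, e.g. `List a`.
struct parsed_type_app : parsed_type {
    std::string name;
    std::vector<parsed_type_ptr> arguments;
    type_ptr to_type(const std::set<std::string>& vars,
                     const type_env& env) const override;
};

// A function type, e.g. `a -> List a`.
struct parsed_type_arr : parsed_type {
    parsed_type_ptr left;
    parsed_type_ptr right;
    type_ptr to_type(const std::set<std::string>& vars,
                     const type_env& env) const override;
};

// A type variable, e.g. `a`.
struct parsed_type_var : parsed_type {
    std::string var;
    type_ptr to_type(const std::set<std::string>& vars,
                     const type_env& env) const override;
};
```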

We define the conversion method `to_type`, which requires
a set of type variables that are allowed to occur within a parsed
type (which are the variables specified on the left of the `=`
in the data type declaration syntax), and the environment in which to
look up the arities of any type constructors. The implementation is as follows:

{{< codeblock "C++" "compiler/11/parsed_type.cpp" >}}

Note that this definition requires a new `type` subclass, `type_app`, which
represents type application. Unlike `parsed_type_app`, it stores a pointer
to the type constructor being applied, rather than its name. This
helps validate the type (by making sure the parsed type's name refers to
an existing type constructor), and lets us gather information like
which constructors the resulting type has. We define this new type as follows:

{{< codelines "C++" "compiler/11/type.hpp" 70 78 >}}
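
The nine referenced lines aren't visible in this view; below is a minimal sketch of what such a subclass plausibly looks like, with member names inferred from the prose rather than copied from the file.

```C++
// Hypothetical sketch -- the real definition is compiler/11/type.hpp, lines 70-78.
struct type_app : type {
    // The constructor being applied (e.g. the List type), stored as a pointer
    // to an existing type rather than by name.
    type_ptr constructor;
    // The arguments it is applied to (e.g. Int in `List Int`).
    std::vector<type_ptr> arguments;

    type_app(type_ptr c) : constructor(std::move(c)) {}
};
```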

With our new data structures in hand, we can now update the grammar in our Bison file.
First things first, we'll add the type parameters to the data type definition:

{{< codelines "plaintext" "compiler/11/parser.y" 127 130 >}}

Next, we add the new grammar rules we came up with:

{{< codelines "plaintext" "compiler/11/parser.y" 138 163 >}}

Note in the above rules that even for `typeListElement`, which
can never be applied to any arguments, we still attach a `parsed_type_app`
as the semantic value. This is for consistency; it's easier to view
all types in our system as applications to zero or more arguments,
than to write coercions from non-applied types to types applied to zero
arguments.

Finally, we define the types for these new rules at the top of the file:

{{< codelines "plaintext" "compiler/11/parser.y" 43 44 >}}

This concludes our work on the parser, but opens up a whole can of worms
elsewhere. First of all, now that we introduced a new `type` subclass, we must
ensure that type unification still works as intended. We therefore have
to adjust the `type_mgr::unify` method:

{{< codelines "C++" "compiler/11/type.cpp" 95 132 >}}

In the above snippet, we add a new if-statement that checks whether or
not both types being unified are type applications, and if so, unifies
their constructors and arguments. We also extend our type equality check
to ensure that both the names _and_ arities of types match
{{< sidenote "right" "type-equality-note" "when they are compared for equality." >}}
This is actually a pretty silly measure. Consider the following three
propositions:
1) types are only declared at the top-level scope.
2) if a type is introduced, and another type with that name already exists, we throw an error.
3) for name equality to be insufficient, we need to have two declared types
with the same name. Given these propositions, it will not be possible for us to
declare two types that would confuse the name equality check. However,
in the near future, these propositions may not all hold: if we allow
<code>let/in</code> expressions to contain data type definitions,
it will be possible to declare two types with the same name and arity
(in different scopes), which would <em>still</em> confuse the check.
In the future, if this becomes an issue, we will likely move to unique
type identifiers.
{{< /sidenote >}} Note also the more basic fact that we added arity
to our `type_base`,
{{< sidenote "left" "base-arity-note" "since it may now be a type constructor instead of a plain type." >}}
You may be wondering, why did we add arity to base types, rather than data types?
Although so far, our language can only create type constructors from data type definitions,
it's possible (or even likely) that we will have
polymorphic built-in types, such as
<a href="https://www.haskell.org/tutorial/io.html">the IO monad</a>.
To prepare for this, we will allow our base types to be type constructors too.
{{< /sidenote >}}
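
Since the excerpt isn't reproduced here, the following sketch captures the new unification case described above; the casts and error handling are assumptions, and lines 95-132 of `compiler/11/type.cpp` are the real thing.

```C++
// Hypothetical sketch of the new case inside type_mgr::unify (not the real code).
// Both sides are type applications: unify the constructors, then unify the
// argument lists pairwise. Applications with different argument counts cannot unify.
if(type_app* lapp = dynamic_cast<type_app*>(l.get())) {
    if(type_app* rapp = dynamic_cast<type_app*>(r.get())) {
        if(lapp->arguments.size() != rapp->arguments.size())
            throw 0; // matches the error style used elsewhere in the compiler
        unify(lapp->constructor, rapp->constructor);
        for(size_t i = 0; i < lapp->arguments.size(); i++)
            unify(lapp->arguments[i], rapp->arguments[i]);
        return;
    }
}
```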

Just as we changed `type_mgr::unify`, we need to change `type_mgr::find_free`
to include the new case of `type_app`. The adjusted function looks as follows:

{{< codelines "C++" "compiler/11/type.cpp" 174 187 >}}
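
Again only as a sketch (with assumed names): the new case simply walks both the constructor and each argument of an application.

```C++
// Hypothetical sketch of the type_app case in type_mgr::find_free; the
// authoritative version is compiler/11/type.cpp, lines 174-187.
if(type_app* app = dynamic_cast<type_app*>(t.get())) {
    // A type application can contain free variables both in its
    // constructor and in each of its arguments.
    find_free(app->constructor, into);
    for(auto& arg : app->arguments) find_free(arg, into);
}
```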

There is another adjustment that we have to make to our type code. Recall
that we had code that implemented substitutions: replacing free variables
with other types to properly implement our type schemes. There
was a bug in that code, which becomes much more apparent when the substitution
system is put under more pressure. Specifically, the bug was in how type
variables were handled.

The old substitution code, when it found that a type
variable had been bound to another type, always moved on to perform
a substitution in that other type. This wasn't really a problem then, since
any type variables that needed to be substituted were guaranteed to be
free (that's why they were put into the "forall" quantifier). However, with our
new system, we are using user-provided type variables (usually `a`, `b`, and so on),
which have likely already been used by our compiler internally, and thus have
been bound to something. That something is irrelevant to us: when we
perform a substitution on a user-defined data type, we _know_ that _our_ `a` is
free, and should be substituted. In short, precedence should be given to
substituting type variables, rather than resolving them to what they are bound to.

To make this adjustment possible, we need to make `substitute` a method of `type_manager`,
since it will now require an awareness of existing type bindings. Additionally,
this method will now perform its own type resolution, checking if a type variable
needs to be substituted between each step. The whole code is as follows:

{{< codelines "C++" "compiler/11/type.cpp" 134 165 >}}
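
The gist of the fix, sketched with hypothetical helper names (the referenced lines are the authoritative version): consult the substitution mapping before following a variable's existing binding.

```C++
// Hypothetical sketch, not the real method (see type.cpp, lines 134-165).
type_ptr type_mgr::substitute(const std::map<std::string, type_ptr>& subst,
                              const type_ptr& t) {
    type_ptr current = t;
    while(type_var* var = dynamic_cast<type_var*>(current.get())) {
        auto subst_it = subst.find(var->name);
        // Precedence: a variable we were asked to substitute is replaced
        // immediately, even if it has already been bound to something.
        if(subst_it != subst.end()) return subst_it->second;
        type_ptr bound = lookup_binding(var->name); // assumed helper
        if(!bound) break;
        current = bound; // otherwise, resolve one step and look again
    }
    // Arrows and applications would be rebuilt here by recursing on children.
    return current;
}
```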

That's all for types. Definitions, though, need some work. First of all,
we've changed our parser to feed our `constructor` type a vector of
`parsed_type_ptr`, rather than `std::string`. We therefore have to update
`constructor` to receive and store this new vector:

{{< codelines "C++" "compiler/11/definition.hpp" 13 20 >}}

Similarly, `definition_data` itself needs to accept the list of type
variables it has:

{{< codelines "C++" "compiler/11/definition.hpp" 54 70 >}}

We then look at `definition_data::insert_constructors`, which converts
`constructor` instances to actual constructor functions. The code,
which is getting pretty complicated, is as follows:

{{< codelines "C++" "compiler/11/definition.cpp" 64 92 >}}

In the above snippet, we do the following things:

1. We first create a set of type variables that can occur
in this type's constructors (the same set that's used
by the `to_type` method we saw earlier). While doing this, we ensure
a type variable is not used twice (this is not allowed), and add each
type variable to the final return type (which is something like `List a`),
in the order they occur.
2. When the variables have been gathered into a set, we iterate
over all constructors, and convert them into types by calling `to_type`
on their arguments, then assembling the resulting argument types into a function.
This is not enough, however,
{{< sidenote "right" "type-variables-note" "since constructors of types that accept type variables are polymorphic," >}}
This is also not enough because without generalization using "forall", we are risking using type variables
that have already been bound, or that will be bound. Even if <code>a</code> has not yet been used by the typechecker,
it will be once the type manager generates its first type variable, and things will go south. If we, for some reason,
wanted type constructors to be monomorphic (but generic, with type variables) we'd need to internally
instantiate fresh type variables for every user-defined type variable, and substitute them appropriately.
{{< /sidenote >}}
as we have discussed above with \\(\\text{Nil}\\) and \\(\\text{Cons}\\).
To accommodate for this, we also add all type variables to the "forall" quantifier
of a new type scheme, whose monotype is our newly assembled function type. This
type scheme is what we store as the type of the constructor.
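
As a concrete (assumed) example: given a declaration along the lines of `data List a = { Nil, Cons a (List a) }` in our language, the `Cons` constructor would end up with a scheme like

{{< latex >}}
\forall a \; . \; a \rightarrow \text{List} \; a \rightarrow \text{List} \; a
{{< /latex >}}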

This was the last major change we have to perform. The rest is cleanup: we have switched
our system to dealing with type applications (sometimes with zero arguments), and we must
bring the rest of the compiler up to speed with this change. For instance, we update
`ast_int` to create a reference to an existing integer type during typechecking:

{{< codelines "C++" "compiler/11/ast.cpp" 20 22 >}}

Similarly, we update our code in `typecheck_program` to use
type applications in the type for binary operations:

{{< codelines "C++" "compiler/11/main.cpp" 31 37 >}}
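
The referenced lines aren't shown here; the change amounts to building the familiar `Int -> Int -> Int` operator type out of a zero-argument type application rather than a bare base type. A sketch, under assumed constructor signatures:

```C++
// Hypothetical sketch -- the real code is compiler/11/main.cpp, lines 31-37.
// The Int type is looked up in the environment and wrapped in a
// zero-argument application; the operator type is then Int -> Int -> Int.
type_ptr int_type = type_ptr(new type_app(env->lookup_type("Int")));
type_ptr binop_type = type_ptr(new type_arr(
    int_type, type_ptr(new type_arr(int_type, int_type))));
```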

Finally, we update `ast_case` to unwrap type applications to get the needed constructor
data from `type_data`. This has to be done in `ast_case::typecheck`, as follows:

{{< codelines "C++" "compiler/11/ast.cpp" 163 168 >}}

Additionally, a similar change needs to be made in `ast_case::compile`:

{{< codelines "C++" "compiler/11/ast.cpp" 174 175 >}}

That should be all! Let's try an example:

{{< rawblock "compiler/11/examples/works3.txt" >}}

The output:

```
Result: 6
```

Yay! Not only were we able to define a list of any type, but our `length` function correctly
determined the lengths of two lists of different types! Let's try an example with the
classic [`fold` functions](http://learnyouahaskell.com/higher-order-functions#folds):

{{< rawblock "compiler/11/examples/list.txt" >}}

We expect the sum of the list `[1,2,3,4]` to be `10`, and its length to be `4`, so the sum
of the two should be `14`. And indeed, our program agrees:

```
Result: 14
```

Let's do one more example, to test types that take more than one type parameter:

{{< rawblock "compiler/11/examples/pair.txt" >}}

Once again, the compiled program gives the expected result:

```
Result: 4
```

This looks good! We have added support for polymorphic data types to our compiler.
We are now free to move on to `let/in` expressions, __lambda functions__, and __Input/Output__,
as promised, starting with [part 12]({{< relref "12_compiler_let_in_lambda/index.md" >}}) - `let/in`
and lambdas!
BIN content/blog/12_compiler_let_in_lambda/fig_colored.png (Executable file, 396 KiB)
BIN content/blog/12_compiler_let_in_lambda/fig_colored_ordered.png (Executable file, 457 KiB)
BIN content/blog/12_compiler_let_in_lambda/fig_graph.png (Executable file, 117 KiB)
BIN content/blog/12_compiler_let_in_lambda/fig_subgraphs.png (Executable file, 137 KiB)
BIN content/blog/12_compiler_let_in_lambda/fig_subgraphs_colored.png (Executable file, 195 KiB)
BIN content/blog/12_compiler_let_in_lambda/fig_subgraphs_colored_all.png (Executable file, 471 KiB)
BIN content/blog/12_compiler_let_in_lambda/fig_subgraphs_colored_ordered.png (Executable file, 602 KiB)
988
content/blog/12_compiler_let_in_lambda/index.md
Normal file
@@ -0,0 +1,988 @@
---
title: Compiling a Functional Language Using C++, Part 12 - Let/In and Lambdas
date: 2020-06-21T00:50:07-07:00
tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we extend our language with let/in expressions and lambda functions."
---

Now that our language's type system is more fleshed out and pleasant to use, it's time to shift our focus to the ergonomics of the language itself. I've been mentioning `let/in` and __lambda__ expressions for a while now. The former will let us create names for expressions that are limited to a certain scope (without having to create global variable bindings), while the latter will allow us to create functions without giving them any name at all.

Let's take a look at `let/in` expressions first, to make sure we're all on the same page about what it is we're trying to implement. Let's start with some rather basic examples, and then move on to more complex ones. A very basic use of a `let/in` expression is, in Haskell:

```Haskell
let x = 5 in x + x
```

In the above example, we bind the variable `x` to the value `5`, and then refer to `x` twice in the expression after the `in`. The whole snippet is one expression, evaluating to what the `in` part evaluates to. Additionally, the variable `x` does not escape the expression -
{{< sidenote "right" "used-note" "it cannot be used anywhere else." >}}
Unless, of course, you bind it elsewhere; naturally, using <code>x</code> here does not forbid you from re-using the variable.
{{< /sidenote >}}

Now, consider a slightly more complicated example:

```Haskell
let sum xs = foldl (+) 0 xs in sum [1,2,3]
```

Here, we're defining a _function_ `sum`,
{{< sidenote "right" "eta-note" "which takes a single argument:" >}}
Those who favor the
<a href="https://en.wikipedia.org/wiki/Tacit_programming#Functional_programming">point-free</a>
programming style may be slightly twitching right now, the words <em>eta reduction</em> swirling in their mind. What do you know, <code>fold</code>-based <code>sum</code> is even one of the examples on the Wikipedia page! I assure you, I left the code as you see it deliberately, to demonstrate a principle.
{{< /sidenote >}} the list to be summed. We will want this to be valid in our language, as well. We will soon see how this particular feature is related to lambda functions, and why I'm covering these two features in the same post.

Let's step up the difficulty a bit more, with an example that,
{{< sidenote "left" "translate-note" "though it does not immediately translate to our language," >}}
The part that doesn't translate well is the whole deal with patterns in function arguments, as well as the notion of having more than one equation for a single function, as is the case with <code>safeTail</code>.
<br><br>
It's not that these things are <em>impossible</em> to translate; it's just that translating them may be worthy of a post in and of itself, and would only serve to bloat and complicate this part. What can be implemented with pattern arguments can just as well be implemented using regular case expressions; I dare say most "big" functional languages actually just convert from the former to the latter as part of the compilation process.
{{< /sidenote >}} illustrates another important principle:

```Haskell {linenos=table}
let
  safeTail [] = Nothing
  safeTail [x] = Just x
  safeTail (_:xs) = safeTail xs
  myTail = safeTail [1,2,3,4]
in
  myTail
```

The principle here is that definitions in `let/in` can be __recursive and polymorphic__. Remember the note in
[part 10]({{< relref "10_compiler_polymorphism.md" >}}) about
[let-polymorphism](https://en.wikipedia.org/wiki/Hindley%E2%80%93Milner_type_system#Let-polymorphism)? This is it: we're allowing polymorphic variable bindings, but only when they're bound in a `let/in` expression (or at the top level).

The principles demonstrated by the last two snippets mean that compiling `let/in` expressions, at least with the power we want to give them, will require the same kind of dependency analysis we had to go through when we implemented polymorphically typed functions. That is, we will need to analyze which functions call which other functions, and typecheck the callees before the callers. We will continue to represent callee-caller relationships using a dependency graph, in which nodes represent functions, and an edge from one function node to another means that the former function calls the latter. Below is an image of one such graph:

{{< figure src="fig_graph.png" caption="Example dependency graph without `let/in` expressions." >}}

Since we want to typecheck callees first, we effectively want to traverse the graph in reverse
topological order. However, there's a slight issue: a topological order is only defined for acyclic graphs, and it is very possible for functions in our language to mutually call each other. To deal with this, we have to find groups of mutually recursive functions, and treat them as a single unit, thereby eliminating cycles. In the above graph, there are two groups, as follows:

{{< figure src="fig_colored_ordered.png" caption="Previous dependency graph with mutually recursive groups highlighted." >}}

As seen in the second image, according to the reverse topological order of the given graph, we will typecheck the blue group containing three functions first, since the sole function in the orange group calls one of the blue functions.

Things are more complicated now that `let/in` expressions are able to introduce their own, polymorphic and recursive declarations. However, there is a single invariant we can establish: function definitions can only depend on functions defined at the same time as them. That is, for our purposes, functions declared in the global scope can only depend on other functions declared in the global scope, and functions declared in a `let/in` expression can only depend on other functions declared in that same expression. That's not to say that a function declared in a `let/in` block inside some function `f` can't call another globally declared function `g` - rather, we allow this, but treat the situation as though `f` depends on `g`. In contrast, it's not at all possible for a global function to depend on a local function, because bindings created in a `let/in` expression do not escape the expression itself. This invariant tells us that in the presence of nested function definitions, the situation looks like this:

{{< figure src="fig_subgraphs.png" caption="Previous dependency graph augmented with `let/in` subgraphs." >}}

In the above image, some of the original nodes in our graph now contain other, smaller graphs. Those subgraphs are the graphs created by function declarations in `let/in` expressions. Just like our top-level nodes, the nodes of these smaller graphs can depend on other nodes, and even form cycles. Within each subgraph, we will have to perform the same kind of cycle detection, resulting in something like this:

{{< figure src="fig_subgraphs_colored_all.png" caption="Augmented dependency graph with mutually recursive groups highlighted." >}}
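
How could such groups be computed? A sidenote later in this post mentions the \\(O(n^3)\\) cost of Warshall's algorithm, so here is a rough, self-contained sketch in that spirit: take the transitive closure of the call relation, then put two functions in the same group exactly when each reaches the other. All names here are hypothetical; the compiler's actual dependency machinery is what really gets used.

```C++
// Illustrative sketch only, not the compiler's real dependency code.
#include <map>
#include <set>
#include <string>
#include <vector>

using adjacency = std::map<std::string, std::set<std::string>>;

// Warshall-style closure: which functions (transitively) call which others.
// Assumes every function appears as a key, possibly with an empty edge set.
adjacency transitive_closure(adjacency edges) {
    for(auto& [k, from_k] : edges)
        for(auto& [i, from_i] : edges) {
            if(i == k || !from_i.count(k)) continue;
            from_i.insert(from_k.begin(), from_k.end());
        }
    return edges;
}

// Two functions share a group exactly when each transitively calls the other;
// everything else ends up in a group of its own.
std::vector<std::set<std::string>> mutually_recursive_groups(const adjacency& edges) {
    adjacency closed = transitive_closure(edges);
    std::vector<std::set<std::string>> groups;
    std::set<std::string> assigned;
    for(auto& [f, reach_f] : closed) {
        if(assigned.count(f)) continue;
        std::set<std::string> group{f};
        for(auto& g : reach_f) {
            auto it = closed.find(g);
            if(it != closed.end() && it->second.count(f)) group.insert(g);
        }
        assigned.insert(group.begin(), group.end());
        groups.push_back(group);
    }
    return groups;
}
```

Ordering the resulting groups in reverse topological order is a separate step, which the real implementation also has to perform.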
|
||||||
|
|
||||||
|
When typechecking a function, we must be ready to perform dependency analysis at any point. What's more is that the free variable analysis we used to perform must now be extended to differentiate between free variables that refer to "nearby" definitions (i.e. within the same `let/in` expression), and "far away" definitions (i.e. outside of the `let/in` expression). And speaking of free variables...
|
||||||
|
|
||||||
|
What do we do about variables that are captured by a local definition? Consider the following snippet:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
addToAll n xs = map addSingle xs
|
||||||
|
where
|
||||||
|
addSingle x = n + x
|
||||||
|
```
|
||||||
|
|
||||||
|
In the code above, the variable `n`, bound on line 1, is used by `addSingle` on line 3. When a function refers to variables bound outside of itself (as `addSingle` does), it is said to be _capturing_ these variables, and the function is called a _closure_. Why does this matter? On the machine level, functions are represented as sequences of instructions, and there's a finite number of them (as there is finite space on the machine). But there is an infinite number of `addSingle` functions! When we write `addToAll 5 [1,2,3]`, `addSingle` becomes `5+x`. When, on the other hand, we write `addToAll 6 [1,2,3]`, `addSingle` becomes `6+x`. There are certain ways to work around this - we could, for instance, dynamically create machine code in memory, and then execute it (this is called [just-in-time compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation)). This would end up with a collections of runtime-defined functions that can be represented as follows:
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
-- Version of addSingle when n = 5
|
||||||
|
addSingle5 x = 5 + x
|
||||||
|
|
||||||
|
-- Version of addSingle when n = 6
|
||||||
|
addSingle6 x = 6 + x
|
||||||
|
|
||||||
|
-- ... and so on ...
|
||||||
|
```
|
||||||
|
|
||||||
|
But now, we end up creating several functions with almost identical bodies, with the exception of the free variables themselves. Wouldn't it be better to perform the well-known strategy of reducing code duplication by factoring out parameters, and leaving only one instance of the repeated code? We would end up with:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
addToAll n xs = map (addSingle n) xs
|
||||||
|
addSingle n x = n + x
|
||||||
|
```
|
||||||
|
|
||||||
|
Observe that we no longer have the "infinite" number of functions - the infinitude of possible behaviors is created via currying. Also note that `addSingle`
|
||||||
|
{{< sidenote "right" "global-note" "is now declared at the global scope," >}}
|
||||||
|
Wait a moment, didn't we just talk about nested polymorphic definitions, and how they change our typechecking model? If we transform our program into a bunch of global definitions, we don't need to make adjustments to our typechecking. <br><br>
|
||||||
|
This is true, but why should we perform transformations on a malformed program? Typechecking before pulling functions to the global scope will help us save the work, and breaking down one dependency-searching problem (which is \(O(n^3)\) thanks to Warshall's) into smaller, independent problems may even lead to better performance. Furthermore, typechecking before program transformations will help us come up with more helpful error messages.
|
||||||
|
{{< /sidenote >}} and can be transformed into a sequence of instructions just like any other global function. It has been pulled from its `where` (which, by the way, is pretty much equivalent to a `let/in`) to the top level.
|
||||||
|
|
||||||
|
Now, see how `addSingle` became `(addSingle n)`? If we chose to rewrite the
|
||||||
|
program this way, we'd have to find-and-replace every instance of `addSingle`
|
||||||
|
in the function body, which would be tedious and require us to keep
|
||||||
|
track of shadowed variables and the like. Also, what if we used a local
|
||||||
|
definition twice in the original piece of code? How about something like this:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = square * square
|
||||||
|
where
|
||||||
|
square = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
Applying the strategy we saw above, we get:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = (square x) * (square x)
|
||||||
|
square x = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
This is valid, except that in our evaluation model, the two instances
|
||||||
|
of `(square x)` will be built independently of one another, and thus,
|
||||||
|
will not be shared. This, in turn, will mean that `square` will be called
|
||||||
|
twice, which is not what we would expect from looking at the original program.
|
||||||
|
This isn't good. Instead, why don't we keep the `where`, but modify it
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
fourthPower x = square * square
|
||||||
|
where square = square' x
|
||||||
|
square' x = x * x
|
||||||
|
```
|
||||||
|
|
||||||
|
This time, assuming we can properly implement `where`, the call to
|
||||||
|
`square' x` should only occur once. Though I've been using `where`,
|
||||||
|
which leads to less clutter in Haskell code, the exact same approach applies
|
||||||
|
to `let/in`, and that's what we'll be using in our language.
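
For reference, here is the same `fourthPower` example written with `let/in` instead of `where`; nothing new is happening here, it's just the shape of program our compiler will actually see:

```Haskell
fourthPower x =
    let square = square' x
    in square * square

square' x = x * x
```
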
|
||||||
|
|
||||||
|
This technique of replacing captured variables with arguments, and pulling closures into the global scope to aid compilation, is called [Lambda Lifting](https://en.wikipedia.org/wiki/Lambda_lifting). Its name is no coincidence - lambda functions need to undergo the same kind of transformation as our nested definitions (unlike nested definitions, though, lambda functions need to be named). This is why they are included in this post together with `let/in`!
|
||||||
|
|
||||||
|
What are lambda functions, by the way? A lambda function is just a function
|
||||||
|
expression that doesn't have a name. For example, if we had Haskell code like
|
||||||
|
this:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
double x = x + x
|
||||||
|
doubleList xs = map double xs
|
||||||
|
```
|
||||||
|
|
||||||
|
We could rewrite it using a lambda function as follows:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
doubleList xs = map (\x -> x + x) xs
|
||||||
|
```
|
||||||
|
|
||||||
|
As you can see, a lambda is an expression in the form `\x -> y` where `x` can
|
||||||
|
be any variable and `y` can be any expression (including another lambda).
|
||||||
|
This represents a function that, when applied to a value `x`, will perform
|
||||||
|
the computation given by `y`. Lambdas are useful when creating single-use
|
||||||
|
functions that we don't want to make globally available.
|
||||||
|
|
||||||
|
Lifting lambda functions will effectively rewrite our program in the
|
||||||
|
opposite direction to the one shown, replacing the lambda with a reference
|
||||||
|
to a global declaration which will hold the function's body. Just like
|
||||||
|
with `let/in`, we will represent captured variables using arguments
|
||||||
|
and partial application. For instance, when starting with:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
addToAll n xs = map (\x -> n + x) xs
|
||||||
|
```
|
||||||
|
|
||||||
|
We would output the following:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
addToAll n xs = map (lambda n) xs
|
||||||
|
lambda n x = n + x
|
||||||
|
```
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
Now that we understand what we have to do, it's time to jump straight into
|
||||||
|
doing it. First, we need to refactor our current code to allow for the changes
|
||||||
|
we're going to make; then, we will use the new tools we defined to implement `let/in` expressions and lambda functions.
|
||||||
|
|
||||||
|
#### Infrastructure Changes
|
||||||
|
When finding captured variables, the notion of _free variables_ once again
|
||||||
|
becomes important. Recall that a free variable in an expression is a variable
|
||||||
|
that is defined outside of that expression. Consider, for example, the
|
||||||
|
expression:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
let x = 5 in x + y
|
||||||
|
```
|
||||||
|
|
||||||
|
In this expression, `x` is _not_ a free variable, since it's defined
|
||||||
|
in the `let/in` expression. On the other hand, `y` _is_ a free variable,
|
||||||
|
since it's not defined locally.
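
The same reasoning applies under a lambda, and where exactly we draw the expression boundary matters (this will come up again shortly). Here's another made-up example expression:

```Haskell
-- x is bound by the lambda, so only y and z are free in the whole expression;
-- looking at the body x + y * z on its own, x would count as free too.
\x -> x + y * z
```
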
|
||||||
|
|
||||||
|
The algorithm that we used for computing free variables was rather biased.
|
||||||
|
Previously, we only cared about the difference between a local variable
|
||||||
|
(defined somewhere in a function's body, or referring to one of the function's
|
||||||
|
parameters) and a global variable (referring to a global function).
|
||||||
|
This shows in our code for `find_free`. Consider, for example, this snippet:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/11/ast.cpp" 33 36 >}}
|
||||||
|
|
||||||
|
We created bindings in our type environment whenever we saw a new variable
|
||||||
|
being introduced, which led us to only count variables that we did not bind
|
||||||
|
_anywhere_ as 'free'. This approach is no longer sufficient. Consider,
|
||||||
|
for example, the following Haskell code:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
someFunction x =
|
||||||
|
let
|
||||||
|
y = x + 5
|
||||||
|
in
|
||||||
|
x*y
|
||||||
|
```
|
||||||
|
|
||||||
|
We can see that the variable `x` is introduced on line 1.
|
||||||
|
Thus, our current algorithm will happily store `x` in an environment,
|
||||||
|
and not count it as free. But clearly, the definition of `y` on line 3
|
||||||
|
captures `x`! If we were to lift `y` into global scope, we would need
|
||||||
|
to pass `x` to it as an argument. To fix this, we have to separate the creation
|
||||||
|
and assignment of type environments from free variable detection. Why
|
||||||
|
don't we start with `ast` and its descendants? Our signatures become:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void ast::find_free(std::set<std::string>& into);
|
||||||
|
type_ptr ast::typecheck(type_mgr& mgr, type_env_ptr& env);
|
||||||
|
```
|
||||||
|
|
||||||
|
For the most part, the code remains unchanged. We avoid
|
||||||
|
using `env` (and `this->env`), and default to marking
|
||||||
|
any variable as a free variable:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 39 41 >}}
|
||||||
|
|
||||||
|
Since we no longer use the environment, we resort to an
|
||||||
|
alternative method of removing bound variables. Here's
|
||||||
|
`ast_case::find_free`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 169 181 >}}
|
||||||
|
|
||||||
|
For each branch, we find the free variables. However, we
|
||||||
|
want to avoid marking variables that were introduced through
|
||||||
|
pattern matching as free (they are not). Thus, we use `pattern::find_variables`
|
||||||
|
to see which of the variables were bound by that pattern,
|
||||||
|
and remove them from the list of free variables. We
|
||||||
|
can then safely add the list of free variables in the pattern to the overall
|
||||||
|
list of free variables. Other `ast` descendants experience largely
|
||||||
|
cosmetic changes (such as the removal of the `env` parameter).
|
||||||
|
|
||||||
|
Of course, we must implement `find_variables` for each of our `pattern`
|
||||||
|
subclasses. Here's what I got for `pattern_var`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 402 404 >}}
|
||||||
|
|
||||||
|
And here's an equally terse implementation for `pattern_constr`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 417 419 >}}
|
||||||
|
|
||||||
|
We also want to update `definition_defn` with this change. Our signatures
|
||||||
|
become:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void definition_defn::find_free();
|
||||||
|
void definition_defn::insert_types(type_mgr& mgr, type_env_ptr& env, visibility v);
|
||||||
|
```
|
||||||
|
|
||||||
|
We'll get to the `visibility` parameter later. The implementations
|
||||||
|
are fairly simple. Just like `ast_case`, we want to erase each function's
|
||||||
|
parameters from its list of free variables:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 13 18 >}}
|
||||||
|
|
||||||
|
Since `find_free` no longer creates any type bindings or environments,
|
||||||
|
this functionality is shouldered by `insert_types`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 20 32 >}}
|
||||||
|
|
||||||
|
Now that free variables are properly computed, we are able to move on
|
||||||
|
to bigger and better things.
|
||||||
|
|
||||||
|
#### Nested Definitions
|
||||||
|
At present, our code for typechecking the whole program is located in
|
||||||
|
`main.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/11/main.cpp" 43 61 >}}
|
||||||
|
|
||||||
|
This piece of code goes on. We now want this to be more general. Soon, `let/in`
|
||||||
|
expressions will bring with them definitions that are inside other definitions,
|
||||||
|
which will not be reachable at the top level. The fundamental topological
|
||||||
|
sorting algorithm, though, will remain the same. We can abstract a series
|
||||||
|
of definitions that need to be ordered and then typechecked into a new struct,
|
||||||
|
`definition_group`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.hpp" 73 83 >}}
|
||||||
|
|
||||||
|
This will be exactly like a list of `defn`/`data` definitions we have at the
|
||||||
|
top level, except now, it can also occur in other places, like `let/in`
|
||||||
|
expressions. Once again, ignore for the moment the `visibility` field.
|
||||||
|
|
||||||
|
The way we defined function ordering requires some extra work from
|
||||||
|
`definition_group`. Recall that conceptually, functions can only depend
|
||||||
|
on other functions defined in the same `let/in` expression, or, more generally,
|
||||||
|
in the same `definition_group`. This means that we now classify free variables
|
||||||
|
in definitions into two categories: free variables that refer to "nearby"
|
||||||
|
definitions (i.e. definitions in the same group) and free variables that refer
|
||||||
|
to "far away" definitions. The "nearby" variables will be used to do
|
||||||
|
topological ordering, while the "far away" variables can be passed along
|
||||||
|
further up, perhaps into an enclosing `let/in` expression (for which "nearby"
|
||||||
|
variables aren't actually free, since they are bound in the `let`). We
|
||||||
|
implement this partitioning of variables in `definition_group::find_free`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 94 105 >}}
|
||||||
|
|
||||||
|
Notice that we have added a new `nearby_variables` field to `definition_defn`.
|
||||||
|
This is used on line 101, and will be once again used in `definition_group::typecheck`. Speaking of `typecheck`, let's look at its definition:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 107 145 >}}
|
||||||
|
|
||||||
|
This function is a little long, but conceptually, each `for` loop
|
||||||
|
contains a step of the process:
|
||||||
|
|
||||||
|
* The first loop declares all data types, so that constructors can
|
||||||
|
be verified to properly reference them.
|
||||||
|
* The second loop creates all the data type constructors.
|
||||||
|
* The third loop adds edges to our dependency graph.
|
||||||
|
* The fourth loop performs typechecking on the now-ordered groups of mutually
|
||||||
|
recursive functions.
|
||||||
|
* The first inner loop inserts the types of all the functions into the environment.
|
||||||
|
* The second inner loop actually performs typechecking.
|
||||||
|
* The third inner loop makes as many things polymorphic as possible.
|
||||||
|
|
||||||
|
We can now adjust our `parser.y` to use a `definition_group` instead of
|
||||||
|
two global vectors. First, we declare a global `definition_group`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/parser.y" 10 10 >}}
|
||||||
|
|
||||||
|
Then, we adjust `definitions` to create `definition_group`s:
|
||||||
|
|
||||||
|
{{< codelines "text" "compiler/12/parser.y" 59 68 >}}
|
||||||
|
|
||||||
|
We can now adjust `main.cpp` to use the global `definition_group`. Among
|
||||||
|
other changes (such as removing `extern` references to `vector`s, and updating
|
||||||
|
function signatures) we also update the `typecheck_program` function:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/main.cpp" 41 49 >}}
|
||||||
|
|
||||||
|
Now, our code is ready for typechecking nested definitions, but not for
|
||||||
|
compiling them. The main thing that we still have to address is the addition
|
||||||
|
of new definitions to the global scope. Let's take a look at that next.
|
||||||
|
|
||||||
|
#### Global Definitions
|
||||||
|
We want every function (and even non-function definitions that capture surrounding
|
||||||
|
variables), regardless of whether or not it was declared in the global scope,
|
||||||
|
to be processed and converted to LLVM code. The LLVM code conversion takes
|
||||||
|
several steps. First, the function's AST is translated into G-machine
|
||||||
|
instructions, which we covered in [part 5]({{< relref "05_compiler_execution.md" >}}),
|
||||||
|
by a process we covered in [part 6]({{< relref "06_compiler_compilation.md" >}}).
|
||||||
|
Then, an LLVM function is created for every function, and registered globally.
|
||||||
|
Finally, the G-machine instructions are converted into LLVM IR, which is
|
||||||
|
inserted into the previously created functions. These things
|
||||||
|
can't be done in a single pass: at the very least, we can't start translating
|
||||||
|
G-machine instructions into LLVM IR until functions are globally declared,
|
||||||
|
because we would otherwise have no means of referencing other functions. It
|
||||||
|
makes sense to me, then, to pull out all the 'global' definitions into
|
||||||
|
a single top-level list (perhaps somewhere in `main.cpp`).
|
||||||
|
|
||||||
|
Let's start implementing this with a new `global_scope` struct. This
|
||||||
|
struct will contain all of the global function and constructor definitions:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.hpp" 42 55 >}}
|
||||||
|
|
||||||
|
This struct will allow us to keep track of all the global definitions,
|
||||||
|
emitting them as we go, and then coming back to them as necessary.
|
||||||
|
There are also signs of another piece of functionality: `occurence_count`
|
||||||
|
and `mangle_name`. These two will be used to handle duplicate names.
|
||||||
|
|
||||||
|
We cannot have two global functions named the same thing, but we can
|
||||||
|
easily imagine a situation in which two separate `let/in` expressions define
|
||||||
|
a variable like `x`, which then needs to be lifted to the global scope. We
|
||||||
|
resolve such conflicts by slightly changing - "mangling" - the name of
|
||||||
|
one of the resulting global definitions. We allow the first global definition
|
||||||
|
to be named the same as it was originally (in our example, this would be `x`).
|
||||||
|
However, if we detect that a global definition `x` already exists (we
|
||||||
|
track this using `occurence_count`), we rename it to `x_1`. Subsequent
|
||||||
|
global definitions will end up being named `x_2`, `x_3`, and so on.
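
As a sketch of the renaming scheme itself (written in Haskell rather than the compiler's C++, with a hypothetical `mangle` helper rather than the `global_scope` method we'll see shortly): asking for `x` three times yields `x`, `x_1`, and `x_2`.

```Haskell
import qualified Data.Map as Map

-- Given a requested name and the number of times each name has been used so
-- far, produce the name to actually emit and the updated counts.
mangle :: String -> Map.Map String Int -> (String, Map.Map String Int)
mangle name counts = case Map.lookup name counts of
    Nothing -> (name, Map.insert name 1 counts)
    Just n  -> (name ++ "_" ++ show n, Map.insert name (n + 1) counts)
```
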
|
||||||
|
|
||||||
|
Alright, let's take a look at `global_function` and `global_constructor`.
|
||||||
|
Here's the former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.hpp" 11 27 >}}
|
||||||
|
|
||||||
|
There's nothing really surprising here: all of the fields
|
||||||
|
are reminiscent of `definition_defn`, though some type-related variables
|
||||||
|
are missing. We also include the three compilation-related methods,
|
||||||
|
`compile`, `declare_llvm`, and `generate_llvm`, which were previously in `definition_defn`. Let's look at `global_constructor` now:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.hpp" 29 40 >}}
|
||||||
|
|
||||||
|
This maps pretty closely to a single `definition_data::constructor`.
|
||||||
|
There's a difference here that is not clear at a glance, though. Whereas
|
||||||
|
the `name` in a `definition_defn` or `definition_data` refers to the
|
||||||
|
name as given by the user in the code, the `name` of a `global_function`
|
||||||
|
or `global_constructor` has gone through mangling, and thus, should be
|
||||||
|
unique.
|
||||||
|
|
||||||
|
Let's now look at the implementation of these structs' methods. The methods
|
||||||
|
`add_function` and `add_constructor` are pretty straightforward. Here's
|
||||||
|
the former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 39 43 >}}
|
||||||
|
|
||||||
|
And here's the latter:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 45 49 >}}
|
||||||
|
|
||||||
|
In both of these functions, we return a reference to the new global
|
||||||
|
definition we created. This helps us access the mangled `name` field,
|
||||||
|
and, in the case of `global_function`, inspect the `ast_ptr` that represents
|
||||||
|
its body.
|
||||||
|
|
||||||
|
Next, we have `global_scope::compile` and `global_scope::generate_llvm`,
|
||||||
|
which encapsulate these operations on all global definitions. Their
|
||||||
|
implementations are very straightforward, and are similar to the
|
||||||
|
`gen_llvm` function we used to have in our `main.cpp`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 51 67 >}}
|
||||||
|
|
||||||
|
Finally, we have `mangle`, which takes care of potentially duplicate
|
||||||
|
variable names:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 69 83 >}}
|
||||||
|
|
||||||
|
Let's move on to the global definition structs.
|
||||||
|
The `compile`, `declare_llvm`, and `generate_llvm` methods for
|
||||||
|
`global_function` are pretty much the same as those that we used to have
|
||||||
|
in `definition_defn`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 4 24 >}}
|
||||||
|
|
||||||
|
The same is true for `global_constructor` and its method `generate_llvm`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/global_scope.cpp" 26 37 >}}
|
||||||
|
|
||||||
|
Recall that in this case, we need not have two methods for declaring
|
||||||
|
and generating LLVM, since constructors don't reference other constructors,
|
||||||
|
and are always generated before any function definitions.
|
||||||
|
|
||||||
|
#### Visibility
|
||||||
|
Should we really be turning _all_ free variables in a function definition
|
||||||
|
into arguments? Consider the following piece of Haskell code:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
add x y = x + y
|
||||||
|
mul x y = x * y
|
||||||
|
something = mul (add 1 3) 3
|
||||||
|
```
|
||||||
|
|
||||||
|
In the definition of `something`, `mul` and `add` occur free.
|
||||||
|
A very naive lifting algorithm might be tempted to rewrite such a program
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
```Haskell {linenos=table}
|
||||||
|
add x y = x + y
|
||||||
|
mul x y = x * y
|
||||||
|
something' add mul = mul (add 1 3) 3
|
||||||
|
something = something' add mul
|
||||||
|
```
|
||||||
|
|
||||||
|
But that's absurd! Not only are `add` and `mul` available globally,
|
||||||
|
but such a rewrite generates another definition with free variables,
|
||||||
|
which means we didn't really improve our program in any way. From this
|
||||||
|
example, we can see that we don't want to be turning references to global
|
||||||
|
variables into function parameters. But how can we tell if a variable
|
||||||
|
we're trying to operate on is global or not? I propose a flag in our
|
||||||
|
`type_env`, which we'll augment to be used as a symbol table. To do
|
||||||
|
this, we update the implementation of `type_env` to map variables to
|
||||||
|
values of a struct `variable_data`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.hpp" 14 23 >}}
|
||||||
|
|
||||||
|
The `visibility` enum is defined as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.hpp" 11 11 >}}
|
||||||
|
|
||||||
|
As you can see from the above snippet, we also added a `mangled_name` field
|
||||||
|
to the new `variable_data` struct. We will be using this field shortly. We
|
||||||
|
also add a few methods to our `type_env`, and end up with the following:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.hpp" 32 45 >}}
|
||||||
|
|
||||||
|
We will come back to `find_free` and `find_free_except`, as well as
|
||||||
|
`set_mangled_name` and `get_mangled_name`. For now, we just adjust `bind` to
|
||||||
|
take a visibility parameter that defaults to `local`, and implement
|
||||||
|
`is_global`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 27 32 >}}
|
||||||
|
|
||||||
|
Remember the `visibility::global` in `parser.y`? This is where that comes in.
|
||||||
|
Specifically, we recall that `definition_defn::insert_types` is responsible
|
||||||
|
for placing function types into the environment, making them accessible
|
||||||
|
during typechecking later. At this time, we already need to know whether
|
||||||
|
or not the definitions are global or local (so that we can create the binding).
|
||||||
|
Thus, we add `visibility` as a parameter to `insert_types`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.hpp" 44 44 >}}
|
||||||
|
|
||||||
|
Since we are now moving from manually wrangling definitions towards using
|
||||||
|
`definition_group`, we make it so that the group itself provides this
|
||||||
|
argument. To do this, we add the `visibility` field from before to it,
|
||||||
|
and set it in the parser. One more thing: since constructors never
|
||||||
|
capture variables, we can always move them straight to the global
|
||||||
|
scope, and thus, we'll always mark them with `visibility::global`.
|
||||||
|
|
||||||
|
#### Managing Mangled Names
|
||||||
|
Just mangling names is not enough. Consider the following program:
|
||||||
|
|
||||||
|
```text {linenos=table}
|
||||||
|
defn packOne x = {
|
||||||
|
let {
|
||||||
|
data Packed a = { Pack a }
|
||||||
|
} in {
|
||||||
|
Pack x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn packTwo x = {
|
||||||
|
let {
|
||||||
|
data Packed a = { Pack a }
|
||||||
|
} in {
|
||||||
|
Pack x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< sidenote "right" "lifting-types-note" "Lifting the data type declarations" >}}
|
||||||
|
We are actually not <em>quite</em> doing something like the following snippet.
|
||||||
|
The reason for this is that we don't mangle the names for types. I pointed
|
||||||
|
out this potential issue in a sidenote in the previous post. Since the size
|
||||||
|
of this post is already ballooning, I will not deal with this issue here.
|
||||||
|
Even at the end of this post, our compiler will not be able to distinguish
|
||||||
|
between the two <code>Packed</code> types. We will hopefully get to it later.
|
||||||
|
{{< /sidenote >}} and their constructors into the global
|
||||||
|
scope gives us something like:
|
||||||
|
|
||||||
|
```text {linenos=table}
|
||||||
|
data Packed a = { Pack a }
|
||||||
|
data Packed_1 a = { Pack_1 a }
|
||||||
|
defn packOne x = { Pack x }
|
||||||
|
defn packTwo x = { Pack_1 x }
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice that we had to rename one of the calls to `Pack` to be a call to
|
||||||
|
`Pack_1` instead. To actually change our AST to reference `Pack_1`, we'd have
|
||||||
|
to traverse the whole tree, and make sure to keep track of definitions
|
||||||
|
that could shadow `Pack` further down. This is cumbersome. Instead, we
|
||||||
|
can mark a variable as referring to a mangled version of itself, and
|
||||||
|
access this information when needed. To do this, we add the `mangled_name`
|
||||||
|
field to the `variable_data` struct as we've seen above, and implement
|
||||||
|
the `set_mangled_name` and `get_mangled_name` methods. The former:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 34 37 >}}
|
||||||
|
|
||||||
|
And the latter:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 39 45 >}}
|
||||||
|
|
||||||
|
We don't allow `set_mangled_name` to affect variables that are declared
|
||||||
|
above the receiving `type_env`, and use the empty string as a 'none' value.
|
||||||
|
Now, when lifting data type constructors, we'll be able to use
|
||||||
|
`set_mangled_name` to make sure constructor calls are made correctly. We
|
||||||
|
will also be able to use this in other cases, like the translation
|
||||||
|
of local function definitions.
|
||||||
|
|
||||||
|
#### New AST Nodes
|
||||||
|
Finally, it's time for us to add new AST nodes to our language.
|
||||||
|
Specifically, these nodes are `ast_let` (for `let/in` expressions)
|
||||||
|
and `ast_lambda` for lambda functions. We declare them as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.hpp" 131 166 >}}
|
||||||
|
|
||||||
|
In `ast_let`, the `definitions` field corresponds to the original definitions
|
||||||
|
given by the user in the program, and the `in` field corresponds to the
|
||||||
|
expression which uses these definitions. In the process of lifting, though,
|
||||||
|
we eventually transfer each of the definitions to the global scope, replacing
|
||||||
|
their right hand sides with partial applications. After this transformation,
|
||||||
|
all the data type definitions are effectively gone, and all the function
|
||||||
|
definitions are converted into the simple form `x = f a1 ... an`. We hold
|
||||||
|
these post-transformation equations in the `translated_definitions` field,
|
||||||
|
and it's them that we compile in this node's `compile` method.
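
To make this concrete, here's roughly what the translation does to the nested definition from earlier (a sketch; `y_lifted` stands in for whatever mangled name the compiler actually picks):

```Haskell
-- Before lifting:
someFunction x = let y = x + 5 in x * y

-- After lifting: the body moves into a global function that takes the captured
-- variable as a parameter, and the local binding becomes an application of it.
someFunction' x = let y = y_lifted x in x * y
y_lifted x = x + 5
```
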
|
||||||
|
|
||||||
|
In `ast_lambda`, we allow multiple parameters (like Haskell's `\x y -> x + y`).
|
||||||
|
We store these parameters in the `params` field, and we store the lambda's
|
||||||
|
expression in the `body` field. Just like `definition_defn`,
|
||||||
|
the `ast_lambda` node maintains a separate environment in which its children
|
||||||
|
have been bound, and a list of variables that occur freely in its body. The
|
||||||
|
former is used for typechecking, while the latter is used for lifting.
|
||||||
|
Finally, the `translated` field holds the lambda function's form
|
||||||
|
after its body has been transformed into a global function. Similarly to
|
||||||
|
`ast_let`, this node will be in the form `f a1 ... an`.
|
||||||
|
|
||||||
|
The
|
||||||
|
observant reader will have noticed that we have a new method: `translate`.
|
||||||
|
This is a new method for all `ast` descendants, and will implement the
|
||||||
|
steps of moving definitions to the global scope and transforming the
|
||||||
|
program. Before we get to it, though, let's look at the other relevant
|
||||||
|
pieces of code for `ast_let` and `ast_lambda`. First, their grammar
|
||||||
|
rules in `parser.y`:
|
||||||
|
|
||||||
|
{{< codelines "text" "compiler/12/parser.y" 107 115 >}}
|
||||||
|
|
||||||
|
This is pretty similar to the rest of the grammar, so I will give this no
|
||||||
|
further explanation. Next, their `find_free` and `typecheck` code.
|
||||||
|
We can start with `ast_let`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 275 289 >}}
|
||||||
|
|
||||||
|
As you can see, `ast_let::find_free` works in a similar manner to `ast_case::find_free`.
|
||||||
|
It finds the free variables in the `in` node as well as in each of the definitions
|
||||||
|
(taking advantage of the fact that `definition_group::find_free` populates the
|
||||||
|
given set with "far away" free variables). It then filters out any variables bound in
|
||||||
|
the `let` from the set of free variables in `in`, and returns the result.
|
||||||
|
|
||||||
|
Typechecking in `ast_let` relies on `definition_group::typecheck`, which holds
|
||||||
|
all of the required functionality for checking the new definitions.
|
||||||
|
Once the definitions are typechecked, we use their type information to
|
||||||
|
typecheck the `in` part of the expression (passing `definitions.env` to the
|
||||||
|
call to `typecheck` to make the new definitions visible).
|
||||||
|
|
||||||
|
Next, we look at `ast_lambda`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 344 366 >}}
|
||||||
|
|
||||||
|
Again, `ast_lambda::find_free` works similarly to `definition_defn`, stripping
|
||||||
|
the variables expected by the function from the body's list of free variables.
|
||||||
|
Also like `definition_defn`, this new node remembers the free variables in
|
||||||
|
its body, which we will later use for lifting.
|
||||||
|
|
||||||
|
Typechecking in this node also proceeds similarly to `definition_defn`. We create
|
||||||
|
new type variables for each parameter and for the return value, and build up
|
||||||
|
a function type called `full_type`. We then typecheck the body using the
|
||||||
|
new environment (which now includes the variables), and return the function type we came up with.
|
||||||
|
|
||||||
|
#### Translation
|
||||||
|
Recalling the transformations we described earlier, we can observe two
|
||||||
|
major steps to what we have to do:
|
||||||
|
|
||||||
|
1. Move the body of the original definition into its own
|
||||||
|
global definition, adding all the captured variables as arguments.
|
||||||
|
2. Replace the right hand side of the `let/in` expression with an application
|
||||||
|
of the global definition to the variables it requires.
|
||||||
|
|
||||||
|
We will implement these in a new `translate` method, with the following
|
||||||
|
signature:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void ast::translate(global_scope& scope);
|
||||||
|
```
|
||||||
|
|
||||||
|
The `scope` parameter and its `add_function` and `add_constructor` methods will
|
||||||
|
be used to add definitions to the global scope. Each AST node will also
|
||||||
|
use this method to implement the second step. Currently, only
|
||||||
|
`ast_let` and `ast_lambda` will need to modify themselves - all other
|
||||||
|
nodes will simply recursively call this method on their children. Let's jump
|
||||||
|
straight into implementing this method for `ast_let`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 291 316 >}}
|
||||||
|
|
||||||
|
Since data type definitions don't really depend on anything else, we process
|
||||||
|
them first. This amounts to simply calling the `definition_data::into_globals`
|
||||||
|
method, which in turn calls `global_scope::add_constructor`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 86 92 >}}
|
||||||
|
|
||||||
|
Note how `into_globals` updates the mangled name of its constructor
|
||||||
|
via `set_mangled_name`. This will help us decide which global
|
||||||
|
function to call during code generation. More on that later.
|
||||||
|
|
||||||
|
Starting on line 295, we process the function definitions
|
||||||
|
in the `let/in` expression. We remember how many arguments were
|
||||||
|
explicitly added to the function definition, and then call the
|
||||||
|
definition's `into_global` method. This method is implemented
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/definition.cpp" 40 49 >}}
|
||||||
|
|
||||||
|
First, this method collects all the non-global free variables in
|
||||||
|
its body, which will need to be passed to the global definition
|
||||||
|
as arguments. It then combines this list with the arguments
|
||||||
|
the user explicitly added to it, recursively translates
|
||||||
|
its body, and creates a new global definition using `add_function`.
|
||||||
|
|
||||||
|
We return to `ast_let::translate` at line 299. Here,
|
||||||
|
we determine how many variables ended up being captured, by
|
||||||
|
subtracting the number of explicit parameters from the total
|
||||||
|
number of parameters the new global definition has. This number,
|
||||||
|
combined with the fact that we added all the 'implicit' arguments
|
||||||
|
to the function to the beginning of the list, will let us
|
||||||
|
iterate over all implicit arguments, creating a chain of partial
|
||||||
|
function applications.
|
||||||
|
|
||||||
|
But how do we build the application? We could use the mangled name
|
||||||
|
of the function, but this seems inelegant, especially since we
|
||||||
|
already keep track of mangling information in `type_env`. Instead,
|
||||||
|
we create a new, local environment, in which we place an updated
|
||||||
|
binding for the function, marking it global, and setting
|
||||||
|
its mangled name to the one generated by `global_scope`. This work is done
|
||||||
|
on lines 301-303. We create a reference to the global function
|
||||||
|
using the new environment on lines 305 and 306, and apply it to
|
||||||
|
all the implicit arguments on lines 307-313. Finally, we
|
||||||
|
add the new 'basic' equation into `translated_definitions`.
|
||||||
|
|
||||||
|
Let's take a look at translating `ast_lambda` next:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 368 392 >}}
|
||||||
|
|
||||||
|
Once again, on lines 369-375 we find all the arguments to the
|
||||||
|
global definition. On lines 377-382 we create a new global
|
||||||
|
function and a mangled environment, and start creating the
|
||||||
|
chain of function applications. On lines 384-390, we actually
|
||||||
|
create the arguments and apply the function to them. Finally,
|
||||||
|
on line 391, we store this new chain of applications in the
|
||||||
|
`translated` field.
|
||||||
|
|
||||||
|
#### Compilation
|
||||||
|
There's still another piece of the puzzle missing, and
|
||||||
|
that's how we're going to compile `let/in` expressions into
|
||||||
|
G-machine instructions. We have allowed these expressions
|
||||||
|
to be recursive, and maybe even mutually recursive. This
|
||||||
|
worked fine with global definitions; instead of specifying
|
||||||
|
where on the stack we can find the reference to a global
|
||||||
|
function, we just created a new global node, and called
|
||||||
|
it good. Things are different now, though, because the definitions
|
||||||
|
we're referencing aren't _just_ global functions; they are partial
|
||||||
|
applications of a global function. And to reference themselves,
|
||||||
|
or their neighbors, they have to have a handle on their own nodes. We do this
|
||||||
|
using an instruction that we foreshadowed in part 5, but didn't use
|
||||||
|
until just now: __Alloc__.
|
||||||
|
|
||||||
|
__Alloc__ creates placeholder nodes on the stack. These nodes
|
||||||
|
are indirections, the same kind that we use for lazy evaluation
|
||||||
|
and sharing elsewhere. We create an indirection node for every
|
||||||
|
definition that we then build; when an expression needs access
|
||||||
|
to a definition, we give it the indirection node. After
|
||||||
|
building the partial application graph for an expression,
|
||||||
|
we use __Update__, making the corresponding indirection
|
||||||
|
point to this new graph. This way, the 'handle' to a
|
||||||
|
definition is always accessible, and once the definition's expression
|
||||||
|
is built, the handle correctly points to it. Here's the implementation:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 319 332 >}}
|
||||||
|
|
||||||
|
First, we create the __Alloc__ instruction. Then, we update
|
||||||
|
our environment to map each definition name to a location
|
||||||
|
within the newly allocated batch of nodes. Since we iterate
|
||||||
|
the definitions in order, 'pushing' them into our environment,
|
||||||
|
we end up with the convention of having the later definitions
|
||||||
|
closer to the top of the G-machine stack. Thus, when we
|
||||||
|
iterate the definitions again, this time to compile their
|
||||||
|
bodies, we have to do so starting with the highest offset,
|
||||||
|
and working our way down to __Update__-ing the top of the stack.
|
||||||
|
Once the definitions have been compiled, we proceed to compiling
|
||||||
|
the `in` part of the expression as normal, using our updated
|
||||||
|
environment. Finally, we use __Slide__ to get rid of the definition
|
||||||
|
graphs, cleaning up the stack.
|
||||||
|
|
||||||
|
Compiling the `ast_lambda` is far more straightforward. We just
|
||||||
|
compile the resulting partial application as we normally would have:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 394 396 >}}
|
||||||
|
|
||||||
|
One more thing. Let's adopt the convention of storing __mangled__
|
||||||
|
names into the compilation environment. This way, rather than looking up
|
||||||
|
mangled names only for global functions, which would be a 'gotcha'
|
||||||
|
for anyone working on the compiler, we will always use the mangled
|
||||||
|
names during compilation. To make this change, we make sure that
|
||||||
|
`ast_case` also uses `mangled_name`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 242 242 >}}
|
||||||
|
|
||||||
|
We also update the logic for `ast_lid::compile` to use the mangled
|
||||||
|
name information:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/ast.cpp" 52 58 >}}
|
||||||
|
|
||||||
|
#### Fixing Type Generalization
|
||||||
|
This is a rather serious bug that made its way into the codebase
|
||||||
|
since part 10. Recall that we can only generalize type variables
|
||||||
|
that are not free in the environment. Thus far, we haven't done that,
|
||||||
|
and we really should: I ran into incorrectly inferred types
|
||||||
|
in my first test of the `let/in` language feature.
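
Here's a small example of the kind of thing that goes wrong (standard Hindley-Milner reasoning, not a program from the compiler's test suite):

```Haskell
-- y is just x, so its type variable is still free in the environment and must
-- not be generalized; the correct type is pairUp :: a -> (a, a). If we
-- generalized it anyway, y would look like `forall b. b`, and nonsense like
--     \x -> let y = x in (y && True, y + 1)
-- would be accepted.
pairUp x = let y = x in (y, y)
```
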
|
||||||
|
|
||||||
|
We need to make our code capable of finding free variables in the
|
||||||
|
type environment. This requires the `type_mgr`, which associates
|
||||||
|
with type variables the real types they represent, if any. We
|
||||||
|
thus create methods with signatures as follows:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const;
|
||||||
|
void type_env::find_free_except(const type_mgr& mgr, const std::string& avoid,
|
||||||
|
std::set<std::string>& into) const;
|
||||||
|
```
|
||||||
|
|
||||||
|
Why `find_free_except`? When generalizing a binding whose type is already
stored in the environment, every type variable we might want to generalize is
also free in the environment (through that very binding), so nothing would get
generalized at all. If those variables only occur in the type we're generalizing, though,
|
||||||
|
we shouldn't let that stop us! More generally, if we see type variables that
|
||||||
|
are only found in the same mutually recursive group as the binding we're
|
||||||
|
generalizing, we are free to generalize them too. Thus, we pass in
|
||||||
|
a reference to a `group`, and check if a variable is a member of that group
|
||||||
|
before searching it for free type variables. The implementations of the two
|
||||||
|
methods are straightforward:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 4 18 >}}
|
||||||
|
|
||||||
|
Note that `find_free_except` calls `find_free` in its recursive call. This
|
||||||
|
is not a bug: we _do_ want to include free type variables from bindings
|
||||||
|
that have the same name as the variable we're generalizing, but aren't found
|
||||||
|
in the same scope. As far as we're concerned, they're different variables!
|
||||||
|
The two methods use another `find_free` method which we add to `type_mgr`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type.cpp" 206 219 >}}
|
||||||
|
|
||||||
|
This one is a bit of a hack. Typically, while running `find_free`, a
|
||||||
|
`type_mgr` will resolve any type variables. However, variables from the
|
||||||
|
`forall` quantifier of a type scheme should not be resolved, since they
|
||||||
|
are explicitly generic. To prevent the type manager from erroneously resolving
|
||||||
|
such type variables, we create a new type manager that does not have
|
||||||
|
these variables bound to anything, and thus marks them as free. We then
|
||||||
|
filter these variables out of the final list of free variables.
|
||||||
|
|
||||||
|
Finally, `generalize` makes sure not to use variables that it finds free:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/type_env.cpp" 68 81 >}}
|
||||||
|
|
||||||
|
#### Putting It All Together
|
||||||
|
All that's left is to tie the parts we've created into one coherent whole
|
||||||
|
in `main.cpp`. First of all, since we moved all of the LLVM-related
|
||||||
|
code into `global_scope`, we can safely replace that functionality
|
||||||
|
in `main.cpp` with a method call:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/main.cpp" 121 132 >}}
|
||||||
|
|
||||||
|
On the other hand, we need top-level logic to handle `definition_group`s.
|
||||||
|
This is pretty straightforward, and the main trick is to remember to
|
||||||
|
update the function's mangled name. Right now, depending on the choice
|
||||||
|
of mangling algorithm, it's possible even for top-level functions to
|
||||||
|
have their names changed, and we must account for that. The whole code is:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/main.cpp" 52 62 >}}
|
||||||
|
|
||||||
|
Finally, we call `global_scope`'s methods in `main()`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/12/main.cpp" 148 151 >}}
|
||||||
|
|
||||||
|
That's it! Please note that I've mentioned or hinted at minor changes to the
|
||||||
|
codebase. Detailing every single change this late into the project is
|
||||||
|
needlessly time consuming and verbose; Gitea reports that I've made 677
|
||||||
|
insertions into and 215 deletions from the code. As always, I provide
|
||||||
|
the [source code for the compiler](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/12), and you can also take a look at the
|
||||||
|
[Gitea-generated diff](https://dev.danilafe.com/Web-Projects/blog-static/compare/1905601aaa96d11c771eae9c56bb9fc105050cda...21851e3a9c552383ee8c4bc878ea06e7d28c333e)
|
||||||
|
at the time of writing. If you want to follow along, feel free to check
|
||||||
|
them out!
|
||||||
|
|
||||||
|
### Running Our Programs
|
||||||
|
It's important to test all the language features that we just added. This
|
||||||
|
includes recursive definitions, nested function dependency cycles, and
|
||||||
|
uses of lambda functions. Some of the following examples will be rather
|
||||||
|
silly, but they should do a good job of checking that everything works
|
||||||
|
as we expect. Let's start with a simple use of a recursive definition
|
||||||
|
inside a `let/in`. A classic definition in that form is of `fix`
|
||||||
|
(the fixpoint combinator):
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
fix f = let x = f x in x
|
||||||
|
```
|
||||||
|
|
||||||
|
This defines `x` to be `f x`, which by substitution becomes `f (f x)`, and then
|
||||||
|
`f (f (f x))` and so on. The fixpoint combinator allows one to write a
|
||||||
|
recursive function that doesn't use its own name in the body. Rather,
|
||||||
|
we write a function expecting to receive 'itself' as a value:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
fix :: (a -> a) -> a
|
||||||
|
|
||||||
|
factRec :: (Int -> Int) -> Int -> Int
|
||||||
|
factRec f x = if x == 0 then 1 else x * f (x - 1)
|
||||||
|
|
||||||
|
fact :: Int -> Int
|
||||||
|
fact = fix factRec
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice that `factRec` doesn't reference itself, but rather takes
|
||||||
|
as an argument a function `f` that it expects to behave like 'factorial',
|
||||||
|
and uses that in its recursive case. We can write something similar
|
||||||
|
in our language, perhaps to create an infinite list of ones:
|
||||||
|
|
||||||
|
{{< codeblock "text" "compiler/12/examples/fixpoint.txt" >}}
|
||||||
|
|
||||||
|
We want `sumTwo` to take the first two elements from the list,
|
||||||
|
and return their sum. For an infinite list of ones, we expect
|
||||||
|
this sum to be equal to 2, and it is:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 2
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, let's try to define a function which has a mutually recursive pair
|
||||||
|
of definitions inside of a `let/in`. Let's also make these expressions
|
||||||
|
reference a function from the global scope, so that we know our
|
||||||
|
dependency tracking works as expected:
|
||||||
|
|
||||||
|
{{< codeblock "text" "compiler/12/examples/letin.txt" >}}
|
||||||
|
|
||||||
|
Here, we have a function `mergeUntil` which, given two lists
|
||||||
|
and a predicate, combines the two lists as long as
|
||||||
|
the predicate returns `True`. It does so using a convoluted
|
||||||
|
pair of mutually recursive functions, one of which
|
||||||
|
unpacks the left list, and the other the right. Each of the
|
||||||
|
functions calls the global function `if`. We also use two
|
||||||
|
definitions inside of `main` to create the two lists we're
|
||||||
|
going to merge. The compiler outputs the following (correct)
|
||||||
|
types:
|
||||||
|
|
||||||
|
```
|
||||||
|
const: forall bb bc . bc -> bb -> bc
|
||||||
|
if: Bool* -> List* Int* -> List* Int* -> List* Int*
|
||||||
|
main: Int*
|
||||||
|
mergeUntil: List* Int* -> List* Int* -> (Int* -> Bool*) -> List* Int*
|
||||||
|
sum: List* Int* -> Int*
|
||||||
|
```
|
||||||
|
|
||||||
|
And the result is 21, as would be expected from the sum of the numbers 1-6:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 21
|
||||||
|
```
|
||||||
|
|
||||||
|
Let's try lambda functions now. We can try using them with a higher-order function
|
||||||
|
like `map`:
|
||||||
|
|
||||||
|
{{< codeblock "text" "compiler/12/examples/lambda.txt" >}}
|
||||||
|
|
||||||
|
In this example, we first double every element in the list, then square it,
|
||||||
|
and finally take the sum. This should give us 4+16+36 = 56, and so it does:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 56
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, let's do some magic with a locally-declared data type. We'll make a
|
||||||
|
"packer" that creates a wrapped instance of a type, `Packed a`. Since the
|
||||||
|
constructor of this data type is not globally visible, it's not possible
|
||||||
|
to get the value back out, except by using an 'unpacking' function that
|
||||||
|
we provide:
|
||||||
|
|
||||||
|
{{< codeblock "text" "compiler/12/examples/packed.txt" >}}
|
||||||
|
|
||||||
|
Here, the `packer` definition returns a pair of the 'packing'
|
||||||
|
and 'unpacking' functions. The 'packing' function simply applies
|
||||||
|
the constructor of `Packed` to its argument, while the 'unpacking'
|
||||||
|
function performs pattern matching (which is possible since the
|
||||||
|
data type is still in scope there). We expect `unpack (pack 3)` to
|
||||||
|
return 3, and it does:
|
||||||
|
|
||||||
|
```
|
||||||
|
Result: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
Trying to pattern match, though, doesn't work, just like we would want!
|
||||||
|
|
||||||
|
This is enough to convince me that our changes do, indeed, work! Of
|
||||||
|
the 'major' components that I wanted to cover, only __Input/Output__
|
||||||
|
remains! Additionally, a [lobste.rs](https://lobste.rs) user suggested
|
||||||
|
that we also cover namespacing, and perhaps we will look into that as well.
|
||||||
|
Before either of those things, though, I think that I want to go through
|
||||||
|
the compiler and perform another round of improvements, similarly to
|
||||||
|
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
|
||||||
|
of refactoring while covering new content, since major changes need to
|
||||||
|
be explained and presented for the post to make sense. I hope to see
|
||||||
|
you in these future posts!
|
||||||
304
content/blog/backend_math_rendering.md
Normal file
@@ -0,0 +1,304 @@
|
|||||||
|
---
|
||||||
|
title: Rendering Mathematics On The Back End
|
||||||
|
date: 2020-07-21T14:54:26-07:00
|
||||||
|
tags: ["Website", "Nix", "Ruby", "KaTeX"]
|
||||||
|
---
|
||||||
|
|
||||||
|
Due to something of a streak of bad luck when it came to computers, I spent a
|
||||||
|
significant amount of time using a Linux-based Chromebook, and then a
|
||||||
|
Pinebook Pro. It was, in some way, enlightening. The things that I used to take
|
||||||
|
for granted with a 'powerful' machine now became a rare luxury: StackOverflow,
|
||||||
|
and other relatively static websites, took upwards of ten seconds to finish
|
||||||
|
loading. On Slack, each of my keypresses could take longer than 500ms to
|
||||||
|
appear on the screen, and sometimes, it would take several seconds. Some
|
||||||
|
websites would present me with a white screen, and remain that way for much
|
||||||
|
longer than I had time to wait. It was awful.
|
||||||
|
|
||||||
|
At one point, I installed uMatrix, and made it the default policy to block
|
||||||
|
all JavaScript. For the most part, this worked well. Of course, I had to
|
||||||
|
enable JavaScript for applications that needed to be interactive, like
|
||||||
|
Slack, and Discord. But for the most part, I was able to browse the majority
|
||||||
|
of the websites I normally browse. This went on until I started working
|
||||||
|
on the [compiler series]({{< relref "00_compiler_intro.md" >}}) again,
|
||||||
|
and discovered that the LaTeX math on my page, which was required
|
||||||
|
for displaying things like inference rules, didn't work without
|
||||||
|
JavaScript. I was left with two options:
|
||||||
|
|
||||||
|
* Allow JavaScript, and continue using MathJax to render my math.
|
||||||
|
* Make it so that the mathematics are rendered on the back end.
|
||||||
|
|
||||||
|
I've [previously written about math rendering]({{< relref "math_rendering_is_wrong.md" >}}),
|
||||||
|
and made the observation that MathJax's output for LaTeX is __identical__
|
||||||
|
on every computer. From the MathJax 2.6 change log:
|
||||||
|
|
||||||
|
> _Improved CommonHTML output_. The CommonHTML output now provides the same layout quality and MathML support as the HTML-CSS and SVG output. It is on average 40% faster than the other outputs and the markup it produces are identical on all browsers and thus can also be pre-generated on the server via MathJax-node.
|
||||||
|
|
||||||
|
It seems absurd, then, to offload this kind of work onto the users, to
|
||||||
|
be done over and over again. As should be clear from the title of
|
||||||
|
this post, this made me settle for the second option: it was
|
||||||
|
__obviously within reach__, especially for a statically-generated website
|
||||||
|
like mine, to render math on the backend.
|
||||||
|
|
||||||
|
I settled on the following architecture:
|
||||||
|
|
||||||
|
* As before, I would generate my pages using Hugo.
|
||||||
|
* I would use the KaTeX NPM package to render math.
|
||||||
|
* To build the website no matter what system I was on, I would use Nix.
|
||||||
|
|
||||||
|
It so happens that Nix isn't really required for using my approach in general.
|
||||||
|
I will give my setup here, but feel free to skip ahead.
|
||||||
|
|
||||||
|
### Setting Up A Nix Build
|
||||||
|
My `default.nix` file looks like this:
|
||||||
|
|
||||||
|
```Nix {linenos=table}
|
||||||
|
{ stdenv, hugo, fetchgit, pkgs, nodejs, ruby }:
|
||||||
|
|
||||||
|
let
|
||||||
|
url = "https://dev.danilafe.com/Web-Projects/blog-static.git";
|
||||||
|
rev = "<commit>";
|
||||||
|
sha256 = "<hash>";
|
||||||
|
requiredPackages = import ./required-packages.nix {
|
||||||
|
inherit pkgs nodejs;
|
||||||
|
};
|
||||||
|
in
|
||||||
|
stdenv.mkDerivation {
|
||||||
|
name = "blog-static";
|
||||||
|
version = rev;
|
||||||
|
src = fetchgit {
|
||||||
|
inherit url rev sha256;
|
||||||
|
};
|
||||||
|
builder = ./builder.sh;
|
||||||
|
converter = ./convert.rb;
|
||||||
|
buildInputs = [
|
||||||
|
hugo
|
||||||
|
requiredPackages.katex
|
||||||
|
(ruby.withPackages (ps: [ ps.nokogiri ]))
|
||||||
|
];
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
I'm using `node2nix` to generate the `required-packages.nix` file, which allows me,
|
||||||
|
even from a sandboxed Nix build, to download and install `npm` packages. This is needed
|
||||||
|
so that I have access to the `katex` binary at build time. I fed the following JSON file
|
||||||
|
to `node2nix`:
|
||||||
|
|
||||||
|
```JSON {linenos=table}
|
||||||
|
[
|
||||||
|
"katex"
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
The Ruby script I wrote for this (more on that soon) required the `nokogiri` gem, which
|
||||||
|
I used for traversing the HTML generated for my site. Hugo was obviously required to
|
||||||
|
generate the HTML.
|
||||||
|
|
||||||
|
### Converting LaTeX To HTML
|
||||||
|
After my first post complaining about the state of mathematics on the web, I received
|
||||||
|
the following email (which the author allowed me to share):
|
||||||
|
|
||||||
|
> Sorry for having a random stranger email you, but in your blog post
|
||||||
|
[(link)](https://danilafe.com/blog/math_rendering_is_wrong) you seem to focus on MathJax's
|
||||||
|
difficulty in rendering things server-side, while quietly ignoring that KaTeX's front
|
||||||
|
page advertises server-side rendering. Their documentation [(link)](https://katex.org/docs/options.html)
|
||||||
|
even shows (at least as of the time this email was sent) that it renders both HTML
|
||||||
|
(to be arranged nicely with their CSS) for visuals and MathML for accessibility.
|
||||||
|
|
||||||
|
The author of the email then kindly provided a link to a page they generated using KaTeX and
|
||||||
|
some Bash scripts. The math on this page was rendered at the time it was generated.
|
||||||
|
|
||||||
|
This is a great point, and KaTeX is indeed usable for server-side rendering. But I've
|
||||||
|
seen few people who do actually use it. Unfortunately, as I pointed out in my previous post on the subject,
|
||||||
|
few tools actually take your HTML page and replace LaTeX with rendered math.
|
||||||
|
Here's what I wrote about this last time:
|
||||||
|
|
||||||
|
> [In MathJax,] The bigger issue, though, was that the `page2html`
|
||||||
|
program, which rendered all the mathematics in a single HTML page,
|
||||||
|
was gone. I found `tex2html` and `text2htmlcss`, which could only
|
||||||
|
render equations without the surrounding HTML. I also found `mjpage`,
|
||||||
|
which replaced mathematical expressions in a page with their SVG forms.
|
||||||
|
|
||||||
|
This is still the case, in both MathJax and KaTeX. The ability
|
||||||
|
to render math in one step is the main selling point of front-end LaTeX renderers:
|
||||||
|
all you have to do is drop in a file from a CDN, and voila, you have your
|
||||||
|
math. There are no such easy answers for back-end rendering. In fact,
|
||||||
|
as we will soon see, it's not possible to just search-and-replace occurrences
|
||||||
|
of mathematics on your page, either. To actually get KaTeX working
|
||||||
|
on the backend, you need access to tools that handle the potential variety
|
||||||
|
of edge cases associated with HTML. Such tools, to my knowledge, do not
|
||||||
|
currently exist.
|
||||||
|
|
||||||
|
I decided to write my own Ruby script to get the job done. From this script, I
|
||||||
|
would call the `katex` command-line program, which would perform
|
||||||
|
the heavy lifting of rendering the mathematics.
|
||||||
|
|
||||||
|
There are two types of math on my website: inline math and display math.
|
||||||
|
On the command line ([here are the docs](https://katex.org/docs/cli.html)),
|
||||||
|
the distinction is made using the `--display-mode` argument. So, the general algorithm
|
||||||
|
is to replace the code inside the `$$...$$` with its display-rendered version,
|
||||||
|
and the code inside the `\(...\)` with the inline-rendered version. I came up with
|
||||||
|
the following Ruby function:
|
||||||
|
|
||||||
|
```Ruby {linenos=table}
|
||||||
|
def render_cached(cache, command, string, render_comment = nil)
|
||||||
|
cache.fetch(string) do |new|
|
||||||
|
puts " Rendering #{render_comment || new}"
|
||||||
|
cache[string] = Open3.popen3(command) do |i, o, e, t|
|
||||||
|
i.write new
|
||||||
|
i.close
|
||||||
|
o.read.force_encoding(Encoding::UTF_8).strip
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
Here, the `cache` argument is used to prevent re-running the `katex` command
on an equation that was already rendered before (the output is the same, after all).
The `command` is the specific shell command that we want to invoke; this would
be either `katex` or `katex -d`. The `string` is the math equation to render,
and the `render_comment` is the string to print to the console instead of the equation
(so that long display math equations are not printed out to standard out).

Then, given a substring of the HTML file, we use regular expressions
to find the `\(...\)` and `$$...$$`s, and use the `render_cached` method
on the LaTeX code inside.

```Ruby {linenos=table}
def perform_katex_sub(inline_cache, display_cache, content)
  rendered = content.gsub /\\\(((?:[^\\]|\\[^\)])*)\\\)/ do |match|
    render_cached(inline_cache, "katex", $~[1])
  end
  rendered = rendered.gsub /\$\$((?:[^\$]|\$[^\$])*)\$\$/ do |match|
    render_cached(display_cache, "katex -d", $~[1], "display")
  end
  return rendered
end
```

There's a bit of a trick to the final layer of this script. We want to be
really careful about where we replace LaTeX, and where we don't. In
particular, we _don't_ want to go into the `code` tags. Otherwise,
it wouldn't be possible to talk about LaTeX code! I also suspect that
captions, alt texts, and similar elements should be left alone.
However, I don't have those on my website (yet), and I won't worry about
them now. Either way, because of the code tags,
we can't just search-and-replace over the entire page; we need to be
context-aware. This is where `nokogiri` comes in. We parse the HTML, and iterate
over all of the 'text' nodes, calling `perform_katex_sub` on all
of those that _aren't_ inside code tags.

Fortunately, this kind of iteration is pretty easy to specify thanks to something called XPath.
This was my first time encountering it, but it seems extremely useful: it's
a sort of language for selecting XML nodes. First, you provide an 'axis',
which is used to specify the positions of the nodes you want to look at
relative to the root node. The axis `/` looks at the immediate children
(this would be the `html` tag in a properly formatted document, I would imagine).
The axis `//` looks at all the transitive children. That is, it will look at the
children of the root, then its children, and so on. There's also the `self` axis,
which looks at the node itself.

After you provide an axis, you need to specify the type of node that you want to
select. We can write `code`, for instance, to pick only the `<code>....</code>` tags
from the axis we've chosen. We can also use `*` to select any node, and we can
use `text()` to select text nodes, such as the `Hello` inside of `<b>Hello</b>`.

We can also apply some more conditions to the nodes we pick using `[]`.
For us, the relevant feature here is `not(...)`, which allows us to
select nodes that do __not__ match a particular condition. This is all
we need to know.

We write:

* `//`, starting to search for nodes everywhere, not just the root of the document.
* `*`, to match _any_ node. We want to replace math inside of `div`s, `span`s, `nav`s,
  all of the `h`s, and so on.
* `[not(self::code)]`, cutting out all the `code` tags.
* `/`, now selecting the nodes that are immediate descendants of the nodes we've selected.
* `text()`, giving us the text contents of all the nodes we've selected.

All in all:

```
//*[not(self::code)]/text()
```

Finally, we use this XPath from `nokogiri`:

```Ruby {linenos=table}
files = ARGV[0..-1]
inline_cache, display_cache = {}, {}

files.each do |file|
  puts "Rendering file: #{file}"
  document = Nokogiri::HTML.parse(File.open(file))
  document.search('//*[not(self::code)]/text()').each do |t|
    t.replace(perform_katex_sub(inline_cache, display_cache, t.content))
  end
  File.write(file, document.to_html)
end
```

I named this script `convert.rb`; it's used from inside of the Nix expression
and its builder, which we will cover below.

### Tying it All Together
Finally, I wanted an end-to-end script to generate HTML pages and render the LaTeX in them.
I used Nix for this, but the below script will largely be compatible with a non-Nix system.
I came up with the following, commenting on Nix-specific commands:

```Bash {linenos=table}
# Nix-specific; set up paths.
source $stdenv/setup

# Build site with Hugo
# The cp is Nix-specific; it copies the blog source into the current directory.
cp -r $src/* .
hugo --baseUrl="https://danilafe.com"

# Render math in HTML and XML files.
# $converter is Nix-specific; you can just use convert.rb.
find public/ -regex "public/.*\.html" | xargs ruby $converter

# Output result
# $out is Nix-specific; you can replace it with your destination folder.
mkdir $out
cp -r public/* $out/
```

This is it! Using the two scripts, `convert.rb` and `builder.sh`, I
was able to generate my blog with the math rendered on the back-end.
Please note, though, that I had to add the KaTeX CSS to my website's
`<head>`.

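For reference, a minimal sketch of the Nix derivation that wires the two scripts together might look like the following. This is an illustration rather than the exact expression used for this site: the `ruby.withPackages` call and the `nodePackages.katex` attribute are assumptions about what your nixpkgs channel provides, and the `src` and `converter` attributes simply become the `$src` and `$converter` variables that `builder.sh` reads.

```Nix
# Sketch only; not the site's actual expression. The custom builder sources
# $stdenv/setup itself, as shown in builder.sh above.
{ pkgs ? import <nixpkgs> {} }:

pkgs.stdenv.mkDerivation {
  name = "blog-with-katex";
  src = ./.;                # the Hugo site; becomes $src in builder.sh
  converter = ./convert.rb; # the KaTeX substitution script; becomes $converter
  builder = ./builder.sh;
  buildInputs = [
    pkgs.hugo
    (pkgs.ruby.withPackages (p: [ p.nokogiri ]))
    pkgs.nodePackages.katex # provides the katex CLI (assumed attribute name)
  ];
}
```
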
### Caveats
The main caveat of my approach is performance. For every piece of
mathematics that I render, I invoke the `katex` command. This incurs
the penalty of Node's startup time, every time, and makes my approach
take a few dozen seconds to run on my relatively small site. The
better approach would be to use a NodeJS script, rather than a Ruby one,
to perform the conversion. KaTeX also provides an API, so such a NodeJS
script can find the files, parse the HTML, and perform the substitutions.
I did quite like using `nokogiri` here, though, and I hope that an equivalently
pleasant solution exists in JavaScript.

Re-rendering the whole website is also pretty wasteful. I rarely change the
mathematics on more than one page at a time, but every time I do so, I have
to re-run the script, and therefore re-render every page. This makes sense
for me, since I use Nix, and my builds are pretty much always performed
from scratch. On the other hand, for others, this may not be the best solution.

### Alternatives
The same person who sent me the original email above also pointed out
[this `pandoc` filter for KaTeX](https://github.com/Zaharid/pandoc_static_katex).
I do not use Pandoc, but from what I can see, this filter relies on
Pandoc's `Math` AST nodes, and applies KaTeX to each of those. This
should work, but wasn't applicable in my case, since Hugo's shortcodes
don't mix well with Pandoc. However, it certainly seems like a workable
solution.

### Conclusion
With the removal of MathJax from my site, it is now completely JavaScript-free,
and contains virtually the same HTML that it did beforehand. This, I hope,
makes it work better on devices where computational power is more limited.
I also hope that it illustrates a general principle: it's very possible,
and plausible, to render LaTeX on the back-end for a static site.
112
content/blog/crystal_nix_revisited.md
Normal file
@@ -0,0 +1,112 @@
---
title: Building a Crystal Project with Nix, Revisited
date: 2020-04-26T18:37:22-07:00
tags: ["Crystal", "Nix"]
---

As I've described in my [previous post]({{< relref "crystal_nix.md" >}}), the process for compiling a Crystal project with Nix is a fairly straightforward one. As is standard within the Nix ecosystem, the project's dependencies, as specified by the source language's build system (shards, in Crystal's case), are converted into a Nix expression (`shards.nix`). These dependencies are then used in a derivation, which, in Crystal's case, can take advantage of `buildCrystalPackage` to reduce boilerplate build scripts. All is well.

Things start to fall apart a little bit when the Crystal project being built is more complex. The predefined infrastructure (like `buildCrystalPackage`)
{{< sidenote "right" "versatility-note" "is not written with versatility in mind," >}}
This is not a bad thing at all; it's much better to get something working for the practical case, rather than concoct an overcomplicated solution that covers all theoretically possible cases.
{{< /sidenote >}} though it seems to work exceptionally well in the common case. Additionally, I discovered that the compiler itself has some quirks, and killed a few hours of my time trying to figure out some unexpected behaviors.

This post will cover the extra, more obscure steps I had to take to build an HTTPS-enabled Crystal project.

### First Problem: Git-Based Dependencies
A lot of my projects use Crystal libraries that are not hosted on GitHub at all; I use a private Git server, and most of my non-public code resides on it. The Crystal people within Nix don't seem to like this: let's look at the `crystal2nix.cr` file in the [nixpkgs repository](https://github.com/NixOS/nixpkgs/blob/1ffdf01777360f548cc7c10ef5b168cbe78fd183/pkgs/development/compilers/crystal/crystal2nix.cr). In particular, consider lines 18 and 19:
```Crystal {linenos=table,linenostart=18}
yaml.shards.each do |key, value|
  owner, repo = value["github"].split("/")
```
Ouch! If you so much as mention a non-GitHub repository in your `shard.lock` file, you will experience a good old uncaught exception. Things don't end there, either. Nix provides a convenient `fetchFromGitHub` function, which only requires a repository name and its enclosing namespace (user or group). `crystal2nix` uses this by generating a file with that information:
```Crystal {linenos=table,linenostart=34}
file.puts %(  #{key} = {)
file.puts %(    owner = "#{owner}";)
file.puts %(    repo = "#{repo}";)
file.puts %(    rev = "#{rev}";)
file.puts %(    sha256 = "#{sha256}";)
file.puts %(  };)
```
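Concretely, an entry in the generated `shards.nix` ends up looking roughly like the following; the shard, revision, and hash here are placeholders rather than real values:

```Nix
# Roughly one entry of the generated shards.nix; every value is a placeholder.
{
  kemal = {
    owner = "kemalcr";
    repo = "kemal";
    rev = "v0.26.1";
    sha256 = "0000000000000000000000000000000000000000000000000000";
  };
}
```
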
And, of course, `build-package.nix` (of which [this is the version at the time of writing](https://github.com/NixOS/nixpkgs/blob/912eb6b120eba15237ff053eafc4b5d90577685b/pkgs/development/compilers/crystal/build-package.nix)) uses this to declare dependencies:
```Nix {linenos=table,linenostart=26}
crystalLib = linkFarm "crystal-lib" (lib.mapAttrsToList (name: value: {
  inherit name;
  path = fetchFromGitHub value;
}) (import shardsFile));
```
This effectively creates a folder of dependencies cloned from GitHub, which is then placed into `lib` as if `shards` was run:
```Nix {linenos=table,linenostart=37}
configurePhase = args.configurePhase or lib.concatStringsSep "\n" ([
  "runHook preConfigure"
] ++ lib.optional (lockFile != null) "ln -s ${lockFile} ./shard.lock"
  ++ lib.optional (shardsFile != null) "ln -s ${crystalLib} lib"
  ++ [ "runHook postConfigure "]);
```
Sleek, except that there's no place in this flow for dependencies based _only_ on Git! `crystalLib` is declared locally in a `let/in` expression, and we don't have access to it; neither can we call `linkFarm` again, since this results in a derivation, which, with different inputs, will be created at a different path. To work around this, I made my own Nix package, called `customCrystal`, and had it pass several modifications to `buildCrystalPackage`:
```Nix
{ stdenv, lib, linkFarm, fetchgit, fetchFromGitHub }:

{ crystal,
  gitShardsFile ? null,
  lockFile ? null,
  shardsFile ? null, ...}@args:

let
  buildArgs = builtins.removeAttrs args [ "crystal" ];
  githubLinks = lib.mapAttrsToList (name: value: {
    inherit name;
    path = fetchFromGitHub value;
  }) (import shardsFile);
  gitLinks = lib.mapAttrsToList (name: value: {
    inherit name;
    path = fetchgit { inherit (value) url rev sha256; };
  }) (import gitShardsFile);
  crystalLib = linkFarm "crystal-lib" (githubLinks ++ gitLinks);
  configurePhase = args.configurePhase or lib.concatStringsSep "\n" ([
    "runHook preConfigure"
  ] ++ lib.optional (lockFile != null) "ln -s ${lockFile} ./shard.lock"
    ++ lib.optional (shardsFile != null) "ln -s ${crystalLib} lib"
    ++ [ "runHook postConfigure "]);
in
crystal.buildCrystalPackage (buildArgs // { inherit configurePhase; })
```
This does pretty much the equivalent of what `buildCrystalPackage` does (indeed, `buildCrystalPackage` still does the heavy lifting). However, this snippet also retrieves Git repositories from the `gitShardsFile`, and creates the `lib` folder using both Git and GitHub dependencies. I didn't bother writing a `crystal2nix` equivalent for this, since I only had a couple of dependencies. I invoked my new function like `buildCrystalPackage`, with the addition of passing in the Crystal package, and that problem was solved.

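For reference, the hand-written `gitShardsFile` only needs to provide what the `fetchgit` call above asks for: an attribute set mapping each shard's name to a `url`, `rev`, and `sha256`. A sketch with placeholder values:

```Nix
# A sketch of a hand-written git-shards.nix; the shard name, URL, revision,
# and hash are placeholders for whatever your private Git server hosts.
{
  my_private_shard = {
    url = "https://git.example.com/me/my_private_shard.git";
    rev = "0123456789abcdef0123456789abcdef01234567";
    sha256 = "0000000000000000000000000000000000000000000000000000";
  };
}
```

The function itself is then invoked just like `buildCrystalPackage`, with the Crystal package passed in as the extra `crystal` argument.
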
### Second Problem: OpenSSL
The package I was trying to build used Crystal's built-in HTTP client, which, in turn, required OpenSSL. This, I thought, would be rather straightforward: add `openssl` to my package's `buildInputs`, and be done with it. It was not as simple, though, and I was greeted with a wall of errors like this one:

```
/nix/store/sq2b0dqlq243mqn4ql5h36xmpplyy20k-binutils-2.31.1/bin/ld: _main.o: in function `__crystal_main':
main_module:(.text+0x6f0): undefined reference to `SSL_library_init'
/nix/store/sq2b0dqlq243mqn4ql5h36xmpplyy20k-binutils-2.31.1/bin/ld: main_module:(.text+0x6f5): undefined reference to `SSL_load_error_strings'
/nix/store/sq2b0dqlq243mqn4ql5h36xmpplyy20k-binutils-2.31.1/bin/ld: main_module:(.text+0x6fa): undefined reference to `OPENSSL_add_all_algorithms_noconf'
/nix/store/sq2b0dqlq243mqn4ql5h36xmpplyy20k-binutils-2.31.1/bin/ld: main_module:(.text+0x6ff): undefined reference to `ERR_load_crypto_strings'
/nix/store/sq2b0dqlq243mqn4ql5h36xmpplyy20k-binutils-2.31.1/bin/ld: _main.o: in function `*HTTP::Client::new<String, (Int32 | Nil), Bool>:HTTP::Client':
```

Some snooping led me to discover that these symbols were part of OpenSSL 1.0.2, support for which ended in 2019. OpenSSL 1.1.0 has these symbols deprecated, and from what I can tell, they might be missing from the `.so` file altogether. I tried changing the package to specifically accept OpenSSL 1.0.2, but that didn't work, either: for some reason, the Crystal compiler kept running the `gcc` command with `-L...openssl-1.1.0`. It also seemed like the compiler itself was built against the most recent version of OpenSSL, so what's the issue? I discovered this is a problem in the compiler itself. Consider the following line from Crystal's `openssl/lib_ssl.cr` [source file](https://github.com/crystal-lang/crystal/blob/0.34.0/src/openssl/lib_ssl.cr):

```Crystal {linenos=table,linenostart=8}
{% ssl_version = `hash pkg-config 2> /dev/null && pkg-config --silence-errors --modversion libssl || printf %s 0.0.0`.split.last.gsub(/[^0-9.]/, "") %}
```

Excuse me? If `pkg-config` is not found (which, in Nix, it won't be by default), Crystal assumes that it's using the _least_ up-to-date version of OpenSSL,
{{< sidenote "right" "version-note" "indicated by version code 0.0.0." >}}
The Crystal compiler compares version numbers based on semantic versioning, it seems, and 0.0.0 will always compare to be less than any other version of OpenSSL. Thus, code 0.0.0 indicates that Crystal should assume it's dealing with an extremely old version of OpenSSL.
{{< /sidenote >}} This matters, because later on in the file, we get this beauty:

```Crystal {linenos=table,linenostart=215}
{% if compare_versions(OPENSSL_VERSION, "1.1.0") >= 0 %}
  fun tls_method = TLS_method : SSLMethod
{% else %}
  fun ssl_library_init = SSL_library_init
  fun ssl_load_error_strings = SSL_load_error_strings
  fun sslv23_method = SSLv23_method : SSLMethod
{% end %}
```

That would be where the linker errors are coming from. Adding `pkg-config` to `buildInputs` along with `openssl` fixes the issue, and my package builds without problems.

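In derivation terms, the fix amounts to listing both packages. Here is a minimal sketch, not my actual derivation: the `pname`, `version`, and `src` values are placeholders, other project-specific attributes are omitted, and on older nixpkgs channels the `pkg-config` attribute is spelled `pkgconfig`.

```Nix
# Sketch of the fix: with pkg-config on the build's PATH, the macro in
# lib_ssl.cr reports the real OpenSSL version instead of 0.0.0, and the
# right set of `fun` bindings gets compiled in.
{ pkgs ? import <nixpkgs> {} }:

pkgs.crystal.buildCrystalPackage {
  pname = "my-https-app"; # placeholder name, version, and source
  version = "0.1.0";
  src = ./.;
  buildInputs = [ pkgs.openssl pkgs.pkg-config ];
}
```
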
### Conclusion
Crystal is a rather obscure language, and Nix is a rather obscure build system. I'm grateful that the infrastructure I'm using exists, and that using it is as streamlined as it is. There is, however, always room for improvement. If I have time, I will be opening pull requests for the `crystal2nix` tool on GitHub (to allow Git-based repositories), and perhaps on the Crystal compiler as well (to try to figure out what to do about `pkg-config`). If someone else wants to do it themselves, I'd be happy to hear how it goes! Otherwise, I hope you found this post useful.
BIN
content/blog/dell_is_horrible/brokenkey.jpg
Normal file
Size: 94 KiB
BIN
content/blog/dell_is_horrible/brokenlcd.jpg
Normal file
Size: 476 KiB
BIN
content/blog/dell_is_horrible/dm_1.png
Normal file
Size: 158 KiB
BIN
content/blog/dell_is_horrible/dm_2.png
Normal file
Size: 204 KiB
BIN
content/blog/dell_is_horrible/dm_3.png
Normal file
Size: 81 KiB
BIN
content/blog/dell_is_horrible/dm_4.png
Normal file
Size: 94 KiB
BIN
content/blog/dell_is_horrible/dm_5.png
Normal file
Size: 102 KiB