Add CMake file and code for post 4

This commit is contained in:
Danila Fedorin 2019-08-26 18:52:25 -07:00
parent d03aa181e3
commit 6344ce11ce
17 changed files with 766 additions and 0 deletions

View File

@ -0,0 +1,24 @@
cmake_minimum_required(VERSION 3.1)
project(compiler)
find_package(BISON)
find_package(FLEX)
bison_target(parser
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
COMPILE_FLAGS "-d")
flex_target(scanner
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
add_flex_bison_dependency(scanner parser)
add_executable(compiler
ast.cpp ast.hpp definition.cpp
env.cpp env.hpp
type.cpp type.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp
)
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})

82
code/compiler/04/ast.cpp Normal file
View File

@ -0,0 +1,82 @@
#include "ast.hpp"
std::string op_name(binop op) {
switch(op) {
case PLUS: return "+";
case MINUS: return "-";
case TIMES: return "*";
case DIVIDE: return "/";
}
throw 0;
}
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
return type_ptr(new type_base("Int"));
}
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id);
}
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id);
}
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr ftype = env.lookup(op_name(op));
if(!ftype) throw 0;
type_ptr return_type = mgr.new_type();
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one));
mgr.unify(arrow_two, ftype);
return return_type;
}
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck(mgr, env);
type_ptr rtype = right->typecheck(mgr, env);
type_ptr return_type = mgr.new_type();
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
mgr.unify(arrow, ltype);
return return_type;
}
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr case_type = of->typecheck(mgr, env);
type_ptr branch_type = mgr.new_type();
for(auto& branch : branches) {
type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck(mgr, new_env);
mgr.unify(branch_type, curr_branch_type);
}
return branch_type;
}
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
env.bind(var, t);
}
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
type_ptr constructor_type = env.lookup(constr);
if(!constructor_type) throw 0;
for(int i = 0; i < params.size(); i++) {
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
if(!arr) throw 0;
env.bind(params[i], arr->left);
constructor_type = arr->right;
}
mgr.unify(t, constructor_type);
type_base* result_type = dynamic_cast<type_base*>(constructor_type.get());
if(!result_type) throw 0;
}

162
code/compiler/04/ast.hpp Normal file
View File

@ -0,0 +1,162 @@
#pragma once
#include <memory>
#include <vector>
#include "type.hpp"
#include "env.hpp"
struct ast {
virtual ~ast() = default;
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
};
using ast_ptr = std::unique_ptr<ast>;
struct pattern {
virtual ~pattern() = default;
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
};
using pattern_ptr = std::unique_ptr<pattern>;
struct branch {
pattern_ptr pat;
ast_ptr expr;
branch(pattern_ptr p, ast_ptr a)
: pat(std::move(p)), expr(std::move(a)) {}
};
using branch_ptr = std::unique_ptr<branch>;
struct constructor {
std::string name;
std::vector<std::string> types;
constructor(std::string n, std::vector<std::string> ts)
: name(std::move(n)), types(std::move(ts)) {}
};
using constructor_ptr = std::unique_ptr<constructor>;
struct definition {
virtual ~definition() = default;
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
};
using definition_ptr = std::unique_ptr<definition>;
enum binop {
PLUS,
MINUS,
TIMES,
DIVIDE
};
struct ast_int : public ast {
int value;
explicit ast_int(int v)
: value(v) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
struct ast_lid : public ast {
std::string id;
explicit ast_lid(std::string i)
: id(std::move(i)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
struct ast_uid : public ast {
std::string id;
explicit ast_uid(std::string i)
: id(std::move(i)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
struct ast_binop : public ast {
binop op;
ast_ptr left;
ast_ptr right;
ast_binop(binop o, ast_ptr l, ast_ptr r)
: op(o), left(std::move(l)), right(std::move(r)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
struct ast_app : public ast {
ast_ptr left;
ast_ptr right;
ast_app(ast_ptr l, ast_ptr r)
: left(std::move(l)), right(std::move(r)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
struct ast_case : public ast {
ast_ptr of;
std::vector<branch_ptr> branches;
ast_case(ast_ptr o, std::vector<branch_ptr> b)
: of(std::move(o)), branches(std::move(b)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
};
struct pattern_var : public pattern {
std::string var;
pattern_var(std::string v)
: var(std::move(v)) {}
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
};
struct pattern_constr : public pattern {
std::string constr;
std::vector<std::string> params;
pattern_constr(std::string c, std::vector<std::string> p)
: constr(std::move(c)), params(std::move(p)) {}
void match(type_ptr t, type_mgr&, type_env& env) const;
};
struct definition_defn : public definition {
std::string name;
std::vector<std::string> params;
ast_ptr body;
type_ptr return_type;
std::vector<type_ptr> param_types;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
};
struct definition_data : public definition {
std::string name;
std::vector<constructor_ptr> constructors;
definition_data(std::string n, std::vector<constructor_ptr> cs)
: name(std::move(n)), constructors(std::move(cs)) {}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
};

View File

@ -0,0 +1,48 @@
#include "ast.hpp"
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
return_type = mgr.new_type();
type_ptr full_type = return_type;
for(auto it = params.rbegin(); it != params.rend(); it++) {
type_ptr param_type = mgr.new_type();
full_type = type_ptr(new type_arr(param_type, full_type));
param_types.push_back(param_type);
}
env.bind(name, full_type);
}
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
type_env new_env = env.scope();
auto param_it = params.begin();
auto type_it = param_types.rbegin();
while(param_it != params.end() && type_it != param_types.rend()) {
new_env.bind(*param_it, *type_it);
param_it++;
type_it++;
}
type_ptr body_type = body->typecheck(mgr, new_env);
mgr.unify(return_type, body_type);
}
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
type_ptr return_type = type_ptr(new type_base(name));
for(auto& constructor : constructors) {
type_ptr full_type = return_type;
for(auto& type_name : constructor->types) {
type_ptr type = type_ptr(new type_base(type_name));
full_type = type_ptr(new type_arr(type, full_type));
}
env.bind(constructor->name, full_type);
}
}
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
// Nothing
}

16
code/compiler/04/env.cpp Normal file
View File

@ -0,0 +1,16 @@
#include "env.hpp"
type_ptr type_env::lookup(const std::string& name) const {
auto it = names.find(name);
if(it != names.end()) return it->second;
if(parent) return parent->lookup(name);
return nullptr;
}
void type_env::bind(const std::string& name, type_ptr t) {
names[name] = t;
}
type_env type_env::scope() const {
return type_env(this);
}

16
code/compiler/04/env.hpp Normal file
View File

@ -0,0 +1,16 @@
#pragma once
#include <map>
#include "type.hpp"
struct type_env {
std::map<std::string, type_ptr> names;
type_env const* parent = nullptr;
type_env(type_env const* p)
: parent(p) {}
type_env() : type_env(nullptr) {}
type_ptr lookup(const std::string& name) const;
void bind(const std::string& name, type_ptr t);
type_env scope() const;
};

View File

@ -0,0 +1,2 @@
data Bool = { True, False }
defn main = { 3 + True }

View File

@ -0,0 +1 @@
defn main = { 1 2 3 4 5 }

View File

@ -0,0 +1,2 @@
defn main = { plus 320 6 }
defn plus x y = { x + y }

View File

@ -0,0 +1,3 @@
defn add x y = { x + y }
defn double x = { add x x }
defn main = { double 163 }

View File

@ -0,0 +1,7 @@
data List = { Nil, Cons Int List }
defn length l = {
case l of {
Nil -> { 0 }
Cons x xs -> { 1 + length xs }
}
}

39
code/compiler/04/main.cpp Normal file
View File

@ -0,0 +1,39 @@
#include "ast.hpp"
#include "parser.hpp"
#include "type.hpp"
void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl;
}
extern std::vector<definition_ptr> program;
void typecheck_program(const std::vector<definition_ptr>& prog) {
type_mgr mgr;
type_env env;
type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr(
int_type,
type_ptr(new type_arr(int_type, int_type))));
env.bind("+", binop_type);
env.bind("-", binop_type);
env.bind("*", binop_type);
env.bind("/", binop_type);
for(auto& def : prog) {
def->typecheck_first(mgr, env);
}
for(auto& def : prog) {
def->typecheck_second(mgr, env);
}
}
int main() {
yy::parser parser;
parser.parse();
typecheck_program(program);
std::cout << program.size() << std::endl;
}

140
code/compiler/04/parser.y Normal file
View File

@ -0,0 +1,140 @@
%{
#include <string>
#include <iostream>
#include "ast.hpp"
#include "parser.hpp"
std::vector<definition_ptr> program;
extern yy::parser::symbol_type yylex();
%}
%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
%token <int> INT
%token DEFN
%token DATA
%token CASE
%token OF
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
%token <std::string> LID
%token <std::string> UID
%language "c++"
%define api.value.type variant
%define api.token.constructor
%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor
%start program
%%
program
: definitions { program = std::move($1); }
;
definitions
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
;
definition
: defn { $$ = std::move($1); }
| data { $$ = std::move($1); }
;
defn
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
{ $$ = definition_ptr(
new definition_defn(std::move($2), std::move($3), std::move($6))); }
;
lowercaseParams
: %empty { $$ = std::vector<std::string>(); }
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
uppercaseParams
: %empty { $$ = std::vector<std::string>(); }
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
aAdd
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
| aMul { $$ = std::move($1); }
;
aMul
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
| app { $$ = std::move($1); }
;
app
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
| appBase { $$ = std::move($1); }
;
appBase
: INT { $$ = ast_ptr(new ast_int($1)); }
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
| OPAREN aAdd CPAREN { $$ = std::move($2); }
| case { $$ = std::move($1); }
;
case
: CASE aAdd OF OCURLY branches CCURLY
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
;
branches
: branches branch { $$ = std::move($1); $1.push_back(std::move($2)); }
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
;
branch
: pattern ARROW OCURLY aAdd CCURLY
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
;
pattern
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
| UID lowercaseParams
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
;
data
: DATA UID EQUAL OCURLY constructors CCURLY
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
;
constructors
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
| constructor
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
;
constructor
: UID uppercaseParams
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
;

View File

@ -0,0 +1,34 @@
%option noyywrap
%{
#include <iostream>
#include "ast.hpp"
#include "parser.hpp"
#define YY_DECL yy::parser::symbol_type yylex()
%}
%%
[ \n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
%%

78
code/compiler/04/type.cpp Normal file
View File

@ -0,0 +1,78 @@
#include "type.hpp"
#include <sstream>
#include <algorithm>
std::string type_mgr::new_type_name() {
int temp = last_id++;
std::string str = "";
while(temp != -1) {
str += (char) ('a' + (temp % 26));
temp = temp / 26 - 1;
}
std::reverse(str.begin(), str.end());
return str;
}
type_ptr type_mgr::new_type() {
return type_ptr(new type_var(new_type_name()));
}
type_ptr type_mgr::new_arrow_type() {
return type_ptr(new type_arr(new_type(), new_type()));
}
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) {
type_var* cast;
var = nullptr;
while((cast = dynamic_cast<type_var*>(t.get()))) {
auto it = types.find(cast->name);
if(it == types.end()) {
var = cast;
break;
}
t = it->second;
}
return t;
}
void type_mgr::unify(type_ptr l, type_ptr r) {
type_var* lvar;
type_var* rvar;
type_arr* larr;
type_arr* rarr;
type_base* lid;
type_base* rid;
l = resolve(l, lvar);
r = resolve(r, rvar);
if(lvar) {
bind(lvar->name, r);
return;
} else if(rvar) {
bind(rvar->name, l);
return;
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
(rarr = dynamic_cast<type_arr*>(r.get()))) {
unify(larr->left, rarr->left);
unify(larr->right, rarr->right);
return;
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
(rid = dynamic_cast<type_base*>(r.get()))) {
if(lid->name == rid->name) return;
}
throw 0;
}
void type_mgr::bind(const std::string& s, type_ptr t) {
type_var* other = dynamic_cast<type_var*>(t.get());
if(other && other->name == s) return;
types[s] = t;
}

44
code/compiler/04/type.hpp Normal file
View File

@ -0,0 +1,44 @@
#pragma once
#include <memory>
#include <map>
struct type {
virtual ~type() = default;
};
using type_ptr = std::shared_ptr<type>;
struct type_var : public type {
std::string name;
type_var(std::string n)
: name(std::move(n)) {}
};
struct type_base : public type {
std::string name;
type_base(std::string n)
: name(std::move(n)) {}
};
struct type_arr : public type {
type_ptr left;
type_ptr right;
type_arr(type_ptr l, type_ptr r)
: left(std::move(l)), right(std::move(r)) {}
};
struct type_mgr {
int last_id = 0;
std::map<std::string, type_ptr> types;
std::string new_type_name();
type_ptr new_type();
type_ptr new_arrow_type();
void unify(type_ptr l, type_ptr r);
type_ptr resolve(type_ptr t, type_var*& var);
void bind(const std::string& s, type_ptr t);
};

View File

@ -0,0 +1,68 @@
---
title: Compiling a Functional Language Using C++, Part 4 - Small Improvements
date: 2019-08-06T14:26:38-07:00
draft: true
tags: ["C and C++", "Functional Languages", "Compilers"]
---
We've done quite a big push in the previous post. We defined
type rules for our language, implemented unification,
and then implemented unification to enforce these rules for
our program. The post was pretty long, and even then we
weren't able to fit quite everything into it.
For instance, we threw 0 whenever an error occured. This
gives us no indication of what actually went wrong. We should
probably define an exception class, one that can contain
information about the error, and report it to the user.
Also, when there's no error, our compiler doesn't
really tell us anything at all about the code besides
the number of definitions. We probably want to see the types
of these definitions, or at least some intermediate information.
At the very least, we want to have the __ability__ to see
this information.
Finally, we have no build system. We are creating more
and more source files, and so far (unless you've taken
initiative), we've been compiling them by hand. We want
to only compile source files that have changed,
and we want to have a standard definition of how to
build our program.
### Setting up CMake
This would be extremely easy if not for Flex and Bison. We start with the usual:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 1 2 >}}
Next, we want to set up Flex and Bison. CMake provides two commands for this:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 4 5 >}}
We now have access to commands that allow us to tell CMake about our parser
and tokenizer (or scanner). We use them as follows:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 6 12 >}}
We also want CMake to know that the scanner needs to parser's header file
in order to compile. We add this dependency:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 13 13 >}}
Finally, we add our source code to a CMake target. We use
the `BISON_parser_OUTPUTS` and `FLEX_scanner_OUTPUTS` to
pass in the source files generated by Flex and Bison.
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 15 22 >}}
Almost there! `parser.cpp` will be generated in the `build` directory
during an out-of-source build, and so will `parser.hpp`. When building,
`parser.cpp` will try to look for `ast.hpp`, and `main.cpp` will look for
`parser.hpp`. We want them to be able to find each other, so we
add both the source directory and the build (binary) directory to
the list of includes directories:
{{< codelines "CMake" "compiler/04/CMakeLists.txt" 23 24 >}}
That's it for CMake! Let's try our build:
```
cmake -S . -B build
cd build && make -j8
```
We get an executable called `compiler`. Excellent! Here's the whole file:
{{< codeblock "CMake" "compiler/04/CMakeLists.txt" >}}