Drafts of code and markdown.

This commit is contained in:
Danila Fedorin 2019-08-26 00:13:10 -07:00
parent d60d4e61bd
commit 918dfbe980
10 changed files with 335 additions and 14 deletions

View File

@ -1 +1,88 @@
#include "ast.hpp" #include "ast.hpp"
// Maps a binary operator tag to the symbol under which its type is stored in
// the type environment ("+", "-", "*" or "/").
// Throws the integer 0 if `op` is not one of the four known operators.
std::string op_name(binop op) {
    const char* symbol = nullptr;
    switch(op) {
        case PLUS:   symbol = "+"; break;
        case MINUS:  symbol = "-"; break;
        case TIMES:  symbol = "*"; break;
        case DIVIDE: symbol = "/"; break;
    }
    // No case matched: the value is outside the operators handled above.
    if(symbol == nullptr) throw 0;
    return symbol;
}
// An integer literal always has the base type "Int"; neither the type manager
// nor the environment is consulted.
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr int_type(new type_base("Int"));
    return int_type;
}
// The type of a lowercase identifier is whatever the environment binds it to.
// Throws the integer 0 when the lookup yields an empty type_ptr, matching how
// ast_binop::typecheck and pattern_constr::match treat a failed lookup, so a
// null type never reaches unification.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    if(!bound_type) throw 0;
    return bound_type;
}
// The type of an uppercase identifier (a constructor name) is whatever the
// environment binds it to.
// Throws the integer 0 when the lookup yields an empty type_ptr, matching how
// ast_binop::typecheck and pattern_constr::match treat a failed lookup, so a
// null type never reaches unification.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr bound_type = env.lookup(id);
    if(!bound_type) throw 0;
    return bound_type;
}
// Type checks `left <op> right` by treating the operator as a curried
// two-argument function looked up in the environment under its symbol.
// Returns the placeholder standing for the operator's result type.
// Throws the integer 0 if the operator has no binding in `env`.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    // Operands are checked before the operator is looked up, left first.
    type_ptr lhs_type = left->typecheck(mgr, env);
    type_ptr rhs_type = right->typecheck(mgr, env);

    type_ptr operator_type = env.lookup(op_name(op));
    if(!operator_type) throw 0;

    // Fresh placeholders for the shape `a -> b -> c`.
    type_ptr lhs_slot = mgr.new_type();
    type_ptr rhs_slot = mgr.new_type();
    type_ptr result_slot = mgr.new_type();
    type_ptr inner_arrow(new type_arr(rhs_slot, result_slot));
    type_ptr outer_arrow(new type_arr(lhs_slot, inner_arrow));

    // The operator must have that shape, and each operand must fit its slot.
    mgr.unify(outer_arrow, operator_type);
    mgr.unify(lhs_slot, lhs_type);
    mgr.unify(rhs_slot, rhs_type);
    return result_slot;
}
// Type checks the application `left right`: `left` must be a function whose
// parameter type matches the type of `right`.
// Returns the placeholder standing for the function's result type.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr function_type = left->typecheck(mgr, env);
    type_ptr argument_type = right->typecheck(mgr, env);

    // Fresh placeholders for the shape `a -> b`.
    type_ptr param_slot = mgr.new_type();
    type_ptr result_slot = mgr.new_type();
    type_ptr expected_shape(new type_arr(param_slot, result_slot));

    mgr.unify(expected_shape, function_type);
    mgr.unify(param_slot, argument_type);
    return result_slot;
}
// Type checks a case expression. Each branch's pattern is matched against the
// type of the scrutinee (`of`) inside its own child scope, and every branch
// body is unified with one shared placeholder, which is returned as the type
// of the whole expression.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr scrutinee_type = of->typecheck(mgr, env);
    type_ptr result_type = mgr.new_type();

    for(const auto& alternative : branches) {
        // Pattern variables are bound in a scope created per branch.
        type_env branch_env = env.scope();
        alternative->pat->match(scrutinee_type, mgr, branch_env);
        type_ptr body_type = alternative->expr->typecheck(mgr, branch_env);
        mgr.unify(result_type, body_type);
    }

    return result_type;
}
// A variable pattern matches anything: the variable is simply bound to the
// type being matched in the supplied environment.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    const std::string& bound_name = var;
    env.bind(bound_name, t);
}
// Matches a constructor pattern such as `Cons x xs` against type `t`.
//
// The constructor's type is looked up in `env` and peeled one arrow per
// pattern parameter, binding each parameter to the arrow's left-hand side.
// What remains must be a base type and is unified with `t`.
//
// Throws the integer 0 if the constructor is not bound, if the pattern has
// more parameters than the constructor type has arrows, or if what remains
// after peeling is not a base type (e.g. too few parameters were given).
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr constructor_type = env.lookup(constr);
    if(!constructor_type) throw 0;

    // Range-for avoids comparing a signed index against params.size().
    for(const auto& param : params) {
        type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
        if(!arr) throw 0;

        env.bind(param, arr->left);
        constructor_type = arr->right;
    }

    // Check the remaining type before unifying, so a pattern with the wrong
    // arity is rejected without first recording a binding in the manager.
    type_base* result_type = dynamic_cast<type_base*>(constructor_type.get());
    if(!result_type) throw 0;

    mgr.unify(t, constructor_type);
}

View File

@ -2,15 +2,20 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "type.hpp" #include "type.hpp"
#include "env.hpp"
struct ast { struct ast {
virtual ~ast() = default; virtual ~ast() = default;
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
}; };
using ast_ptr = std::unique_ptr<ast>; using ast_ptr = std::unique_ptr<ast>;
struct pattern { struct pattern {
virtual ~pattern() = default; virtual ~pattern() = default;
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
}; };
using pattern_ptr = std::unique_ptr<pattern>; using pattern_ptr = std::unique_ptr<pattern>;
@ -37,6 +42,9 @@ using constructor_ptr = std::unique_ptr<constructor>;
struct definition { struct definition {
virtual ~definition() = default; virtual ~definition() = default;
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
}; };
using definition_ptr = std::unique_ptr<definition>; using definition_ptr = std::unique_ptr<definition>;
@ -53,6 +61,8 @@ struct ast_int : public ast {
explicit ast_int(int v) explicit ast_int(int v)
: value(v) {} : value(v) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
}; };
struct ast_lid : public ast { struct ast_lid : public ast {
@ -60,6 +70,8 @@ struct ast_lid : public ast {
explicit ast_lid(std::string i) explicit ast_lid(std::string i)
: id(std::move(i)) {} : id(std::move(i)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
}; };
struct ast_uid : public ast { struct ast_uid : public ast {
@ -67,6 +79,8 @@ struct ast_uid : public ast {
explicit ast_uid(std::string i) explicit ast_uid(std::string i)
: id(std::move(i)) {} : id(std::move(i)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
}; };
struct ast_binop : public ast { struct ast_binop : public ast {
@ -76,6 +90,8 @@ struct ast_binop : public ast {
ast_binop(binop o, ast_ptr l, ast_ptr r) ast_binop(binop o, ast_ptr l, ast_ptr r)
: op(o), left(std::move(l)), right(std::move(r)) {} : op(o), left(std::move(l)), right(std::move(r)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
}; };
struct ast_app : public ast { struct ast_app : public ast {
@ -84,6 +100,8 @@ struct ast_app : public ast {
ast_app(ast_ptr l, ast_ptr r) ast_app(ast_ptr l, ast_ptr r)
: left(std::move(l)), right(std::move(r)) {} : left(std::move(l)), right(std::move(r)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
}; };
struct ast_case : public ast { struct ast_case : public ast {
@ -92,6 +110,8 @@ struct ast_case : public ast {
ast_case(ast_ptr o, std::vector<branch_ptr> b) ast_case(ast_ptr o, std::vector<branch_ptr> b)
: of(std::move(o)), branches(std::move(b)) {} : of(std::move(o)), branches(std::move(b)) {}
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
}; };
struct pattern_var : public pattern { struct pattern_var : public pattern {
@ -99,6 +119,8 @@ struct pattern_var : public pattern {
pattern_var(std::string v) pattern_var(std::string v)
: var(std::move(v)) {} : var(std::move(v)) {}
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
}; };
struct pattern_constr : public pattern { struct pattern_constr : public pattern {
@ -107,6 +129,8 @@ struct pattern_constr : public pattern {
pattern_constr(std::string c, std::vector<std::string> p) pattern_constr(std::string c, std::vector<std::string> p)
: constr(std::move(c)), params(std::move(p)) {} : constr(std::move(c)), params(std::move(p)) {}
void match(type_ptr t, type_mgr&, type_env& env) const;
}; };
struct definition_defn : public definition { struct definition_defn : public definition {
@ -114,10 +138,16 @@ struct definition_defn : public definition {
std::vector<std::string> params; std::vector<std::string> params;
ast_ptr body; ast_ptr body;
type_ptr return_type;
std::vector<type_ptr> param_types;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b) definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) { : name(std::move(n)), params(std::move(p)), body(std::move(b)) {
} }
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
}; };
struct definition_data : public definition { struct definition_data : public definition {
@ -126,4 +156,7 @@ struct definition_data : public definition {
definition_data(std::string n, std::vector<constructor_ptr> cs) definition_data(std::string n, std::vector<constructor_ptr> cs)
: name(std::move(n)), constructors(std::move(cs)) {} : name(std::move(n)), constructors(std::move(cs)) {}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
}; };

View File

@ -1 +1 @@
rm -f parser.o parser.cpp parser.hpp stack.hh scanner.cpp scanner.o type.o a.out rm -f parser.o parser.cpp parser.hpp stack.hh scanner.cpp scanner.o type.o env.o ast.o definition.o a.out

View File

@ -1,6 +1,9 @@
bison -o parser.cpp -d parser.y bison -o parser.cpp -d parser.y
flex -o scanner.cpp scanner.l flex -o scanner.cpp scanner.l
g++ -c -o scanner.o scanner.cpp g++ -g -c -o scanner.o scanner.cpp
g++ -c -o parser.o parser.cpp g++ -g -c -o parser.o parser.cpp
g++ -c -o type.o type.cpp g++ -g -c -o type.o type.cpp
g++ main.cpp parser.o scanner.o type.o g++ -g -c -o env.o env.cpp
g++ -g -c -o ast.o ast.cpp
g++ -g -c -o definition.o definition.cpp
g++ -g main.cpp parser.o scanner.o type.o env.o ast.o definition.o

View File

@ -0,0 +1,48 @@
#include "ast.hpp"
// First pass for a function definition: invents a placeholder for the return
// type and one per parameter, assembles them into a curried arrow type, and
// binds the function's name to that type in `env`.
//
// The arrow is built from the right, so `param_types` ends up holding the
// placeholder of the last parameter first and that of the first parameter
// last.
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();

    type_ptr signature = return_type;
    // Only the number of parameters matters here, not their names.
    for(auto remaining = params.size(); remaining > 0; --remaining) {
        type_ptr fresh_param = mgr.new_type();
        signature = type_ptr(new type_arr(fresh_param, signature));
        param_types.push_back(fresh_param);
    }

    env.bind(name, signature);
}
// Second pass for a function definition: checks the body in a child scope in
// which every parameter is bound to the placeholder created for it during the
// first pass, then unifies the body's type with the declared return type.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env body_env = env.scope();

    // param_types is stored last-parameter-first, so it is walked backwards
    // while the parameter names are walked forwards.
    auto type_it = param_types.rbegin();
    for(auto name_it = params.begin();
            name_it != params.end() && type_it != param_types.rend();
            ++name_it, ++type_it) {
        body_env.bind(*name_it, *type_it);
    }

    type_ptr body_type = body->typecheck(mgr, body_env);
    mgr.unify(return_type, body_type);
}
// First pass for a data definition: binds every constructor's name in `env`
// to its curried function type. A constructor `C A B` of data type `T` is
// given the type `A -> B -> T`.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_ptr return_type = type_ptr(new type_base(name));

    for(auto& constructor : constructors) {
        type_ptr full_type = return_type;

        // The arrow type is assembled from the right, so the field types are
        // visited last-to-first. Visiting them first-to-last would produce
        // `B -> A -> T` for `C A B`, and pattern_constr::match would then bind
        // pattern variables to the wrong field types.
        // NOTE(review): assumes constructor->types is a std::vector (or any
        // container with reverse iterators) -- confirm against ast.hpp.
        for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); ++it) {
            type_ptr type = type_ptr(new type_base(*it));
            full_type = type_ptr(new type_arr(type, full_type));
        }

        env.bind(constructor->name, full_type);
    }
}
// Second pass for a data definition. Intentionally does nothing: this
// function has no body to check, and everything a data definition adds to the
// environment is bound by typecheck_first.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
    // Deliberately empty.
}

View File

@ -8,8 +8,32 @@ void yy::parser::error(const std::string& msg) {
extern std::vector<definition_ptr> program; extern std::vector<definition_ptr> program;
void typecheck_program(const std::vector<definition_ptr>& prog) {
type_mgr mgr;
type_env env;
type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr(
int_type,
type_ptr(new type_arr(int_type, int_type))));
env.bind("+", binop_type);
env.bind("-", binop_type);
env.bind("*", binop_type);
env.bind("/", binop_type);
for(auto& def : prog) {
def->typecheck_first(mgr, env);
}
for(auto& def : prog) {
def->typecheck_second(mgr, env);
}
}
int main() { int main() {
yy::parser parser; yy::parser parser;
parser.parse(); parser.parse();
typecheck_program(program);
std::cout << program.size() << std::endl; std::cout << program.size() << std::endl;
} }

View File

@ -32,7 +32,7 @@ extern yy::parser::symbol_type yylex();
%define api.value.type variant %define api.value.type variant
%define api.token.constructor %define api.token.constructor
%type <std::vector<std::string>> lowercaseParams %type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions %type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches %type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors %type <std::vector<constructor_ptr>> constructors
@ -71,6 +71,11 @@ lowercaseParams
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); } | lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
; ;
/* A possibly empty, left-recursive sequence of UID tokens, collected in
   source order into a std::vector<std::string>. */
uppercaseParams
: %empty { $$ = std::vector<std::string>(); }
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
aAdd aAdd
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); } : aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); } | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
@ -102,7 +107,7 @@ case
; ;
/* One or more case branches, collected in source order.
   The new branch must be appended to $$: after `$$ = std::move($1)` the
   vector in $1 has been moved from, so pushing onto $1 would silently drop
   every branch after the first. */
branches
    : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
    | branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1)); }
    ;
@ -129,7 +134,7 @@ constructors
; ;
constructor constructor
: UID lowercaseParams : UID uppercaseParams
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); } { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
; ;

View File

@ -70,7 +70,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
throw 0; throw 0;
} }
void type_mgr::bind(std::string s, type_ptr t) { void type_mgr::bind(const std::string& s, type_ptr t) {
type_var* other = dynamic_cast<type_var*>(t.get()); type_var* other = dynamic_cast<type_var*>(t.get());
if(other && other->name == s) return; if(other && other->name == s) return;

View File

@ -40,5 +40,5 @@ struct type_mgr {
void unify(type_ptr l, type_ptr r); void unify(type_ptr l, type_ptr r);
type_ptr resolve(type_ptr t, type_var*& var); type_ptr resolve(type_ptr t, type_var*& var);
void bind(std::string s, type_ptr t); void bind(const std::string& s, type_ptr t);
}; };

View File

@ -1,5 +1,5 @@
--- ---
title: Compiling a Functional Language Using C++, Part 3 - Operations On Trees title: Compiling a Functional Language Using C++, Part 3 - Type Checking
date: 2019-08-06T14:26:38-07:00 date: 2019-08-06T14:26:38-07:00
draft: true draft: true
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
@ -24,7 +24,7 @@ programs we get from the parser valid? See for yourself:
``` ```
data Bool = { True, False } data Bool = { True, False }
defn main { 3 + True } defn main = { 3 + True }
``` ```
Obviously, that's not right. The parser accepts it - this matches our grammar. Obviously, that's not right. The parser accepts it - this matches our grammar.
@ -32,7 +32,7 @@ But giving meaning to this program is not easy, since we have no clear
way of adding 3 and some data type. Similarly: way of adding 3 and some data type. Similarly:
``` ```
defn main { 1 2 3 4 5 } defn main = { 1 2 3 4 5 }
``` ```
What is this? It's a sequence of applications, starting with `1 2`. Numbers What is this? It's a sequence of applications, starting with `1 2`. Numbers
@ -412,4 +412,125 @@ When we look up a variable name, we first look in this node we created.
If we don't find the variable we're looking for, we move on to the next If we don't find the variable we're looking for, we move on to the next
node. The benefit of this is that we won't be re-creating a map node. The benefit of this is that we won't be re-creating a map
for each branch, and just creating a node with the changes. for each branch, and just creating a node with the changes.
Let's implement exactly that: Let's implement exactly that. First, the header:
{{< codeblock "C++" "compiler/03/env.hpp" >}}
And the source file:
{{< codeblock "C++" "compiler/03/env.cpp" >}}
Nothing should seem too surprising. Of note is the fact
that we're not using smart pointers for `scope`,
and that the child we create during the call
would be invalid if the parent goes out of scope
/ is released. We're gearing this towards
creating new environments on the stack, and we'll
take care not to let a parent go out of scope
before the child.
At last, it's time to declare a new type checking method.
We start with a signature inside `ast`:
```
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
```
We also implement the \\(\\text{matchp}\\) function
as a method `match` on `pattern` with the following signature:
```
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const;
```
We declare this in every subclass of `ast`. Let's take a look
at the implementation now:
{{< codeblock "C++" "compiler/03/ast.cpp" >}}
This looks good, but we're not done yet. We can type
check expressions, but our program isn't
made up of expressions. Rather, it's made up of
declarations. Further, we can't look at the declarations
in isolation. Consider these two functions:
```
defn double x = { x + x }
defn quadruple x = { double (double x) }
```
Assuming we have an environment containing `x` when we typecheck the body
of `double`, our algorithm will work out fine. But what about
`quadruple`? It needs to know what `double` is, or at least that it exists.
We could also envision two mutually recursive functions. Let's
assume we have the functions `eq` and `if` in global scope. We can write
two functions, `even` and `odd`:
```
defn even x = { if (eq x 0) True (odd (x-1)) }
defn odd x = { if (eq x 0) False (even (x-1)) }
```
`odd` needs to know about `even`, and `even` needs
to know about `odd`. Thus, before we do any checking,
we need to populate a global environment with __some__
type for each function we declare. We will
use what we know about the function for our
initial declaration: if the function
takes two parameters, its type will be `a -> b -> c`.
If it takes one parameter, its type will be `a -> b`.
What's more, though, is that we need to make sure
that the function's parameters are passed in the environment
when checking its body, and that these parameters' types
are the same as the placeholder types in the function's
"declaration".
We'll typecheck the program in two passes. First,
we'll go through each definition, and add any
functions it declares to the global scope. Then,
we will go through each definition again, and,
if it's a function, typecheck its body using
the previously fleshed out global scope.
We'll add two functions, `typecheck_first`
and `typecheck_second` corresponding to
these two stages. Their signatures:
```
virtual void typecheck_first(type_mgr& mgr, type_env& env);
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const;
```
Furthermore, in the `definition_defn`, we will keep an
`std::vector` of `type_ptr`, in which the first element is the
type of the __last__ parameter, and so on. We switch around
the order of arguments because we build up the `a -> b -> ... -> x`
type signature from the right (`->` is right associative), and
thus we'll be creating the types right-to-left, too. We also
add a `type_ptr` field which holds the type for the function's
return value. We keep these two things in the `definition_defn` so
that they persist between the two typechecking stages: we want to use
the types from the first stage to aid in checking the body in the second stage.
Here's the code for the implementation:
{{< codeblock "C++" "compiler/03/definition.cpp" >}}
And finally, our updated main:
{{< codeblock "C++" "compiler/03/main.cpp" >}}
Notice that we manually add the types for our binary operators to the environment.
Let's run our project on a few examples. On our two "bad" examples, we get
the very eloquent error:
```
terminate called after throwing an instance of 'int'
[2] 9776 abort (core dumped) ./a.out < bad2.txt
```
That's what we get for throwing 0.
So far, our program has thrown in 100% of cases. Let's verify it actually
accepts valid programs! We'll try our very first example from today,
as well as these two:
{{< rawblock "compiler/03/works2.txt" >}}
{{< rawblock "compiler/03/works3.txt" >}}
All of our examples print the number of declarations in the program,
which means they don't throw 0. And so, we have typechecking!