Write up type code
This commit is contained in:
parent
ac589a8b0a
commit
1820a05fcc
@ -3,18 +3,17 @@
|
||||
#include <algorithm>
|
||||
|
||||
std::string type_mgr::new_type_name() {
|
||||
std::ostringstream oss;
|
||||
int temp = last_id++;
|
||||
std::string str = "";
|
||||
|
||||
do {
|
||||
oss << (char) ('a' + (temp % 26));
|
||||
temp /= 26;
|
||||
} while(temp);
|
||||
std::string str = oss.str();
|
||||
while(temp != -1) {
|
||||
str += (char) ('a' + (temp % 26));
|
||||
temp = temp / 26 - 1;
|
||||
}
|
||||
|
||||
std::reverse(str.begin(), str.end());
|
||||
return str;
|
||||
};
|
||||
}
|
||||
|
||||
type_ptr type_mgr::new_type() {
|
||||
return type_ptr(new type_var(new_type_name()));
|
||||
@ -23,3 +22,57 @@ type_ptr type_mgr::new_type() {
|
||||
type_ptr type_mgr::new_arrow_type() {
|
||||
return type_ptr(new type_arr(new_type(), new_type()));
|
||||
}
|
||||
|
||||
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) {
|
||||
type_var* cast;
|
||||
|
||||
var = nullptr;
|
||||
while((cast = dynamic_cast<type_var*>(t.get()))) {
|
||||
auto it = types.find(cast->name);
|
||||
|
||||
if(it == types.end()) {
|
||||
var = cast;
|
||||
break;
|
||||
}
|
||||
t = it->second;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||
type_var* lvar;
|
||||
type_var* rvar;
|
||||
type_arr* larr;
|
||||
type_arr* rarr;
|
||||
type_base* lid;
|
||||
type_base* rid;
|
||||
|
||||
l = resolve(l, lvar);
|
||||
r = resolve(r, rvar);
|
||||
|
||||
if(lvar) {
|
||||
bind(lvar->name, r);
|
||||
return;
|
||||
} else if(rvar) {
|
||||
bind(rvar->name, l);
|
||||
return;
|
||||
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
|
||||
(rarr = dynamic_cast<type_arr*>(r.get()))) {
|
||||
unify(larr->left, rarr->left);
|
||||
unify(larr->right, rarr->right);
|
||||
return;
|
||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||
if(lid->name == rid->name) return;
|
||||
}
|
||||
|
||||
throw 0;
|
||||
}
|
||||
|
||||
void type_mgr::bind(std::string s, type_ptr t) {
|
||||
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||
|
||||
if(other && other->name == s) return;
|
||||
types[s] = t;
|
||||
}
|
||||
|
@ -15,11 +15,11 @@ struct type_var : public type {
|
||||
: name(std::move(n)) {}
|
||||
};
|
||||
|
||||
struct type_id : public type {
|
||||
int id;
|
||||
struct type_base : public type {
|
||||
std::string name;
|
||||
|
||||
type_id(int i)
|
||||
: id(i) {}
|
||||
type_base(std::string n)
|
||||
: name(std::move(n)) {}
|
||||
};
|
||||
|
||||
struct type_arr : public type {
|
||||
@ -32,8 +32,13 @@ struct type_arr : public type {
|
||||
|
||||
struct type_mgr {
|
||||
int last_id = 0;
|
||||
std::map<std::string, type_ptr> types;
|
||||
|
||||
std::string new_type_name();
|
||||
type_ptr new_type();
|
||||
type_ptr new_arrow_type();
|
||||
|
||||
void unify(type_ptr l, type_ptr r);
|
||||
type_ptr resolve(type_ptr t, type_var*& var);
|
||||
void bind(std::string s, type_ptr t);
|
||||
};
|
||||
|
@ -295,4 +295,121 @@ through \\(\\tau\_n\\), then the each variable will have a corresponding type.
|
||||
We didn't include lambda expressions in our syntax, and thus we won't need typing rules for them,
|
||||
so it actually seems like we're done with the first draft of our type rules.
|
||||
|
||||
{{< todo >}}Cite 006_hindley_milner on implementation of bind. {{< /todo >}}.
|
||||
#### Implementation
|
||||
Let's work towards some code. Before we write anything down though, let's
|
||||
get a definition of what a "type" is, in the context of our type checker.
|
||||
Let's say a type is one of 3 things:
|
||||
|
||||
1. A "base type", like `Int`, `Bool`, or `List`.
|
||||
2. A type that's a function from one type to another.
|
||||
3. A placeholder / type variable (like the kind we used for type inference).
|
||||
|
||||
We represent a plceholder type with a unique string, such as "a", or "b",
|
||||
and this makes our placeholder type class very similar to the base type class.
|
||||
|
||||
{{< codeblock "C++" "compiler/03/type.hpp" >}}
|
||||
|
||||
As you can see, we also declared a `type_mgr`, or type manager class.
|
||||
This class will keep the state used for generating more placeholder
|
||||
type names, as well as the information about which
|
||||
placeholder type is mapped to what. We gave it 3 methods:
|
||||
|
||||
* `unify`, to perform unification. It will take two types and
|
||||
find values for placeholder variables such that they can
|
||||
equal.
|
||||
* `resolve`, to get to the "bottom" of a chain of equations.
|
||||
For instance, we have placeholder `a` be mapped to a placeholder
|
||||
`b`, an finally, the placeholder `b` to be mapped to `Int`.
|
||||
`resolve` will return for us `Int`, and, if the "bottom"
|
||||
of the chain is a placeholder, it will set `var` to be a pointer
|
||||
to that placeholder.
|
||||
* `bind`, inspired by [this post](http://dev.stephendiehl.com/fun/006_hindley_milner.html),
|
||||
will map a type variable of some name to a type. This function will also check if
|
||||
the thing we're binding to is the same type variable and not do anything in that case,
|
||||
since `a = a` is not a very useful equation to have.
|
||||
|
||||
To fit its original purpose, we also give the manager class the methods
|
||||
`new_type_name`, and two convenience methods to create placeholder types,
|
||||
`new_type` (in the form `a`) and `new_arrow_type` (in the form `a->b`).
|
||||
|
||||
Let's take a look at the implementation now:
|
||||
|
||||
{{< codeblock "C++" "compiler/03/type.cpp" >}}
|
||||
|
||||
Here, `new_type_name` is actually pretty boring. My goal
|
||||
was to generate type names like `a`, then `b`, eventually getting
|
||||
to `z`, and then move on to `aa`. This provides is with an
|
||||
endless stream of placeholder types.
|
||||
|
||||
Time for the interesting functions. `resolve` keeps
|
||||
trying `dynamic_cast` to a type variable, and if that succeeds,
|
||||
then either:
|
||||
|
||||
1. It's a type variable that's already been set
|
||||
to something, in which case we try resolve the thing it was
|
||||
set to (`t = it->second`)
|
||||
2. It's a type variable that hasn't been set to something.
|
||||
We set `var` to it (the caller will use this info),
|
||||
and stop our resolution loop (`break`).
|
||||
|
||||
In `unify`, we start by calling `resolve` - we don't want
|
||||
to accidentally compare `a` and `b` (and try to bind `a` to
|
||||
`b`) when `a` is already bound to something else (like `Int`).
|
||||
|
||||
From resolve, we will have `lvar` and `rvar` set to
|
||||
something not NULL if `l` or `r` were type variables
|
||||
that haven't been yet mapped (we defined `resolve` to behave this way).
|
||||
So, if one of the variables is not NULL, we try to bind it.
|
||||
|
||||
Next, `unify` checks if both types are either base types or
|
||||
arrow types. If they're base types, it compares their names,
|
||||
and if they're arrow types, it recursively unifies their children.
|
||||
We return in all cases when unification succeeds, and then throw
|
||||
an exception (currently 0) if all the cases fell thorugh, and thus,
|
||||
unification failed.
|
||||
|
||||
Finally, `bind` places the type we're binding to into
|
||||
the `types` map, but not before it checks that the type
|
||||
we're binding is the same as the string we're binding it to
|
||||
(since, again, `a=a` is not a useful equation).
|
||||
|
||||
We now have a unification algorithm, but we still
|
||||
need to implement our rules. Our rules
|
||||
usually include three things: an environment
|
||||
\\(\\Gamma\\), an expression \\(e\\),
|
||||
and a type \\(\\tau\\). We will
|
||||
represent this as a method on `ast`, which is
|
||||
our representation of an expression \\(e\\). This
|
||||
method will take an environment and return
|
||||
a type.
|
||||
|
||||
But first, how should we implement our environment?
|
||||
Conceptually, an environment maps a name string
|
||||
to a type. So naively, we can implement this simply
|
||||
using an `std::map`. But observe
|
||||
that we only extend the environment in one case so far:
|
||||
a case expression. In a case expression, we have the base
|
||||
envrionment \\(\\Gamma\\), and for each branch,
|
||||
we extend it with the bindings produced by
|
||||
the pattern match. Each branch receives a modified
|
||||
copy of the original environment, one that
|
||||
doesn't see the effects of the other branches.
|
||||
|
||||
Using our naive approach, we'd create a new `std::map` for
|
||||
each branch that's a copy of the original environment,
|
||||
and place into it the new pairs. But this means we'll
|
||||
need to copy a map for each branch of the pattern!
|
||||
|
||||
There's a better way. We structure our environment like
|
||||
a linked list. Each node in the linked list
|
||||
contains an `std::map`. When we encounter a new
|
||||
scope (such as in a case branch), we create a new such node, and add all
|
||||
variables introduced in this scope to that node's map. We make
|
||||
it point to our current environment. Then, we pass around the new node
|
||||
as the environment.
|
||||
|
||||
When we look up a variable name, we first look in this node we created.
|
||||
If we don't find the variable we're looking for, we move on to the next
|
||||
node. The benefit of this is that we won't be re-creating a map
|
||||
for each branch, and just creating a node with the changes.
|
||||
Let's implement exactly that:
|
||||
|
Loading…
Reference in New Issue
Block a user