Write up type code
This commit is contained in:
parent
ac589a8b0a
commit
1820a05fcc
@ -3,18 +3,17 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
std::string type_mgr::new_type_name() {
|
std::string type_mgr::new_type_name() {
|
||||||
std::ostringstream oss;
|
|
||||||
int temp = last_id++;
|
int temp = last_id++;
|
||||||
|
std::string str = "";
|
||||||
|
|
||||||
do {
|
while(temp != -1) {
|
||||||
oss << (char) ('a' + (temp % 26));
|
str += (char) ('a' + (temp % 26));
|
||||||
temp /= 26;
|
temp = temp / 26 - 1;
|
||||||
} while(temp);
|
}
|
||||||
std::string str = oss.str();
|
|
||||||
|
|
||||||
std::reverse(str.begin(), str.end());
|
std::reverse(str.begin(), str.end());
|
||||||
return str;
|
return str;
|
||||||
};
|
}
|
||||||
|
|
||||||
type_ptr type_mgr::new_type() {
|
type_ptr type_mgr::new_type() {
|
||||||
return type_ptr(new type_var(new_type_name()));
|
return type_ptr(new type_var(new_type_name()));
|
||||||
@ -23,3 +22,57 @@ type_ptr type_mgr::new_type() {
|
|||||||
type_ptr type_mgr::new_arrow_type() {
|
type_ptr type_mgr::new_arrow_type() {
|
||||||
return type_ptr(new type_arr(new_type(), new_type()));
|
return type_ptr(new type_arr(new_type(), new_type()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) {
|
||||||
|
type_var* cast;
|
||||||
|
|
||||||
|
var = nullptr;
|
||||||
|
while((cast = dynamic_cast<type_var*>(t.get()))) {
|
||||||
|
auto it = types.find(cast->name);
|
||||||
|
|
||||||
|
if(it == types.end()) {
|
||||||
|
var = cast;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
t = it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||||
|
type_var* lvar;
|
||||||
|
type_var* rvar;
|
||||||
|
type_arr* larr;
|
||||||
|
type_arr* rarr;
|
||||||
|
type_base* lid;
|
||||||
|
type_base* rid;
|
||||||
|
|
||||||
|
l = resolve(l, lvar);
|
||||||
|
r = resolve(r, rvar);
|
||||||
|
|
||||||
|
if(lvar) {
|
||||||
|
bind(lvar->name, r);
|
||||||
|
return;
|
||||||
|
} else if(rvar) {
|
||||||
|
bind(rvar->name, l);
|
||||||
|
return;
|
||||||
|
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
|
||||||
|
(rarr = dynamic_cast<type_arr*>(r.get()))) {
|
||||||
|
unify(larr->left, rarr->left);
|
||||||
|
unify(larr->right, rarr->right);
|
||||||
|
return;
|
||||||
|
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||||
|
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||||
|
if(lid->name == rid->name) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_mgr::bind(std::string s, type_ptr t) {
|
||||||
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
|
if(other && other->name == s) return;
|
||||||
|
types[s] = t;
|
||||||
|
}
|
||||||
|
@ -15,11 +15,11 @@ struct type_var : public type {
|
|||||||
: name(std::move(n)) {}
|
: name(std::move(n)) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_id : public type {
|
struct type_base : public type {
|
||||||
int id;
|
std::string name;
|
||||||
|
|
||||||
type_id(int i)
|
type_base(std::string n)
|
||||||
: id(i) {}
|
: name(std::move(n)) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct type_arr : public type {
|
struct type_arr : public type {
|
||||||
@ -32,8 +32,13 @@ struct type_arr : public type {
|
|||||||
|
|
||||||
struct type_mgr {
|
struct type_mgr {
|
||||||
int last_id = 0;
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
std::string new_type_name();
|
std::string new_type_name();
|
||||||
type_ptr new_type();
|
type_ptr new_type();
|
||||||
type_ptr new_arrow_type();
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr resolve(type_ptr t, type_var*& var);
|
||||||
|
void bind(std::string s, type_ptr t);
|
||||||
};
|
};
|
||||||
|
@ -295,4 +295,121 @@ through \\(\\tau\_n\\), then the each variable will have a corresponding type.
|
|||||||
We didn't include lambda expressions in our syntax, and thus we won't need typing rules for them,
|
We didn't include lambda expressions in our syntax, and thus we won't need typing rules for them,
|
||||||
so it actually seems like we're done with the first draft of our type rules.
|
so it actually seems like we're done with the first draft of our type rules.
|
||||||
|
|
||||||
{{< todo >}}Cite 006_hindley_milner on implementation of bind. {{< /todo >}}.
|
#### Implementation
|
||||||
|
Let's work towards some code. Before we write anything down though, let's
|
||||||
|
get a definition of what a "type" is, in the context of our type checker.
|
||||||
|
Let's say a type is one of 3 things:
|
||||||
|
|
||||||
|
1. A "base type", like `Int`, `Bool`, or `List`.
|
||||||
|
2. A type that's a function from one type to another.
|
||||||
|
3. A placeholder / type variable (like the kind we used for type inference).
|
||||||
|
|
||||||
|
We represent a placeholder type with a unique string, such as "a", or "b",
|
||||||
|
and this makes our placeholder type class very similar to the base type class.
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/03/type.hpp" >}}
|
||||||
|
|
||||||
|
As you can see, we also declared a `type_mgr`, or type manager class.
|
||||||
|
This class will keep the state used for generating more placeholder
|
||||||
|
type names, as well as the information about which
|
||||||
|
placeholder type is mapped to what. We gave it 3 methods:
|
||||||
|
|
||||||
|
* `unify`, to perform unification. It will take two types and
|
||||||
|
find values for placeholder variables such that they can
|
||||||
|
be equal.
|
||||||
|
* `resolve`, to get to the "bottom" of a chain of equations.
|
||||||
|
For instance, we have placeholder `a` be mapped to a placeholder
|
||||||
|
`b`, and finally, the placeholder `b` to be mapped to `Int`.
|
||||||
|
`resolve` will return for us `Int`, and, if the "bottom"
|
||||||
|
of the chain is a placeholder, it will set `var` to be a pointer
|
||||||
|
to that placeholder.
|
||||||
|
* `bind`, inspired by [this post](http://dev.stephendiehl.com/fun/006_hindley_milner.html),
|
||||||
|
will map a type variable of some name to a type. This function will also check if
|
||||||
|
the thing we're binding to is the same type variable and not do anything in that case,
|
||||||
|
since `a = a` is not a very useful equation to have.
|
||||||
|
|
||||||
|
To fit its original purpose, we also give the manager class the methods
|
||||||
|
`new_type_name`, and two convenience methods to create placeholder types,
|
||||||
|
`new_type` (in the form `a`) and `new_arrow_type` (in the form `a->b`).
|
||||||
|
|
||||||
|
Let's take a look at the implementation now:
|
||||||
|
|
||||||
|
{{< codeblock "C++" "compiler/03/type.cpp" >}}
|
||||||
|
|
||||||
|
Here, `new_type_name` is actually pretty boring. My goal
|
||||||
|
was to generate type names like `a`, then `b`, eventually getting
|
||||||
|
to `z`, and then move on to `aa`. This provides us with an
|
||||||
|
endless stream of placeholder types.
|
||||||
|
|
||||||
|
Time for the interesting functions. `resolve` keeps
|
||||||
|
trying `dynamic_cast` to a type variable, and if that succeeds,
|
||||||
|
then either:
|
||||||
|
|
||||||
|
1. It's a type variable that's already been set
|
||||||
|
to something, in which case we try to resolve the thing it was
|
||||||
|
set to (`t = it->second`)
|
||||||
|
2. It's a type variable that hasn't been set to something.
|
||||||
|
We set `var` to it (the caller will use this info),
|
||||||
|
and stop our resolution loop (`break`).
|
||||||
|
|
||||||
|
In `unify`, we start by calling `resolve` - we don't want
|
||||||
|
to accidentally compare `a` and `b` (and try to bind `a` to
|
||||||
|
`b`) when `a` is already bound to something else (like `Int`).
|
||||||
|
|
||||||
|
From resolve, we will have `lvar` and `rvar` set to
|
||||||
|
something not NULL if `l` or `r` were type variables
|
||||||
|
that haven't yet been mapped (we defined `resolve` to behave this way).
|
||||||
|
So, if one of the variables is not NULL, we try to bind it.
|
||||||
|
|
||||||
|
Next, `unify` checks if both types are either base types or
|
||||||
|
arrow types. If they're base types, it compares their names,
|
||||||
|
and if they're arrow types, it recursively unifies their children.
|
||||||
|
We return in all cases when unification succeeds, and then throw
|
||||||
|
an exception (currently 0) if all the cases fell through, and thus,
|
||||||
|
unification failed.
|
||||||
|
|
||||||
|
Finally, `bind` places the type we're binding to into
|
||||||
|
the `types` map, but not before it checks whether the type
|
||||||
|
we're binding is the same as the string we're binding it to
|
||||||
|
(since, again, `a=a` is not a useful equation).
|
||||||
|
|
||||||
|
We now have a unification algorithm, but we still
|
||||||
|
need to implement our rules. Our rules
|
||||||
|
usually include three things: an environment
|
||||||
|
\\(\\Gamma\\), an expression \\(e\\),
|
||||||
|
and a type \\(\\tau\\). We will
|
||||||
|
represent this as a method on `ast`, which is
|
||||||
|
our representation of an expression \\(e\\). This
|
||||||
|
method will take an environment and return
|
||||||
|
a type.
|
||||||
|
|
||||||
|
But first, how should we implement our environment?
|
||||||
|
Conceptually, an environment maps a name string
|
||||||
|
to a type. So naively, we can implement this simply
|
||||||
|
using an `std::map`. But observe
|
||||||
|
that we only extend the environment in one case so far:
|
||||||
|
a case expression. In a case expression, we have the base
|
||||||
|
environment \\(\\Gamma\\), and for each branch,
|
||||||
|
we extend it with the bindings produced by
|
||||||
|
the pattern match. Each branch receives a modified
|
||||||
|
copy of the original environment, one that
|
||||||
|
doesn't see the effects of the other branches.
|
||||||
|
|
||||||
|
Using our naive approach, we'd create a new `std::map` for
|
||||||
|
each branch that's a copy of the original environment,
|
||||||
|
and place into it the new pairs. But this means we'll
|
||||||
|
need to copy a map for each branch of the pattern!
|
||||||
|
|
||||||
|
There's a better way. We structure our environment like
|
||||||
|
a linked list. Each node in the linked list
|
||||||
|
contains an `std::map`. When we encounter a new
|
||||||
|
scope (such as in a case branch), we create a new such node, and add all
|
||||||
|
variables introduced in this scope to that node's map. We make
|
||||||
|
it point to our current environment. Then, we pass around the new node
|
||||||
|
as the environment.
|
||||||
|
|
||||||
|
When we look up a variable name, we first look in this node we created.
|
||||||
|
If we don't find the variable we're looking for, we move on to the next
|
||||||
|
node. The benefit of this is that we won't be re-creating a map
|
||||||
|
for each branch, and will instead just create a node with the changes.
|
||||||
|
Let's implement exactly that:
|
||||||
|
Loading…
Reference in New Issue
Block a user