Compare commits
No commits in common. "1f00b6a3f87f6adafe26e926ffaded54bba29b74" and "2255543d94bc3ddf388ca6365ca315bce78e2f56" have entirely different histories.
1f00b6a3f8
...
2255543d94
|
@ -1,32 +0,0 @@
|
||||||
data List a = { Nil, Cons a (List a) }
|
|
||||||
|
|
||||||
defn map f l = {
|
|
||||||
case l of {
|
|
||||||
Nil -> { Nil }
|
|
||||||
Cons x xs -> { Cons (f x) (map f xs) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
defn foldl f b l = {
|
|
||||||
case l of {
|
|
||||||
Nil -> { b }
|
|
||||||
Cons x xs -> { foldl f (f b x) xs }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
defn foldr f b l = {
|
|
||||||
case l of {
|
|
||||||
Nil -> { b }
|
|
||||||
Cons x xs -> { f x (foldr f b xs) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
defn list = { Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))) }
|
|
||||||
|
|
||||||
defn add x y = { x + y }
|
|
||||||
defn sum l = { foldr add 0 l }
|
|
||||||
|
|
||||||
defn skipAdd x y = { y + 1 }
|
|
||||||
defn length l = { foldr skipAdd 0 l }
|
|
||||||
|
|
||||||
defn main = { sum list + length list }
|
|
|
@ -1,17 +0,0 @@
|
||||||
data Pair a b = { MkPair a b }
|
|
||||||
|
|
||||||
defn fst p = {
|
|
||||||
case p of {
|
|
||||||
MkPair a b -> { a }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
defn snd p = {
|
|
||||||
case p of {
|
|
||||||
MkPair a b -> { b }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
defn pair = { MkPair 1 (MkPair 2 3) }
|
|
||||||
|
|
||||||
defn main = { fst pair + snd (snd pair) }
|
|
|
@ -1,4 +1,4 @@
|
||||||
data List a = { Nil, Cons a (List a) }
|
data List = { Nil, Cons Int List }
|
||||||
data Bool = { True, False }
|
data Bool = { True, False }
|
||||||
defn length l = {
|
defn length l = {
|
||||||
case l of {
|
case l of {
|
||||||
|
@ -6,4 +6,4 @@ defn length l = {
|
||||||
Cons x xs -> { 1 + length xs }
|
Cons x xs -> { 1 + length xs }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) + length (Cons True (Cons False (Cons True Nil))) }
|
defn main = { length (Cons True (Cons False (Cons True Nil))) }
|
||||||
|
|
|
@ -41,7 +41,7 @@ extern yy::parser::symbol_type yylex();
|
||||||
%type <std::vector<branch_ptr>> branches
|
%type <std::vector<branch_ptr>> branches
|
||||||
%type <std::vector<constructor_ptr>> constructors
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
%type <std::vector<parsed_type_ptr>> typeList
|
%type <std::vector<parsed_type_ptr>> typeList
|
||||||
%type <parsed_type_ptr> type nonArrowType typeListElement
|
%type <parsed_type_ptr> type nullaryType typeListElement
|
||||||
%type <ast_ptr> aAdd aMul case app appBase
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
%type <definition_data_ptr> data
|
%type <definition_data_ptr> data
|
||||||
%type <definition_defn_ptr> defn
|
%type <definition_defn_ptr> defn
|
||||||
|
@ -141,11 +141,11 @@ constructor
|
||||||
;
|
;
|
||||||
|
|
||||||
type
|
type
|
||||||
: nonArrowType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
|
: nullaryType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
|
||||||
| nonArrowType { $$ = std::move($1); }
|
| nullaryType { $$ = std::move($1); }
|
||||||
;
|
;
|
||||||
|
|
||||||
nonArrowType
|
nullaryType
|
||||||
: UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
|
: UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
|
||||||
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
| LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
|
||||||
| OPAREN type CPAREN { $$ = std::move($2); }
|
| OPAREN type CPAREN { $$ = std::move($2); }
|
||||||
|
|
|
@ -114,7 +114,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
|
||||||
return;
|
return;
|
||||||
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
|
||||||
(rid = dynamic_cast<type_base*>(r.get()))) {
|
(rid = dynamic_cast<type_base*>(r.get()))) {
|
||||||
if(lid->name == rid->name && lid->arity == rid->arity) return;
|
if(lid->name == rid->name) return;
|
||||||
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
} else if((lapp = dynamic_cast<type_app*>(l.get())) &&
|
||||||
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
(rapp = dynamic_cast<type_app*>(r.get()))) {
|
||||||
unify(lapp->constructor, rapp->constructor);
|
unify(lapp->constructor, rapp->constructor);
|
||||||
|
|
|
@ -142,4 +142,3 @@ Here are the posts that I've written so far for this series:
|
||||||
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
||||||
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
|
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
|
||||||
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
|
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
|
||||||
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
|
|
||||||
|
|
|
@ -777,6 +777,6 @@ While this is a major success, we are not yet done. Although our functions can n
|
||||||
have polymorphic types, the same cannot be said for our data types! We want to
|
have polymorphic types, the same cannot be said for our data types! We want to
|
||||||
have lists of integers __and__ lists of booleans, without having to duplicate any code!
|
have lists of integers __and__ lists of booleans, without having to duplicate any code!
|
||||||
While this also falls into the category of polymorphism, this post has already gotten very long,
|
While this also falls into the category of polymorphism, this post has already gotten very long,
|
||||||
and we will return to it in [part 11]({{< relref "11_compiler_polymorphic_data_types.md" >}}). Once we're done with that, I still intend
|
and we will return to it in the near future. Once we're done with that, I still intend
|
||||||
to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with
|
to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with
|
||||||
__strings__.
|
__strings__. See you in these future posts!
|
||||||
|
|
|
@ -185,32 +185,24 @@ D & \rightarrow \text{upperVar} \; L_Y \\
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
{{< /latex >}}
|
{{< /latex >}}
|
||||||
|
|
||||||
Those are all the changes we have to make to our grammar. Let's now move on to implementing
|
Now that we have a grammar for all these things, we have to implement
|
||||||
the corresponding data structures. We define a new family of structs, which represent types as they are
|
the corresponding data structures. We define a new family of structs,
|
||||||
|
extending `parsed_type`, which represent types as they are
|
||||||
received from the parser. These differ from regular types in that they
|
received from the parser. These differ from regular types in that they
|
||||||
do not necessarily represent valid types; validating types requires two passes, whereas parsing is
|
do not require that the types they represent are valid; validating
|
||||||
done in a single pass. We can define our parsed types as follows:
|
types requires two passes, which is a luxury we do not have when
|
||||||
|
parsing. We can define them as follows:
|
||||||
|
|
||||||
{{< codeblock "C++" "compiler/11/parsed_type.hpp" >}}
|
{{< codeblock "C++" "compiler/11/parsed_type.hpp" >}}
|
||||||
|
|
||||||
We define the conversion method `to_type`, which requires
|
We define the conversion function `to_type`, which requires
|
||||||
a set of type variables that are allowed to occur within a parsed
|
a set of type variables quantified in the given type, and
|
||||||
type (which are the variables specified on the left of the `=`
|
the environment in which to look up the arities of various
|
||||||
in the data type declaration syntax), and the environment in which to
|
type constructors. The implementation is as follows:
|
||||||
look up the arities of any type constructors. The implementation is as follows:
|
|
||||||
|
|
||||||
{{< codeblock "C++" "compiler/11/parsed_type.cpp" >}}
|
{{< codeblock "C++" "compiler/11/parsed_type.cpp" >}}
|
||||||
|
|
||||||
Note that this definition requires a new `type` subclass, `type_app`, which
|
With this definition in hand, we can now update the grammar in our Bison file.
|
||||||
represents type application. Unlike `parsed_type_app`, it stores a pointer
|
|
||||||
to the type constructor being applied, rather than its name. This
|
|
||||||
helps validate the type (by making sure the parsed type's name refers to
|
|
||||||
an existing type constructor), and lets us gather information like
|
|
||||||
which constructors the resulting type has. We define this new type as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/type.hpp" 70 78 >}}
|
|
||||||
|
|
||||||
With our new data structures in hand, we can now update the grammar in our Bison file.
|
|
||||||
First things first, we'll add the type parameters to the data type definition:
|
First things first, we'll add the type parameters to the data type definition:
|
||||||
|
|
||||||
{{< codelines "plaintext" "compiler/11/parser.y" 127 130 >}}
|
{{< codelines "plaintext" "compiler/11/parser.y" 127 130 >}}
|
||||||
|
@ -219,179 +211,10 @@ Next, we add the new grammar rules we came up with:
|
||||||
|
|
||||||
{{< codelines "plaintext" "compiler/11/parser.y" 138 163 >}}
|
{{< codelines "plaintext" "compiler/11/parser.y" 138 163 >}}
|
||||||
|
|
||||||
Note in the above rules that even for `typeListElement`, which
|
|
||||||
can never be applied to any arguments, we still attach a `parsed_type_app`
|
|
||||||
as the semantic value. This is for consistency; it's easier to view
|
|
||||||
all types in our system as applications to zero or more arguments,
|
|
||||||
than to write coercions from non-applied types to types applied to zero
|
|
||||||
arguments.
|
|
||||||
|
|
||||||
Finally, we define the types for these new rules at the top of the file:
|
Finally, we define the types for these new rules at the top of the file:
|
||||||
|
|
||||||
{{< codelines "plaintext" "compiler/11/parser.y" 43 44 >}}
|
{{< codelines "plaintext" "compiler/11/parser.y" 43 44 >}}
|
||||||
|
|
||||||
This concludes our work on the parser, but opens up a whole can of worms
|
{{< todo >}}
|
||||||
elsewhere. First of all, now that we introduced a new `type` subclass, we must
|
Nullary is not the right word.
|
||||||
ensure that type unification still works as intended. We therefore have
|
{{< /todo >}}
|
||||||
to adjust the `type_mgr::unify` method:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/type.cpp" 95 132 >}}
|
|
||||||
|
|
||||||
In the above snippet, we add a new if-statement that checks whether or
|
|
||||||
not both types being unified are type applications, and if so, unifies
|
|
||||||
their constructors and arguments. We also extend our type equality check
|
|
||||||
to ensure that both the names _and_ arities of types match
|
|
||||||
{{< sidenote "right" "type-equality-note" "when they are compared for equality." >}}
|
|
||||||
This is actually a pretty silly measure. Consider the following three
|
|
||||||
propositions:
|
|
||||||
1) types are only declared at the top-level scope.
|
|
||||||
2) if a type is introduced, and another type with that name already exists, we throw an error.
|
|
||||||
3) for name equality to be insufficient, we need to have two declared types
|
|
||||||
with the same name. Given these propositions, it will not be possible for us to
|
|
||||||
declare two types that would confuse the name equality check. However,
|
|
||||||
in the near future, these propositions may not all hold: if we allow
|
|
||||||
<code>let/in</code> expressions to contain data type definitions,
|
|
||||||
it will be possible to declare two types with the same name and arity
|
|
||||||
(in different scopes), which would <em>still</em> confuse the check.
|
|
||||||
In the future, if this becomes an issue, we will likely move to unique
|
|
||||||
type identifiers.
|
|
||||||
{{< /sidenote >}} Note also the more basic fact that we added arity
|
|
||||||
to our `type_base`,
|
|
||||||
{{< sidenote "left" "base-arity-note" "since it may now be a type constructor instead." >}}
|
|
||||||
You may be wondering, why did we add arity to base types, rather than data types?
|
|
||||||
Although so far, our language can only create type constructors from data type definitions,
|
|
||||||
it's possible (or even likely) that we will have
|
|
||||||
polymorphic built-in types, such as
|
|
||||||
<a href="https://www.haskell.org/tutorial/io.html">the IO monad</a>.
|
|
||||||
To prepare for this, we will allow our base types to be type constructors too.
|
|
||||||
{{< /sidenote >}}
|
|
||||||
|
|
||||||
Just as we changed `type_mgr::unify`, we need to change `type_mgr::find_free`
|
|
||||||
to include the new case of `type_app`. The adjusted function looks as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/type.cpp" 174 187 >}}
|
|
||||||
|
|
||||||
There is another adjustment that we have to make to our type code. Recall
|
|
||||||
that we had code that implemented substitutions: replacing free variables
|
|
||||||
with other types to properly implement our type schemes. There
|
|
||||||
was a bug in that code, which becomes much more apparent when the substitution
|
|
||||||
system is put under more pressure. Specifically, the bug was in how type
|
|
||||||
variables were handled.
|
|
||||||
|
|
||||||
The old substitution code, when it found that a type
|
|
||||||
variable had been bound to another type, always moved on to perform
|
|
||||||
a substitution in that other type. This wasn't really a problem then, since
|
|
||||||
any type variables that needed to be substituted were guaranteed to be
|
|
||||||
free (that's why they were put into the "forall" quantifier). However, with our
|
|
||||||
new system, we are using user-provided type variables (usually `a`, `b`, and so on),
|
|
||||||
which have likely already been used by our compiler internally, and thus have
|
|
||||||
been bound to something. That something is irrelevant to us: when we
|
|
||||||
perform a substitution on a user-defined data type, we _know_ that _our_ `a` is
|
|
||||||
free, and should be substituted. In short, precedence should be given to
|
|
||||||
substituting type variables, rather than resolving them to what they are bound to.
|
|
||||||
|
|
||||||
To make this adjustment possible, we need to make `substitute` a method of `type_mgr`,
|
|
||||||
since it will now require an awareness of existing type bindings. Additionally,
|
|
||||||
this method will now perform its own type resolution, checking if a type variable
|
|
||||||
needs to be substituted between each step. The whole code is as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/type.cpp" 134 165 >}}
|
|
||||||
|
|
||||||
That's all for types. Definitions, though, need some work. First of all,
|
|
||||||
we've changed our parser to feed our `constructor` type a vector of
|
|
||||||
`parsed_type_ptr`, rather than `std::string`. We therefore have to update
|
|
||||||
`constructor` to receive and store this new vector:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/definition.hpp" 13 20 >}}
|
|
||||||
|
|
||||||
Similarly, `definition_data` itself needs to accept the list of type
|
|
||||||
variables it has:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/definition.hpp" 54 70 >}}
|
|
||||||
|
|
||||||
We then look at `definition_data::insert_constructors`, which converts
|
|
||||||
`constructor` instances to actual constructor functions. The code,
|
|
||||||
which is getting pretty complicated, is as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/definition.cpp" 64 92 >}}
|
|
||||||
|
|
||||||
In the above snippet, we do the following things:
|
|
||||||
|
|
||||||
1. We first create a set of type variables that can occur
|
|
||||||
in this type's constructors (the same set that's used
|
|
||||||
by the `to_type` method we saw earlier). While doing this, we ensure
|
|
||||||
a type variable is not used twice (this is not allowed), and add each
|
|
||||||
type variable to the final return type (which is something like `List a`),
|
|
||||||
in the order they occur.
|
|
||||||
2. When the variables have been gathered into a set, we iterate
|
|
||||||
over all constructors, and convert them into types by calling `to_type`
|
|
||||||
on their arguments, and assemble the resulting argument types into a function.
|
|
||||||
This is not enough, however,
|
|
||||||
{{< sidenote "right" "type-variables-note" "since constructors of types that accept type variables are polymorphic," >}}
|
|
||||||
This is also not enough because without generalization using "forall", we are risking using type variables
|
|
||||||
that have already been bound, or that will be bound. Even if <code>a</code> has not yet been used by the typechecker,
|
|
||||||
it will be once the type manager generates its first type variable, and things will go south. If we, for some reason,
|
|
||||||
wanted type constructors to be monomorphic (but generic, with type variables) we'd need to internally
|
|
||||||
instantiate fresh type variables for every user-defined type variable, and substitute them appropriately.
|
|
||||||
{{< /sidenote >}}
|
|
||||||
as we have discussed above with \\(\\text{Nil}\\) and \\(\\text{Cons}\\).
|
|
||||||
To accommodate this, we also add all type variables we've used to the "forall" quantifier
|
|
||||||
of a new type scheme, whose monotype is the result of our calls to `to_type`.
|
|
||||||
|
|
||||||
This is the last major change we have to perform. The rest is cleanup: we have switched
|
|
||||||
our system to dealing with type applications (sometimes with zero arguments), and we must
|
|
||||||
bring the rest of the compiler up to speed with this change. For instance, we update
|
|
||||||
`ast_int` to create a reference to an existing integer type during typechecking:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/ast.cpp" 20 22 >}}
|
|
||||||
|
|
||||||
Similarly, we update our code in `typecheck_program` to use
|
|
||||||
type applications in the type for binary operations:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/main.cpp" 31 37 >}}
|
|
||||||
|
|
||||||
Finally, we update `ast_case` to unwrap type applications to get the needed constructor
|
|
||||||
data from `type_data`. This has to be done in `ast_case::typecheck`, as follows:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/ast.cpp" 163 168 >}}
|
|
||||||
|
|
||||||
Additionally, a similar change needs to be made in `ast_case::compile`:
|
|
||||||
|
|
||||||
{{< codelines "C++" "compiler/11/ast.cpp" 174 175 >}}
|
|
||||||
|
|
||||||
That should be all! Let's try an example:
|
|
||||||
|
|
||||||
{{< rawblock "compiler/11/examples/works3.txt" >}}
|
|
||||||
|
|
||||||
The output:
|
|
||||||
|
|
||||||
```
|
|
||||||
Result: 6
|
|
||||||
```
|
|
||||||
|
|
||||||
Yay! Not only were we able to define a list of any type, but our `length` function correctly
|
|
||||||
determined the lengths of two lists of different types! Let's try an example with the
|
|
||||||
classic [`fold` functions](http://learnyouahaskell.com/higher-order-functions#folds):
|
|
||||||
|
|
||||||
{{< rawblock "compiler/11/examples/list.txt" >}}
|
|
||||||
|
|
||||||
We expect the sum of the list `[1,2,3,4]` to be `10`, and its length to be `4`, so the sum
|
|
||||||
of the two should be `14`. And indeed, our program agrees:
|
|
||||||
|
|
||||||
```
|
|
||||||
Result: 14
|
|
||||||
```
|
|
||||||
|
|
||||||
Let's do one more example, to test types that take more than one type parameter:
|
|
||||||
|
|
||||||
{{< rawblock "compiler/11/examples/pair.txt" >}}
|
|
||||||
|
|
||||||
Once again, the compiled program gives the expected result:
|
|
||||||
|
|
||||||
```
|
|
||||||
Result: 4
|
|
||||||
```
|
|
||||||
|
|
||||||
This looks good! We have added support for polymorphic data types to our compiler.
|
|
||||||
We are now free to move on to `let/in` expressions, __lambda functions__, and __Input/Output__,
|
|
||||||
as promised! I'll see you then!
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user