8 changed files with 23 additions and 250 deletions
--- a/code/compiler/11/examples/list.txt
+++ b/code/compiler/11/examples/list.txt
@ -1,32 +0,0 @@
 data List a = { Nil, Cons a (List a) }
 defn map f l = {
    case l of {
        Nil -> { Nil }
        Cons x xs -> { Cons (f x) (map f xs) }
    }
 }
 defn foldl f b l = {
    case l of {
        Nil -> { b }
        Cons x xs -> { foldl f (f b x) xs }
    }
 }
 defn foldr f b l = {
    case l of {
        Nil -> { b }
        Cons x xs -> { f x (foldr f b xs) }
    }
 }
 defn list = { Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))) }
 defn add x y = { x + y }
 defn sum l = { foldr add 0 l }
 defn skipAdd x y = { y + 1 }
 defn length l = { foldr skipAdd 0 l }
 defn main = { sum list + length list }
--- a/code/compiler/11/examples/pair.txt
+++ b/code/compiler/11/examples/pair.txt
@ -1,17 +0,0 @@
 data Pair a b = { MkPair a b }
 defn fst p = {
    case p of {
        MkPair a b -> { a }
    }
 }
 defn snd p = {
    case p of {
        MkPair a b -> { b }
    }
 }
 defn pair = { MkPair 1 (MkPair 2 3) }
 defn main = { fst pair + snd (snd pair) }
--- a/code/compiler/11/examples/works3.txt
+++ b/code/compiler/11/examples/works3.txt
@ -1,4 +1,4 @@
-data List a = { Nil, Cons a (List a) }
+data List = { Nil, Cons Int List }
 data Bool = { True, False }
 defn length l = {
    case l of {
@ -6,4 +6,4 @@ defn length l = {
        Cons x xs -> { 1 + length xs }
    }
 }
-defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) + length (Cons True (Cons False (Cons True Nil))) }
+defn main = { length (Cons True (Cons False (Cons True Nil))) }
--- a/code/compiler/11/parser.y
+++ b/code/compiler/11/parser.y
@ -41,7 +41,7 @@ extern yy::parser::symbol_type yylex();
 %type <std::vector<branch_ptr>> branches
 %type <std::vector<constructor_ptr>> constructors
 %type <std::vector<parsed_type_ptr>> typeList
-%type <parsed_type_ptr> type nonArrowType typeListElement
+%type <parsed_type_ptr> type nullaryType typeListElement
 %type <ast_ptr> aAdd aMul case app appBase
 %type <definition_data_ptr> data 
 %type <definition_defn_ptr> defn
@ -141,11 +141,11 @@ constructor
    ;
 type
-    : nonArrowType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
+    : nullaryType ARROW type { $$ = parsed_type_ptr(new parsed_type_arr(std::move($1), std::move($3))); }
-    | nonArrowType { $$ = std::move($1); }
+    | nullaryType { $$ = std::move($1); }
    ;
-nonArrowType
+nullaryType
    : UID typeList { $$ = parsed_type_ptr(new parsed_type_app(std::move($1), std::move($2))); }
    | LID { $$ = parsed_type_ptr(new parsed_type_var(std::move($1))); }
    | OPAREN type CPAREN { $$ = std::move($2); }
--- a/code/compiler/11/type.cpp
+++ b/code/compiler/11/type.cpp
@ -114,7 +114,7 @@ void type_mgr::unify(type_ptr l, type_ptr r) {
        return;
    } else if((lid = dynamic_cast<type_base*>(l.get())) &&
            (rid = dynamic_cast<type_base*>(r.get()))) {
-        if(lid->name == rid->name && lid->arity == rid->arity) return;
+        if(lid->name == rid->name) return;
    } else if((lapp = dynamic_cast<type_app*>(l.get())) &&
            (rapp = dynamic_cast<type_app*>(r.get()))) {
        unify(lapp->constructor, rapp->constructor);
--- a/content/blog/00_compiler_intro.md
+++ b/content/blog/00_compiler_intro.md
@ -142,4 +142,3 @@ Here are the posts that I've written so far for this series:
 * [LLVM]({{< relref "08_compiler_llvm.md" >}})
 * [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
 * [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
 * [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
--- a/content/blog/10_compiler_polymorphism.md
+++ b/content/blog/10_compiler_polymorphism.md
@ -777,6 +777,6 @@ While this is a major success, we are not yet done. Although our functions can n
 have polymorphic types, the same cannot be said for our data types! We want to
 have lists of integers __and__ lists of booleans, without having to duplicate any code!
 While this also falls into the category of polymorphism, this post has already gotten very long,
-and we will return to it in [part 11]({{< relref "11_compiler_polymorphic_data_types.md" >}}). Once we're done with that, I still intend
+and we will return to it in the near future. Once we're done with that, I still intend
 to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with
-__strings__. 
+__strings__. See you in these future posts!
--- a/content/blog/11_compiler_polymorphic_data_types.md
+++ b/content/blog/11_compiler_polymorphic_data_types.md
@ -185,32 +185,24 @@ D & \rightarrow \text{upperVar} \; L_Y \\
 \end{aligned}
 {{< /latex >}}
-Those are all the changes we have to make to our grammar. Let's now move on to implementing
+Now that we have a grammar for all these things, we have to implement
-the corresponding data structures. We define a new family of structs, which represent types as they are
+the corresponding data structures. We define a new family of structs,
 extending `parsed_type`, which represent types as they are
 received from the parser. These differ from regular types in that they
-do not necessarily represent valid types; validating types requires two passes, whereas parsing is
+do not require that the types they represent are valid; validating
-done in a single pass. We can define our parsed types as follows:
+types requires two passes, which is a luxury we do not have when
 parsing. We can define them as follows:
 {{< codeblock "C++" "compiler/11/parsed_type.hpp" >}}
-We define the conversion method `to_type`, which requires
+We define the conversion function `to_type`, which requires
-a set of type variables that are allowed to occur within a parsed
+a set of type variables quantified in the given type, and
-type (which are the variables specified on the left of the `=`
+the environment in which to look up the arities of various
-in the data type declaration syntax), and the environment in which to
+type constructors. The implementation is as follows:
 look up the arities of any type constructors. The implementation is as follows:
 {{< codeblock "C++" "compiler/11/parsed_type.cpp" >}}
-Note that this definition requires a new `type` subclass, `type_app`, which
+With this definition in hand, we can now update the grammar in our Bison file.
 represents type application. Unlike `parsed_type_app`, it stores a pointer
 to the type constructor being applied, rather than its name. This
 helps validate the type (by making sure the parsed type's name refers to
 an existing type constructor), and lets us gather information like
 which constructors the resulting type has. We define this new type as follows:
 {{< codelines "C++" "compiler/11/type.hpp" 70 78 >}}
 With our new data structures in hand, we can now update the grammar in our Bison file.
 First things first, we'll add the type parameters to the data type definition:
 {{< codelines "plaintext" "compiler/11/parser.y" 127 130 >}}
@ -219,179 +211,10 @@ Next, we add the new grammar rules we came up with:
 {{< codelines "plaintext" "compiler/11/parser.y" 138 163 >}}
 Note in the above rules that even for `typeListElement`, which
 can never be applied to any arguments, we still attach a `parsed_type_app`
 as the semantic value. This is for consistency; it's easier to view
 all types in our system as applications to zero or more arguments,
 than to write coercions from non-applied types to types applied to zero
 arguments.
 Finally, we define the types for these new rules at the top of the file:
 {{< codelines "plaintext" "compiler/11/parser.y" 43 44 >}}
-This concludes our work on the parser, but opens up a whole can of worms
+{{< todo >}}
-elsewhere. First of all, now that we introduced a new `type` subclass, we must
+Nullary is not the right word.
-ensure that type unification still works as intended. We therefore have
+{{< /todo >}}
 to adjust the `type_mgr::unify` method:
 {{< codelines "C++" "compiler/11/type.cpp" 95 132 >}}
 In the above snippet, we add a new if-statement that checks whether or
 not both types being unified are type applications, and if so, unifies
 their constructors and arguments. We also extend our type equality check
 to ensure that both the names _and_ arities of types match
 {{< sidenote "right" "type-equality-note" "when they are compared for equality." >}}
 This is actually a pretty silly measure. Consider the following three
 propositions:
 1) types are only declared at the top-level scope.
 2) if a type is introduced, and another type with that name already exists, we throw an error.
 3) for name equality to be insufficient, we need to have two declared types
 with the same name. Given these propositions, it will not be possible for us to
 declare two types that would confuse the name equality check. However,
 in the near future, these propositions may not all hold: if we allow
 <code>let/in</code> expressions to contain data type definitions,
 it will be possible to declare two types with the same name and arity
 (in different scopes), which would <em>still</em> confuse the check.
 In the future, if this becomes an issue, we will likely move to unique
 type identifiers.
 {{< /sidenote >}} Note also the more basic fact that we added arity
 to our `type_base`,
 {{< sidenote "left" "base-arity-note" "since it may now be a type constructor instead." >}}
 You may be wondering, why did we add arity to base types, rather than data types?
 Although so far, our language can only create type constructors from data type definitions,
 it's possible (or even likely) that we will have
 polymorphic built-in types, such as
 <a href="https://www.haskell.org/tutorial/io.html">the IO monad</a>.
 To prepare for this, we will allow our base types to be type constructors too.
 {{< /sidenote >}}
 Jut as we change `type_mgr::unify`, we need to change `type_mgr::find_free`
 to include the new case of `type_app`. The adjusted function looks as follows:
 {{< codelines "C++" "compiler/11/type.cpp" 174 187 >}}
 There another adjustment that we have to make to our type code. Recall
 that we had code that implemented substitutions: replacing free variables
 with other types to properly implement our type schemes. There
 was a bug in that code, which becomes much more apparent when the substitution
 system is put under more pressure. Specifically, the bug was in how type
 variables were handled.
 The old substitution code, when it found that a type
 variable had been bound to another type, always moved on to perform
 a substitution in that other type. This wasn't really a problem then, since
 any type variables that needed to be substituted were guaranteed to be
 free (that's why they were put into the "forall" quantifier). However, with our
 new system, we are using user-provided type variables (usually `a`, `b`, and so on),
 which have likely already been used by our compiler internally, and thus have
 been bound to something. That something is irrelevant to us: when we
 perform a substitution on a user-defined data type, we _know_ that _our_ `a` is
 free, and should be substitited. In short, precedence should be given to
 substituting type variables, rather than resolving them to what they are bound to.
 To make this adjustment possible, we need to make `substitute` a method of `type_manager`,
 since it will now require an awareness of existing type bindings. Additionally,
 this method will now perform its own type resolution, checking if a type variable
 needs to be substitited between each step. The whole code is as follows:
 {{< codelines "C++" "compiler/11/type.cpp" 134 165 >}}
 That's all for types. Definitions, though, need some work. First of all,
 we've changed our parser to feed our `constructor` type a vector of
 `parsed_type_ptr`, rather than `std::string`. We therefore have to update
 `constructor` to receive and store this new vector:
 {{< codelines "C++" "compiler/11/definition.hpp" 13 20 >}}
 Similarly, `definition_data` itself needs to accept the list of type
 variables it has:
 {{< codelines "C++" "compiler/11/definition.hpp" 54 70 >}}
 We then look at `definition_data::insert_constructors`, which converts
 `constructor` instances to actual constructor functions. The code,
 which is getting pretty complciated, is as follows:
 {{< codelines "C++" "compiler/11/definition.cpp" 64 92 >}}
 In the above snippet, we do the following things:
 1. We first create a set of type variables that can occur
 in this type's constructors (the same set that's used
 by the `to_type` method we saw earlier). While doing this, we ensure
 a type variable is not used twice (this is not allowed), and add each
 type variable to the final return type (which is something like `List a`),
 in the order they occur.
 2. When the variables have been gathered into a set, we iterate
 over all constructors, and convert them into types by calling `to_type`
 on their arguments, and assemble the resulting argument types into a function.
 This is not enough, however, 
 {{< sidenote "right" "type-variables-note" "since constructors of types that accept type variables are polymorphic," >}}
 This is also not enough because without generalization using "forall", we are risking using type variables
 that have already been bound, or that will be bound. Even if <code>a</code> has not yet been used by the typechecker,
 it will be once the type manager generates its first type variable, and things will go south. If we, for some reason,
 wanted type constructors to be monomorphic (but generic, with type variables) we'd need to internally
 instnatiate fresh type variables for every user-defined type variable, and substitute them appropriately.
 {{< /sidenote >}} 
 as we have discussed above with \\(\\text{Nil}\\) and \\(\\text{Cons}\\).
 To accomodate for this, we also add all type variables we've used to the "forall" quantifier
 of a new type scheme, whose monotype is the result of our calls to `to_type`.
 This is the last major change we have to perform. The rest is cleanup: we have switched
 our system to dealing with type applications (sometimes with zero arguments), and we must
 bring the rest of the compiler up to speed with this change. For instance, we update
 `ast_int` to create a reference to an existing integer type during typechecking:
 {{< codelines "C++" "compiler/11/ast.cpp" 20 22 >}}
 Similarly, we update our code in `typecheck_program` to use
 type applications in the type for binary operations:
 {{< codelines "C++" "compiler/11/main.cpp" 31 37 >}}
 Finally, we update `ast_case` to unwrap type applications to get the needed constructor
 data from `type_data`. This has to be done in `ast_case::typecheck`, as follows:
 {{< codelines "C++" "compiler/11/ast.cpp" 163 168 >}}
 Additionally, a similar change needs to be made in `ast_case::compile`:
 {{< codelines "C++" "compiler/11/ast.cpp" 174 175 >}}
 That should be all! Let's try an example:
 {{< rawblock "compiler/11/examples/works3.txt" >}}
 The output:
 ```
 Result: 6
 ```
 Yay! Not only were we able to define a list of any type, but our `length` function correctly
 determined the lengths of two lists of different types! Let's try an example with the
 classic [`fold` functions](http://learnyouahaskell.com/higher-order-functions#folds):
 {{< rawblock "compiler/11/examples/list.txt" >}}
 We expect the sum of the list `[1,2,3,4]` to be `10`, and its length to be `4`, so the sum
 of the two should be `14`. And indeed, our program agrees:
 ```
 Result: 14
 ```
 Let's do one more example, to test types that take more than one type parameter:
 {{< rawblock "compiler/11/examples/pair.txt" >}}
 Once again, the compiled program gives the expected result:
 ```
 Result: 4
 ```
 This looks good! We have added support for polymorphic data types to our compiler.
 We are now free to move on to `let/in` expressions, __lambda functions__, and __Input/Output__,
 as promised! I'll see you then!