Finalize draft of polymorphism post

2020-03-25 03:22:21 -07:00
parent 2a12f7f31e
commit 577e0ad930
9 changed files with 402 additions and 136 deletions
--- a/code/compiler/10/CMakeLists.txt
+++ b/code/compiler/10/CMakeLists.txt
@@ -29,6 +29,7 @@ add_executable(compiler
    error.cpp error.hpp
    binop.cpp binop.hpp
    instruction.cpp instruction.hpp
    graph.cpp graph.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
--- a/code/compiler/10/ast.cpp
+++ b/code/compiler/10/ast.cpp
@@ -1,7 +1,9 @@
 #include "ast.hpp"
 #include <ostream>
 #include <iostream>
 #include "binop.hpp"
 #include "error.hpp"
 #include "type.hpp"
 #include "type_env.hpp"
 static void print_indent(int n, std::ostream& to) {
@@ -162,6 +164,10 @@ type_ptr ast_case::typecheck(type_mgr& mgr) {
    input_type = mgr.resolve(case_type, var);
    if(!dynamic_cast<type_data*>(input_type.get())) {
        std::cout << dynamic_cast<type_data*>(input_type.get()) << std::endl;
        std::cout << dynamic_cast<type_base*>(input_type.get()) << std::endl;
        std::cout << var << std::endl;
        input_type->print(mgr, std::cout); std::cout << std::endl;
        throw type_error("attempting case analysis of non-data type");
    }
--- a/code/compiler/10/definition.cpp
+++ b/code/compiler/10/definition.cpp
@@ -3,6 +3,7 @@
 #include "ast.hpp"
 #include "instruction.hpp"
 #include "llvm_context.hpp"
 #include "type.hpp"
 #include "type_env.hpp"
 #include <llvm/IR/DerivedTypes.h>
 #include <llvm/IR/Function.h>
@@ -57,11 +58,12 @@ void definition_defn::generate_llvm(llvm_context& ctx) {
 // Records the environment this data definition lives in, then registers a
 // fresh type_data under the definition's name in that environment.
 // bind_type rejects (by throwing) a name that is already bound.
 // `mgr` is not used by this step.
 void definition_data::insert_types(type_mgr& mgr, type_env_ptr& env) {
    this->env = env;
    type_ptr declared_type(new type_data(name));
    env->bind_type(name, declared_type);
 }
 void definition_data::insert_constructors() const {
-    type_data* this_type = new type_data(name);
+    type_ptr return_type = env->lookup_type(name);
-    type_ptr return_type = type_ptr(this_type);
+    type_data* this_type = static_cast<type_data*>(return_type.get());
    int next_tag = 0;
    for(auto& constructor : constructors) {
@@ -70,7 +72,8 @@ void definition_data::insert_constructors() const {
        type_ptr full_type = return_type;
        for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
-            type_ptr type = type_ptr(new type_base(*it));
+            type_ptr type = env->lookup_type(*it);
            if(!type) throw 0;
            full_type = type_ptr(new type_arr(type, full_type));
        }
--- a/code/compiler/10/examples/if.txt
+++ b/code/compiler/10/examples/if.txt
@@ -0,0 +1,8 @@
 data Bool = { True, False }
 defn if c t e = {
    case c of {
        True -> { t }
        False -> { e }
    }
 }
 defn main = { if (if True False True) 11 3 }
--- a/code/compiler/10/graph.hpp
+++ b/code/compiler/10/graph.hpp
@@ -50,116 +50,3 @@ class function_graph {
    void add_edge(const function& from, const function& to);
    std::vector<group_ptr> compute_order();
 };
 std::set<function_graph::edge> function_graph::compute_transitive_edges() {
    std::set<edge> transitive_edges;
    transitive_edges.insert(edges.begin(), edges.end());
    for(auto& connector : adjacency_lists) {
        for(auto& from : adjacency_lists) {
            edge to_connector { from.first, connector.first };
            for(auto& to : adjacency_lists) {
                edge full_jump { from.first, to.first };
                if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
                edge from_connector { connector.first, to.first };
                if(transitive_edges.find(to_connector) != transitive_edges.end() &&
                        transitive_edges.find(from_connector) != transitive_edges.end())
                    transitive_edges.insert(std::move(full_jump));
            }
        }
    }
    return transitive_edges;
 }
 void function_graph::create_groups(
        const std::set<edge>& transitive_edges,
        std::map<function, group_id>& group_ids,
        std::map<group_id, data_ptr>& group_data_map) {
    group_id id_counter = 0;
    for(auto& vertex : adjacency_lists) {
        if(group_ids.find(vertex.first) != group_ids.end())
            continue;
        data_ptr new_group(new group_data);
        new_group->functions.insert(vertex.first);
        group_data_map[id_counter] = new_group;
        group_ids[vertex.first] = id_counter;
        for(auto& other_vertex : adjacency_lists) {
            if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
                    transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
                group_ids[other_vertex.first] = id_counter;
                new_group->functions.insert(other_vertex.first);
            }
        }
        id_counter++;
    }
 }
 void function_graph::create_edges(
        std::map<function, group_id>& group_ids,
        std::map<group_id, data_ptr>& group_data_map) {
    std::set<std::pair<group_id, group_id>> group_edges;
    for(auto& vertex : adjacency_lists) {
        auto vertex_id = group_ids[vertex.first];
        auto& vertex_data = group_data_map[vertex_id];
        for(auto& other_vertex : vertex.second) {
            auto other_id = group_ids[other_vertex];
            if(vertex_id == other_id) continue;
            if(group_edges.find({vertex_id, other_id}) != group_edges.end())
                continue;
            group_edges.insert({vertex_id, other_id});
            vertex_data->adjacency_list.insert(other_id);
            group_data_map[other_id]->indegree++;
        }
    }
 }
 std::vector<group_ptr> function_graph::generate_order(
        std::map<function, group_id>& group_ids,
        std::map<group_id, data_ptr>& group_data_map) {
    std::queue<group_id> id_queue;
    std::vector<group_ptr> output;
    for(auto& group : group_data_map) {
        if(group.second->indegree == 0) id_queue.push(group.first);
    }
    while(!id_queue.empty()) {
        auto new_id = id_queue.front();
        auto& group_data = group_data_map[new_id];
        group_ptr output_group(new group);
        output_group->members = std::move(group_data->functions);
        id_queue.pop();
        for(auto& adjacent_group : group_data->adjacency_list) {
            if(--group_data_map[adjacent_group]->indegree == 0)
                id_queue.push(adjacent_group);
        }
        output.push_back(std::move(output_group));
    }
    return output;
 }
 std::set<function>& function_graph::add_function(const function& f) {
    auto adjacency_list_it = adjacency_lists.find(f);
    if(adjacency_list_it != adjacency_lists.end()) {
        return adjacency_list_it->second;
    } else {
        return adjacency_lists[f] = { };
    }
 }
 void function_graph::add_edge(const function& from, const function& to) {
    add_function(from).insert(to);
    edges.insert({ from, to });
 }
 std::vector<group_ptr> function_graph::compute_order() {
    std::set<edge> transitive_edges = compute_transitive_edges();
    std::map<function, group_id> group_ids;
    std::map<group_id, data_ptr> group_data_map;
    create_groups(transitive_edges, group_ids, group_data_map);
    create_edges(group_ids, group_data_map);
    return generate_order(group_ids, group_data_map);
 }
--- a/code/compiler/10/main.cpp
+++ b/code/compiler/10/main.cpp
@@ -29,10 +29,11 @@ void typecheck_program(
        const std::map<std::string, definition_defn_ptr>& defs_defn,
        type_mgr& mgr, type_env_ptr& env) {
    type_ptr int_type = type_ptr(new type_base("Int")); 
    env->bind_type("Int", int_type);
    type_ptr binop_type = type_ptr(new type_arr(
                int_type,
                type_ptr(new type_arr(int_type, int_type))));
    env->bind("+", binop_type);
    env->bind("-", binop_type);
    env->bind("*", binop_type);
@@ -68,6 +69,8 @@ void typecheck_program(
        for(auto& def_defnn_name : group->members) {
            auto& def_defn = defs_defn.find(def_defnn_name)->second;
            def_defn->typecheck(mgr);
        }
        for(auto& def_defnn_name : group->members) {
            env->generalize(def_defnn_name, mgr);
        }
    }
--- a/code/compiler/10/type_env.cpp
+++ b/code/compiler/10/type_env.cpp
@@ -8,6 +8,13 @@ type_scheme_ptr type_env::lookup(const std::string& name) const {
    return nullptr;
 }
 // Finds the type bound to `name`, searching this environment first and
 // then each enclosing (parent) environment in turn. Returns nullptr when
 // no environment in the chain knows the name.
 type_ptr type_env::lookup_type(const std::string& name) const {
    for(const type_env* scope = this; scope != nullptr; scope = scope->parent.get()) {
        auto found = scope->type_names.find(name);
        if(found != scope->type_names.end()) {
            return found->second;
        }
    }
    return nullptr;
 }
 void type_env::bind(const std::string& name, type_ptr t) {
    names[name] = type_scheme_ptr(new type_scheme(t));
 }
@@ -16,6 +23,11 @@ void type_env::bind(const std::string& name, type_scheme_ptr t) {
    names[name] = t;
 }
 // Binds `type_name` to `t` in this environment. Redeclaring a type name
 // that is already visible (here or in any parent environment) is rejected
 // by throwing 0.
 void type_env::bind_type(const std::string& type_name, type_ptr t) {
    const bool already_declared = static_cast<bool>(lookup_type(type_name));
    if(already_declared) throw 0;
    type_names[type_name] = t;
 }
 void type_env::generalize(const std::string& name, type_mgr& mgr) {
    auto names_it = names.find(name);
    if(names_it == names.end()) throw 0;
--- a/code/compiler/10/type_env.hpp
+++ b/code/compiler/10/type_env.hpp
@@ -8,13 +8,16 @@ using type_env_ptr = std::shared_ptr<type_env>;
 // A scope that maps value names to type schemes and type names to types.
 // Scopes are chained through `parent`; lookup_type falls back to the
 // parent chain when a name is not bound locally.
 struct type_env {
    // Enclosing environment, or null for the outermost one.
    type_env_ptr parent;
    // Value-level bindings written by bind(): name -> type scheme.
    std::map<std::string, type_scheme_ptr> names;
    // Type-level bindings written by bind_type(): type name -> type.
    std::map<std::string, type_ptr> type_names;

    // Creates an environment nested inside `p`.
    type_env(type_env_ptr p) : parent(std::move(p)) {}
    // Creates an environment with no parent.
    type_env() : type_env(nullptr) {}

    // Looks up the type scheme bound to `name`; returns nullptr when the
    // search fails. NOTE(review): presumably walks the parent chain like
    // lookup_type — confirm against the implementation.
    type_scheme_ptr lookup(const std::string& name) const;
    // Looks up the type bound to `name` here or in any parent; returns
    // nullptr when the name is unknown.
    type_ptr lookup_type(const std::string& name) const;
    // Binds `name` to a new type scheme wrapping the type `t`.
    void bind(const std::string& name, type_ptr t);
    // Binds `name` directly to the given type scheme.
    void bind(const std::string& name, type_scheme_ptr t);
    // Registers a type name; throws 0 if the name is already visible.
    void bind_type(const std::string& type_name, type_ptr t);
    // Generalizes the binding of `name`; throws 0 if `name` is not bound in
    // this environment.
    void generalize(const std::string& name, type_mgr& mgr);
 };
--- a/content/blog/10_compiler_polymorphism.md
+++ b/content/blog/10_compiler_polymorphism.md
@@ -257,7 +257,8 @@ they are placed in one group. We then construct a dependency graph __of these gr
 3. We compute a topological order of the group graph. This helps us typecheck the dependencies
 of functions before checking the functions themselves. In our specific case, this would ensure
 we check `if` first, and only then move on to `testOne` and `testTwo`. The order of typechecking
-within a group does not matter.
+within a group does not matter, as long as we generalize only after typechecking all functions
 in a group.
 4. We typecheck the function groups, and functions within them, following the above topological order.
 To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf).
@@ -326,7 +327,7 @@ I think that we should create a C++ class that will represent our function
 dependency graph. Let's call it `function_graph`. I propose the following
 definition:
-{{< codelines "C++" "compiler/10/graph.hpp" 12 51 >}}
+{{< codelines "C++" "compiler/10/graph.hpp" 12 52 >}}
 There's a lot to unpack here. First of all, we create a type alias `function` that
 represents the label of a function in our graph. It is probably most convenient
@@ -363,16 +364,22 @@ of each group are computed, as well as their adjacency lists.
 * `generate_order` uses the indegrees and adjacency lists produced in the prior step
 to establish a topological order.
-Finally, the `add_edge` method is used to add a new dependency between two functions,
+Following these, we have three public function definitions:
-while the `compute_order` method uses the internal methods described above to convert
+* `add_function` adds a vertex to the graph. Sometimes, a function does not
 reference any other functions, and would not appear in the list of edges.
 We will call this function to make sure that the function graph is aware
 of such functions. For convenience, this function returns the adjacency list
 of the added function.
 * `add_edge` adds a new dependency between two functions.
 * `compute_order` uses the internal methods described above to convert
 the function dependency graph into a properly ordered list of groups.
 Let's start by looking at how to implement the internal methods. `compute_transitive_edges`
 is a very straightforward implementation of Warshall's:
-{{< codelines "C++" "compiler/10/graph.hpp" 53 71 >}}
+{{< codelines "C++" "compiler/10/graph.cpp" 3 21 >}}
-Next is `create_groups`, for each function, we iterate over all other functions.
+Next is `create_groups`. For each function, we iterate over all other functions.
 If the other function is mutually dependent with the first function, we add
 it to the same group. In the outer loop, we skip over functions that have
 already been added to the group. This is because 
@@ -392,7 +399,7 @@ is an [equivalence relation](https://en.wikipedia.org/wiki/Equivalence_relation)
 which means that if we already added a function to a group, all its
 group members were also already visited and added.
-{{< codelines "C++" "compiler/10/graph.hpp" 73 94 >}}
+{{< codelines "C++" "compiler/10/graph.cpp" 23 44 >}}
 Once groups have been created, we use their functions' edges
 to create edges for the groups themselves, using `create_edges`.
@@ -400,26 +407,362 @@ We avoid creating edges from a group to itself, to avoid
 unnecessary cycles. While constructing the edges, we also
 increment the relevant indegree counter.
-{{< codelines "C++" "compiler/10/graph.hpp" 96 113 >}}
+{{< codelines "C++" "compiler/10/graph.cpp" 46 63 >}}
 Finally, we apply Kahn's algorithm to create a topological order
 in `generate_order`:
-{{< codelines "C++" "compiler/10/graph.hpp" 115 140 >}}
+{{< codelines "C++" "compiler/10/graph.cpp" 65 90 >}}
 These four steps are used in `compute_order`:
-{{< codelines "C++" "compiler/10/graph.hpp" 152 160 >}}
+{{< codelines "C++" "compiler/10/graph.cpp" 106 114 >}}
-Finally, `add_edge` straightforwardly adds an edge
+Let's now look at the remaining two public definitions.
-to the graph:
+First comes `add_function`, which creates an adjacency list for the
 function to be inserted (if one does not already exist),
 and returns a reference to the resulting list:
-{{< codelines "C++" "compiler/10/graph.hpp" 142 150 >}}
+{{< codelines "C++" "compiler/10/graph.cpp" 92 99 >}}
 We use this in `add_edge`, which straightforwardly creates an edge
 between two functions:
 {{< codelines "C++" "compiler/10/graph.cpp" 101 104 >}}
 With this, we can now properly order our typechecking.
-However, there are a few pieces of the puzzle missing.
+However, we are just getting started: there are still
-First of all, we need to actually insert function
+numerous changes we need to make to get our compiler
-dependencies into the graph. Second, we need to think
+to behave as we desire.
-about how our existing language features and implementation
+
-will interact with polymorphism. Third, we have to come up
+The first change is the least relevant, but will help clean
-with an implementation of polymorphic data types.
+up our code base in the presence of polymorphism: we will
 get rid of `resolve`, in both definitions and AST nodes.
 The reasons for this are twofold. First,
 {{< sidenote "right" "case-type-note" "only the case expression node actually uses the type it stores." >}}
 Recall that <code>ast_case</code> needs this information to properly
 account for the changes to the stack from when data is unpacked.
 {{< /sidenote >}} This means that
 all the rest of the infrastructure we've written around
 preserving types is somewhat pointless. Second, when
 we call `resolve`, we'd now have to distinguish
 between type variables captured by "forall" and actual,
 undefined variables. That's a lot of wasted work!
 To replace the now-removed `type` field,
 we make `ast_case` include a new member, `input_type`,
 which stores the type of the thing between `case` and `of`.
 Since `ast_case` requires its type to be a data type
 at the time of typechecking, we no longer need to resolve anything.
 Next, we need to work in a step geared towards finding function calls
 (to determine dependencies). As we have noted in [part 6]({{< relref "06_compiler_compilation.md" >}}),
 it's pretty easy to tell apart calls to global functions from "local" ones. If
 we see that a variable was previously bound (perhaps as a function argument,
 or by a pattern in a case expression), we know for sure that it is not a global
 function call. Otherwise, if the variable isn't bound anywhere in the function
 definition (it's a __free variable__), it must refer to a global function. Then,
 we can traverse the function body, storing variables that are bound (but only within
 their scope), and noting references to variables we haven't yet seen. To
 implement this, we can use a linked list, where each node refers to a particular
 scope, points to the scope enclosing it, and contains a list of variables...
 Wait a minute, this is identical to `type_env`! There's no reason to reimplement all
 this. But then, another question arises: do we throw away the `type_env` generated
 by the dependency-searching step? It seems wasteful, since we will eventually
 repeat this same work. Rather, we'll re-use the same `type_env` instances
 in both this new step and `typecheck`. To do this, we will now store a pointer
 to a `type_env` in every AST node, and set this pointer during our first traversal
 of the tree. Indeed, this makes our `type_env` more like a
 [symbol table](https://en.wikipedia.org/wiki/Symbol_table). With this change,
 our new dependency-finding step will be implemented by the `find_free` function
 with the following signature:
 ```C++
 void ast::find_free(type_mgr& mgr, type_env_ptr& env, std::set<std::string>& into);
 ```
 Let's take a look at how this will be implemented. The simplest case (as usual)
 is `ast_int`:
 {{< codelines "C++" "compiler/10/ast.cpp" 16 18 >}}
 In this case, we associate the `type_env` with the node, but don't do anything
 else: a number is not a variable. A more interesting case is `ast_lid`:
 {{< codelines "C++" "compiler/10/ast.cpp" 33 36 >}}
 If a lowercase variable has not yet been bound to something, it's free,
 and we store it. Somewhat counterintuitively, `ast_uid` behaves
 differently:
 {{< codelines "C++" "compiler/10/ast.cpp" 54 56 >}}
 We don't allow uppercase variables to be bound to anything outside of data type
 declarations, so we don't care about uppercase free variables. Next up is
 `ast_binop`:
 {{< codelines "C++" "compiler/10/ast.cpp" 73 77 >}}
 A binary operator can have free variables in the subexpressions on the left and on the right, and
 the above implementation reflects that. This is identical to the implementation of
 `ast_app`:
 {{< codelines "C++" "compiler/10/ast.cpp" 109 113 >}}
 Finally, `ast_case` requires the most complicated function (as usual):
 {{< codelines "C++" "compiler/10/ast.cpp" 142 150 >}}
 The `type_scope` function replaces the `type_env::scope` method,
 which cannot (without significant effort) operate on smart pointers.
 Importantly, we are using a new `pattern` method here, `insert_bindings`. This
 is because we split "introducing variables" and "typechecking variables"
 into two steps for patterns, as well. The implementation of `insert_bindings`
 for `pattern_var` is as follows:
 {{< codelines "C++" "compiler/10/ast.cpp" 230 232 >}}
 A variable pattern always introduces the variable it is made up of.
 On the other hand, the implementation for `pattern_constr` is as follows:
 {{< codelines "C++" "compiler/10/ast.cpp" 245 249 >}}
 All the variables of the pattern are placed into the environment. For now, we don't worry
 about arity; this is the job of typechecking.
 These changes are reflected in all instances of our `typecheck` function. First of
 all, `typecheck` no longer needs to receive a `type_env` parameter, since each
 tree node has a `type_env_ptr`. Furthermore, `typecheck` should no longer call
 `bind`, since this was already done by `find_free`. For example,
 `ast_lid::typecheck` will now use `env::lookup`:
 {{< codelines "C++" "compiler/10/ast.cpp" 38 40 >}}
 Don't worry about `instantiate` for now; that's coming up. Similarly to
 `ast_lid`, `ast_case::typecheck` will no longer introduce new bindings,
 and unify instead:
 {{< codelines "C++" "compiler/10/ast.cpp" 152 169 >}}
 The above implementation uses another new `pattern` method, `typecheck`.
 This method inherits the type checking functionality previously
 contained in `pattern::match`. Here's the implementation for `pattern_var`:
 {{< codelines "C++" "compiler/10/ast.cpp" 234 236 >}}
 And here's the implementation for `pattern_constr`:
 {{< codelines "C++" "compiler/10/ast.cpp" 251 266 >}}
 So far, so good. However, for all of this to reach the main typechecking
 code, not only `ast` subclasses need to be updated, but also
 the `definition`s. Here things get more complicated, because
 `definition_data` and `definition_defn` are growing more and more apart.
 Previously, we had two typechecking steps: `typecheck_first` (which registered
 function names into the environment) and `typecheck_second` (which performed
 the actual typechecking). However, not only are these names not informative,
 but the algorithms for typechecking the two types of definition will soon
 have different numbers of "major" steps.
 Let's take a look at how we would typecheck data types. I propose the following
 steps:
 1. Iterate all declared data types, storing them into some kind of "known" list.
 2. Iterate again, and for each constructor of a type, verify that
 it refers to "known" types. Add valid constructors to the global environment as functions.
 We don't currently verify that types are "known"; a user could declare a list of `Floobs`,
 and never say what a `Floob` is. This isn't too big of an issue (good luck constructing
 a value of a non-existent type), but a mature compiler should prevent this from happening.
 On the other hand, here are the steps for function definitions:
 1. Find the free variables of each function to create the ordered list of groups as described above.
 2. Within each group, insert a general function type (like \\(a \\rightarrow b \\rightarrow c\\))
 into the environment for each function.
 3. Within each group (in the same pass) run typechecking
 (including polymorphism, using the rules as described above).
 The two types of definitions further diverge when generating LLVM and compiling to G-machine instructions:
 data types immediately construct and insert their functions, and do not emit G-machine instructions,
 while functions generate G-machine instructions, declare prototypes, and emit LLVM in three distinct phases.
 Overall, there are virtually no similarities between the two types of definitions, and any inheritance
 of common functions starts to appear somewhat forced. To address this, we remove the `definition` class
 altogether, and sever the relationship between `definition_data` and `definition_defn`. The
 two now look as follows:
 {{< codelines "C++" "compiler/10/definition.hpp" 23 67 >}}
 In `definition_defn`, the functions are arranged as follows:
 * `find_free` locates the free variables in the definition, populating
 the `free_variables` field and thereby finding edges for the function graph.
 * `insert_types` stores the type of the function into the global environment
 (a pointer to which is now stored as a field).
 * `typecheck` runs the standard typechecking steps.
 * `compile` generates G-machine instructions.
 * `declare_llvm` inserts LLVM function prototypes into the `llvm_context`.
 * `generate_llvm` converts G-machine instructions into LLVM IR.
 In `definition_data`, the steps are significantly simpler:
 * `insert_types` registers the type being declared as a "known" type.
 * `insert_constructors` inserts constructors (which are verified to
 refer to "known" types) into the global environment.
 * `generate_llvm` creates the LLVM functions (and their IR).
 While the last three methods of `definition_defn` remain unchanged save
 for the name, the implementations of the first three see some updates.
 First is `find_free`:
 {{< codelines "C++" "compiler/10/definition.cpp" 12 26 >}}
 First, to make sure we don't pollute the global scope
 with function parameters, `find_free` creates a new environment
 `var_env`. Then, it stores into this new environment the function parameters,
 ensuring that the parameters of a function aren't marked "free".
 Concurrently, `find_free` constructs the "general" function
 type (used by `insert_types`). Once all the arguments have been bound, `definition_defn::find_free`
 makes a call to `ast::find_free`, which does the work of actually
 finding free variables.
 Since the function type is created by `find_free`, `insert_types` has very little to do:
 {{< codelines "C++" "compiler/10/definition.cpp" 28 30 >}}
 Finally, `typecheck`, which no longer has to bind the function
 arguments to new types, is also fairly simple:
 {{< codelines "C++" "compiler/10/definition.cpp" 32 35 >}}
 Let's move on to data types. In order to implement `definition_data::insert_types`,
 we need to store somewhere a list of all the valid type names. We do this
 by adding a new `type_names` field to `type_env`, and implementing the
 corresponding methods `lookup_type`:
 {{< codelines "C++" "compiler/10/type_env.cpp" 11 16 >}}
 And `bind_type`:
 {{< codelines "C++" "compiler/10/type_env.cpp" 26 29 >}}
 Note in the above snippets that we disallow redeclaring type names;
 declaring two data types (or other types) with the same name in
 our language will not be valid. In `insert_types`, we create a new
 data type and store it in the environment:
 {{< codelines "C++" "compiler/10/definition.cpp" 59 62 >}}
 We then update `insert_constructors` to query the environment
 when creating constructor types, rather than blindly using `new type_base(...)`
 like before:
 {{< codelines "C++" "compiler/10/definition.cpp" 64 82 >}}
 The separation of data and function definitions must be reconciled with code
 going back as far as the parser. While previously, we populated a single, global
 vector of definitions called `program`, we can no longer do that. Instead, we'll
 split our program into two maps, one for data types and one for functions. We
 use maps for convenience: the groups generated by our function graph refer
 to functions by name, and it would be nice to quickly look up the data
 the names refer to. Rather than returning such maps, we change our semantic
 actions to simply insert new data into one of two global maps. Below
 is a snippet that includes all the changes:
 {{< codelines "plaintext" "compiler/10/parser.y" 39 65 >}}
 Note that `program` and `definitions` no longer have a type, and that `data` and `defn`
 have been changed to return `definition_data_ptr` and `definition_defn_ptr`, respectively.
 This necessitates changes to our main file. First of all, we declare the two new maps
 we hope to receive from Bison:
 {{< codelines "C++" "compiler/10/main.cpp" 24 25 >}}
 We then change all affected functions, which in many cases amounts to splitting the `program` parameter
 into `defs_data` and `defs_defn` parameters. We also make other, largely mechanical changes: code iterating
 over definitions now requires the use of `second` to refer to the value stored in the map, and LLVM
 generation now needs to separately process the two different types of definitions. The biggest change
 occurs in `typecheck_program`, which not only undergoes all the aforementioned modifications, but 
 is also updated to use topological ordering:
 {{< codelines "C++" "compiler/10/main.cpp" 27 84 >}}
 The above code uses the yet-unexplained `generalize` method. What's going on?
 Observe that the __Var__ rule of the Hindley-Milner type system says that a variable \\(x\\)
 can have a __polytype__ in the environment \\(\\Gamma\\). Our `type_ptr` can only represent monotypes,
 so we must change what `type_env` associates with names to a new struct for representing polytypes,
 which we will call `type_scheme`. The `type_scheme` struct, just like the formal definition of
 a polytype, contains zero or more "forall"-quantified type variables, followed by a monotype which
 may use these variables:
 {{< codelines "C++" "compiler/10/type.hpp" 17 27 >}}
 The `type_scheme::instantiate` method is effectively an implementation of the special
 case of the __Inst__ rule, in which a polytype is specialized to a monotype. Since
 the __App__ and __Case__ rules only use monotypes, we'll be using this special case a lot.
 We implement this method as follows:
 {{< codelines "C++" "compiler/10/type.cpp" 34 41 >}}
 In the above code, if the type scheme represents a monotype (i.e., it has no quantified variables),
 we simply return that monotype. Otherwise, we must perform a substitution, replacing "forall"-quantified
 variables with fresh type parameters to be determined (we will never determine a single type for any of
 the quantified variables, since they are specifically meant to represent any type).
 We build a substitution map, which assigns to each quantified type variable a corresponding
 "fresh" type, and then create a new type with the substitution applied using `substitute`,
 which is implemented as follows:
 {{< codelines "C++" "compiler/10/type.cpp" 18 32 >}}
 In principle, the function is fairly simple: if the current type is equivalent to a
 quantified type, we return the corresponding "fresh" type. If, on the other hand,
 the type represents a function, we perform a substitution in the function's input
 and output types. This method avoids creating new types where possible; a new type
 is only created if a function's input or output type is changed by a substitution
 (in which case, the function itself is changed by the substitution). In all
 other cases, substitution won't do anything, so we just return the original type.
 Now it is a bit more clear why we saw `instantiate` in a code snippet some time ago;
 to compute a monotype for a variable reference, we must take into account the
 possibility that the variable has a polymorphic type, which needs to be specialized
 (potentially differently in every occurrence of the variable).
 When talking about our new typechecking algorithm, we mentioned using __Gen__ to sprinkle
 polymorphism wherever possible. Whenever possible, __Gen__ will add free variables
 in a type to the "forall" quantifier at the front, making that type polymorphic. 
 We implement this using a new `generalize` added to the `type_env`, which (as per
 convention) generalizes the type of a given variable as much as possible:
 {{< codelines "C++" "compiler/10/type_env.cpp" 31 41 >}}
 For now, we disallow types to be generalized twice, and we naturally disallow generalizing
 types of nonexistent variables. If neither of those things occurs, we find all the free
 variables in the variable's current type using a new method called `type_mgr::find_free`,
 and put them into the "forall" quantifier. `type_mgr::find_free` is implemented as follows:
 {{< codelines "C++" "compiler/10/type.cpp" 138 148 >}}
 The above code is fairly straightforward; if a type is a variable that is not yet bound to anything,
 it is free; if the type is a function, we search for free variables in its input and output types;
 otherwise, the type has no free variables.
 Finally, we have made the necessary changes. Let's test it out with the example from the beginning:
 {{< rawblock "compiler/10/examples/if.txt" >}}
 Running it, we get the output:
 ```
 3
 ```
 Hooray!
 While this is a major success, we are not yet done. Although our functions can now 
 have polymorphic types, the same cannot be said for our data types! We want to
 have lists of integers __and__ lists of booleans, without having to duplicate any code!
 While this also falls into the category of polymorphism, this post has already gotten very long,
 and we will return to it in the near future. Once we're done with that, I still intend
 to go over `let/in` expressions, __lambda functions__, and __Input/Output__ together with
 __strings__. See you in these future posts!