16 Commits

21 changed files with 694 additions and 138 deletions

View File

@@ -302,7 +302,7 @@ void ast_let::translate(global_scope& scope) {
mangled_env->bind(def.first, env->lookup(def.first), visibility::global); mangled_env->bind(def.first, env->lookup(def.first), visibility::global);
mangled_env->set_mangled_name(def.first, global_definition.name); mangled_env->set_mangled_name(def.first, global_definition.name);
ast_ptr global_app(new ast_lid(global_definition.name)); ast_ptr global_app(new ast_lid(original_name));
global_app->env = mangled_env; global_app->env = mangled_env;
for(auto& param : global_definition.params) { for(auto& param : global_definition.params) {
if(!(captured--)) break; if(!(captured--)) break;

View File

@@ -139,7 +139,7 @@ void definition_group::typecheck(type_mgr& mgr, type_env_ptr& env) {
def_defn->typecheck(mgr); def_defn->typecheck(mgr);
} }
for(auto& def_defnn_name : group->members) { for(auto& def_defnn_name : group->members) {
this->env->generalize(def_defnn_name, mgr); this->env->generalize(def_defnn_name, *group, mgr);
} }
} }
} }

View File

@@ -0,0 +1,17 @@
data List a = { Nil, Cons a (List a) }
defn fix f = { let { defn x = { f x } } in { x } }
defn fixpointOnes fo = { Cons 1 fo }
defn sumTwo l = {
case l of {
Nil -> { 0 }
Cons x xs -> {
x + case xs of {
Nil -> { 0 }
Cons y ys -> { y }
}
}
}
}
defn main = { sumTwo (fix fixpointOnes) }

View File

@@ -0,0 +1,19 @@
data List a = { Nil, Cons a (List a) }
defn sum l = {
case l of {
Nil -> { 0 }
Cons x xs -> { x + sum xs}
}
}
defn map f l = {
case l of {
Nil -> { Nil }
Cons x xs -> { Cons (f x) (map f xs) }
}
}
defn main = {
sum (map \x -> { x * x } (map (\x -> { x + x }) (Cons 1 (Cons 2 (Cons 3 Nil)))))
}

View File

@@ -0,0 +1,47 @@
data Bool = { True, False }
data List a = { Nil, Cons a (List a) }
defn if c t e = {
case c of {
True -> { t }
False -> { e }
}
}
defn mergeUntil l r p = {
let {
defn mergeLeft nl nr = {
case nl of {
Nil -> { Nil }
Cons x xs -> { if (p x) (Cons x (mergeRight xs nr)) Nil }
}
}
defn mergeRight nl nr = {
case nr of {
Nil -> { Nil }
Cons x xs -> { if (p x) (Cons x (mergeLeft nl xs)) Nil }
}
}
} in {
mergeLeft l r
}
}
defn const x y = { x }
defn sum l = {
case l of {
Nil -> { 0 }
Cons x xs -> { x + sum xs }
}
}
defn main = {
let {
defn firstList = { Cons 1 (Cons 3 (Cons 5 Nil)) }
defn secondList = { Cons 2 (Cons 4 (Cons 6 Nil)) }
} in {
sum (mergeUntil firstList secondList (const True))
}
}

View File

@@ -0,0 +1,23 @@
data Pair a b = { Pair a b }
defn packer = {
let {
data Packed a = { Packed a }
defn pack a = { Packed a }
defn unpack p = {
case p of {
Packed a -> { a }
}
}
} in {
Pair pack unpack
}
}
defn main = {
case packer of {
Pair pack unpack -> {
unpack (pack 3)
}
}
}

View File

@@ -205,7 +205,13 @@ void type_mgr::find_free(const type_ptr& t, std::set<std::string>& into) const {
void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const { void type_mgr::find_free(const type_scheme_ptr& t, std::set<std::string>& into) const {
std::set<std::string> monotype_free; std::set<std::string> monotype_free;
find_free(t->monotype, monotype_free); type_mgr limited_mgr;
for(auto& binding : types) {
auto existing_position = std::find(t->forall.begin(), t->forall.end(), binding.first);
if(existing_position != t->forall.end()) continue;
limited_mgr.types[binding.first] = binding.second;
}
limited_mgr.find_free(t->monotype, monotype_free);
for(auto& not_free : t->forall) { for(auto& not_free : t->forall) {
monotype_free.erase(not_free); monotype_free.erase(not_free);
} }

View File

@@ -8,11 +8,11 @@ void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const
} }
} }
void type_env::find_free_except(const type_mgr& mgr, const std::string& avoid, void type_env::find_free_except(const type_mgr& mgr, const group& avoid,
std::set<std::string>& into) const { std::set<std::string>& into) const {
if(parent != nullptr) parent->find_free(mgr, into); if(parent != nullptr) parent->find_free(mgr, into);
for(auto& binding : names) { for(auto& binding : names) {
if(binding.first == avoid) continue; if(avoid.members.find(binding.first) != avoid.members.end()) continue;
mgr.find_free(binding.second.type, into); mgr.find_free(binding.second.type, into);
} }
} }
@@ -65,7 +65,7 @@ void type_env::bind_type(const std::string& type_name, type_ptr t) {
type_names[type_name] = t; type_names[type_name] = t;
} }
void type_env::generalize(const std::string& name, type_mgr& mgr) { void type_env::generalize(const std::string& name, const group& grp, type_mgr& mgr) {
auto names_it = names.find(name); auto names_it = names.find(name);
if(names_it == names.end()) throw 0; if(names_it == names.end()) throw 0;
if(names_it->second.type->forall.size() > 0) throw 0; if(names_it->second.type->forall.size() > 0) throw 0;
@@ -73,7 +73,7 @@ void type_env::generalize(const std::string& name, type_mgr& mgr) {
std::set<std::string> free_in_type; std::set<std::string> free_in_type;
std::set<std::string> free_in_env; std::set<std::string> free_in_env;
mgr.find_free(names_it->second.type->monotype, free_in_type); mgr.find_free(names_it->second.type->monotype, free_in_type);
find_free_except(mgr, name, free_in_env); find_free_except(mgr, grp, free_in_env);
for(auto& free : free_in_type) { for(auto& free : free_in_type) {
if(free_in_env.find(free) != free_in_env.end()) continue; if(free_in_env.find(free) != free_in_env.end()) continue;
names_it->second.type->forall.push_back(free); names_it->second.type->forall.push_back(free);

View File

@@ -2,6 +2,7 @@
#include <map> #include <map>
#include <string> #include <string>
#include <set> #include <set>
#include "graph.hpp"
#include "type.hpp" #include "type.hpp"
struct type_env; struct type_env;
@@ -29,7 +30,7 @@ struct type_env {
type_env() : type_env(nullptr) {} type_env() : type_env(nullptr) {}
void find_free(const type_mgr& mgr, std::set<std::string>& into) const; void find_free(const type_mgr& mgr, std::set<std::string>& into) const;
void find_free_except(const type_mgr& mgr, const std::string& avoid, void find_free_except(const type_mgr& mgr, const group& avoid,
std::set<std::string>& into) const; std::set<std::string>& into) const;
type_scheme_ptr lookup(const std::string& name) const; type_scheme_ptr lookup(const std::string& name) const;
bool is_global(const std::string& name) const; bool is_global(const std::string& name) const;
@@ -41,7 +42,7 @@ struct type_env {
void bind(const std::string& name, type_scheme_ptr t, void bind(const std::string& name, type_scheme_ptr t,
visibility v = visibility::local); visibility v = visibility::local);
void bind_type(const std::string& type_name, type_ptr t); void bind_type(const std::string& type_name, type_ptr t);
void generalize(const std::string& name, type_mgr& mgr); void generalize(const std::string& name, const group& grp, type_mgr& mgr);
}; };

View File

@@ -5,3 +5,9 @@ theme = "vanilla"
pygmentsCodeFences = true pygmentsCodeFences = true
pygmentsStyle = "github" pygmentsStyle = "github"
summaryLength = 20 summaryLength = 20
[markup]
[markup.tableOfContents]
endLevel = 4
ordered = false
startLevel = 3

View File

@@ -144,3 +144,5 @@ Here are the posts that I've written so far for this series:
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}}) * [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
* [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}}) * [Polymorphism]({{< relref "10_compiler_polymorphism.md" >}})
* [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}}) * [Polymorphic Data Types]({{< relref "11_compiler_polymorphic_data_types.md" >}})
* [Let/In and Lambdas]({{< relref "12_compiler_let_in_lambda/index.md" >}})

View File

@@ -396,4 +396,5 @@ Result: 4
This looks good! We have added support for polymorphic data types to our compiler. This looks good! We have added support for polymorphic data types to our compiler.
We are now free to move on to `let/in` expressions, __lambda functions__, and __Input/Output__, We are now free to move on to `let/in` expressions, __lambda functions__, and __Input/Output__,
as promised! I'll see you then! as promised, starting with [part 12]({{< relref "12_compiler_let_in_lambda/index.md" >}}) - `let/in`
and lambdas!

View File

@@ -1,14 +1,13 @@
--- ---
title: Compiling a Functional Language Using C++, Part 12 - Let/In and Lambdas title: Compiling a Functional Language Using C++, Part 12 - Let/In and Lambdas
date: 2020-04-20T20:15:16-07:00 date: 2020-06-21T00:50:07-07:00
tags: ["C and C++", "Functional Languages", "Compilers"] tags: ["C and C++", "Functional Languages", "Compilers"]
description: "In this post, we extend our language with let/in expressions and lambda functions." description: "In this post, we extend our language with let/in expressions and lambda functions."
draft: true
--- ---
Now that our language's type system is more fleshed out and pleasant to use, it's time to shift our focus to the ergonomics of the language itself. I've been mentioning `let/in` expressions and __lambda expressions__ for a while now. The former will let us create names for expressions that are limited to a certain scope (without having to create global variable bindings), while the latter will allow us to create functions without giving them any name at all. Now that our language's type system is more fleshed out and pleasant to use, it's time to shift our focus to the ergonomics of the language itself. I've been mentioning `let/in` and __lambda__ expressions for a while now. The former will let us create names for expressions that are limited to a certain scope (without having to create global variable bindings), while the latter will allow us to create functions without giving them any name at all.
Let's take a look at `let/in` expressions first, to make sure we're all on the same page about what it is we're trying to implement. Let's start with some rather basic examples, and then move on to more complex examples. The most basic use of a `let/in` expression is, in Haskell: Let's take a look at `let/in` expressions first, to make sure we're all on the same page about what it is we're trying to implement. Let's start with some rather basic examples, and then move on to more complex ones. A very basic use of a `let/in` expression is, in Haskell:
```Haskell ```Haskell
let x = 5 in x + x let x = 5 in x + x
@@ -93,7 +92,7 @@ addSingle6 x = 6 + x
-- ... and so on ... -- ... and so on ...
``` ```
But now, we end up creating several functions with almost identical bodies, with the exception of the free variables themselves. Wouldn't it be better to perform the well-known strategy of reducing code duplication by factoring out parameters, and leaving only instance of the repeated code? We would end up with: But now, we end up creating several functions with almost identical bodies, with the exception of the free variables themselves. Wouldn't it be better to perform the well-known strategy of reducing code duplication by factoring out parameters, and leaving only one instance of the repeated code? We would end up with:
```Haskell {linenos=table} ```Haskell {linenos=table}
addToAll n xs = map (addSingle n) xs addToAll n xs = map (addSingle n) xs
@@ -145,11 +144,48 @@ to `let/in`, and that's what we'll be using in our language.
This technique of replacing captured variables with arguments, and pulling closures into the global scope to aid compilation, is called [Lambda Lifting](https://en.wikipedia.org/wiki/Lambda_lifting). Its name is no coincidence - lambda functions need to undergo the same kind of transformation as our nested definitions (unlike nested definitions, though, lambda functions need to be named). This is why they are included in this post together with `let/in`! This technique of replacing captured variables with arguments, and pulling closures into the global scope to aid compilation, is called [Lambda Lifting](https://en.wikipedia.org/wiki/Lambda_lifting). Its name is no coincidence - lambda functions need to undergo the same kind of transformation as our nested definitions (unlike nested definitions, though, lambda functions need to be named). This is why they are included in this post together with `let/in`!
What are lambda functions, by the way? A lambda function is just a function
expression that doesn't have a name. For example, if we had Haskell code like
this:
```Haskell
double x = x + x
doubleList xs = map double xs
```
We could rewrite it using a lambda function as follows:
```Haskell
doubleList xs = map (\x -> x + x) xs
```
As you can see, a lambda is an expression in the form `\x -> y` where `x` can
be any variable and `y` can be any expression (including another lambda).
This represents a function that, when applied to a value `x`, will perform
the computation given by `y`. Lambdas are useful when creating single-use
functions that we don't want to make globally available.
Lifting lambda functions will effectively rewrite our program in the
opposite direction to the one shown, replacing the lambda with a reference
to a global declaration which will hold the function's body. Just like
with `let/in`, we will represent captured variables using arguments
and partial application. For instance, when starting with:
```Haskell
addToAll n xs = map (\x -> n + x) xs
```
We would output the following:
```Haskell
addToAll n xs = map (lambda n) xs
lambda n x = n + x
```
### Implementation ### Implementation
Now that we understand what we have to do, it's time to jump straight into Now that we understand what we have to do, it's time to jump straight into
doing it. First, we need to refactor our current code so allow for the changes doing it. First, we need to refactor our current code to allow for the changes
we're going to make; then, we can implement `let/in` expressions; finally, we're going to make; then, we will use the new tools we defined to implement `let/in` expressions and lambda functions.
we'll tackle lambda functions.
#### Infrastructure Changes #### Infrastructure Changes
When finding captured variables, the notion of _free variables_ once again When finding captured variables, the notion of _free variables_ once again
@@ -168,8 +204,8 @@ since it's not defined locally.
The algorithm that we used for computing free variables was rather biased. The algorithm that we used for computing free variables was rather biased.
Previously, we only cared about the difference between a local variable Previously, we only cared about the difference between a local variable
(defined somewhere in a function's body, or referring to one of the function's (defined somewhere in a function's body, or referring to one of the function's
parameters) and a global variable (referring to a function name). This shows in parameters) and a global variable (referring to a global function).
our code for `find_free`. Consider, for example, this segment: This shows in our code for `find_free`. Consider, for example, this snippet:
{{< codelines "C++" "compiler/11/ast.cpp" 33 36 >}} {{< codelines "C++" "compiler/11/ast.cpp" 33 36 >}}
@@ -449,17 +485,17 @@ we're trying to operate on is global or not? I propose a flag in our
this, we update the implementation of `type_env` to map variables to this, we update the implementation of `type_env` to map variables to
values of a struct `variable_data`: values of a struct `variable_data`:
{{< codelines "C++" "compiler/12/type_env.hpp" 13 22 >}} {{< codelines "C++" "compiler/12/type_env.hpp" 14 23 >}}
The `visibility` enum is defined as follows: The `visibility` enum is defined as follows:
{{< codelines "C++" "compiler/12/type_env.hpp" 10 10 >}} {{< codelines "C++" "compiler/12/type_env.hpp" 11 11 >}}
As you can see from the above snippet, we also added a `mangled_name` field As you can see from the above snippet, we also added a `mangled_name` field
to the new `variable_data` struct. We will be using this field shortly. We to the new `variable_data` struct. We will be using this field shortly. We
also add a few methods to our `type_env`, and end up with the following: also add a few methods to our `type_env`, and end up with the following:
{{< codelines "C++" "compiler/12/type_env.hpp" 31 44 >}} {{< codelines "C++" "compiler/12/type_env.hpp" 32 45 >}}
We will come back to `find_free` and `find_free_except`, as well as We will come back to `find_free` and `find_free_except`, as well as
`set_mangled_name` and `get_mangled_name`. For now, we just adjust `bind` to `set_mangled_name` and `get_mangled_name`. For now, we just adjust `bind` to
@@ -536,7 +572,7 @@ And the latter:
{{< codelines "C++" "compiler/12/type_env.cpp" 39 45 >}} {{< codelines "C++" "compiler/12/type_env.cpp" 39 45 >}}
We don't allow the `set_mangled_name` to affect variables that are declared We don't allow `set_mangled_name` to affect variables that are declared
above the receiving `type_env`, and use the empty string as a 'none' value. above the receiving `type_env`, and use the empty string as a 'none' value.
Now, when lifting data type constructors, we'll be able to use Now, when lifting data type constructors, we'll be able to use
`set_mangled_name` to make sure constructor calls are made correctly. We `set_mangled_name` to make sure constructor calls are made correctly. We
@@ -574,69 +610,379 @@ The
observant reader will have noticed that we have a new method: `translate`. observant reader will have noticed that we have a new method: `translate`.
This is a new method for all `ast` descendants, and will implement the This is a new method for all `ast` descendants, and will implement the
steps of moving definitions to the global scope and transforming the steps of moving definitions to the global scope and transforming the
program. Before we get to it, though, let's quickly see the parsing program. Before we get to it, though, let's look at the other relevant
rules for `ast_let` and `ast_lambda`: pieces of code for `ast_let` and `ast_lambda`. First, their grammar
rules in `parser.y`:
{{< codelines "text" "compiler/12/parser.y" 107 115 >}} {{< codelines "text" "compiler/12/parser.y" 107 115 >}}
This is pretty similar to the rest of the grammar, so I will give this no This is pretty similar to the rest of the grammar, so I will give this no
further explanation. further explanation. Next, their `find_free` and `typecheck` code.
We can start with `ast_let`:
{{< todo >}} {{< codelines "C++" "compiler/12/ast.cpp" 275 289 >}}
Explain typechecking for lambda functions and let/in expressions.
{{< /todo >}}
{{< todo >}} As you can see, `ast_let::find_free` works in a similar manner to `ast_case::find_free`.
Explain free variable detection for lambda functions and let/in expressions. It finds the free variables in the `in` node as well as in each of the definitions
{{< /todo >}} (taking advantage of the fact that `definition_group::find_free` populates the
given set with "far away" free variables). It then filters out any variables bound in
the `let` from the set of free variables in `in`, and returns the result.
Typechecking in `ast_let` relies on `definition_group::typecheck`, which holds
all of the required functionality for checking the new definitions.
Once the definitions are typechecked, we use their type information to
typecheck the `in` part of the expression (passing `definitions.env` to the
call to `typecheck` to make the new definitions visible).
Next, we look at `ast_lambda`:
{{< codelines "C++" "compiler/12/ast.cpp" 344 366 >}}
Again, `ast_lambda::find_free` works similarly to `definition_defn`, stripping
the variables expected by the function from the body's list of free variables.
Also like `definition_defn`, this new node remembers the free variables in
its body, which we will later use for lifting.
Typechecking in this node also proceeds similarly to `definition_defn`. We create
new type variables for each parameter and for the return value, and build up
a function type called `full_type`. We then typecheck the body using the
new environment (which now includes the variables), and return the function type we came up with.
#### Translation #### Translation
While collecting all of the definitions into a global list, we can Recalling the transformations we described earlier, we can observe two
also do some program transformations. Let's return to our earlier example: major steps to what we have to do:
```Haskell {linenos=table} 1. Move the body of the original definition into its own
fourthPower x = square * square
where
square = x * x
```
We said it should be translated into something like:
```Haskell {linenos=table}
fourthPower x = square * square
where square = square' x
square' x = x * x
```
In our language, the original program above would be:
```text {linenos=table}
defn fourthPower x = {
let {
defn square = { x * x }
} in {
square * square
}
}
```
And the translated version would be:
```text {linenos=table}
defn fourthPower x = {
let {
defn square = { square' x }
} in {
square * square
}
}
defn square' x = { x * x }
```
Setting aside for the moment the naming of `square'` and `square`, we observe
that we want to perform the following operations:
1. Move the body of the original definition of `square` into its own
global definition, adding all the captured variables as arguments. global definition, adding all the captured variables as arguments.
2. Replace the right hand side of the `let/in` expression with an application 2. Replace the right hand side of the `let/in` expression with an application
of the global definition to the variables it requires. of the global definition to the variables it requires.
We will implement these in a new `translate` method, with the following
signature:
```C++
void ast::translate(global_scope& scope);
```
The `scope` parameter and its `add_function` and `add_constructor` methods will
be used to add definitions to the global scope. Each AST node will also
use this method to implement the second step. Currently, only
`ast_let` and `ast_lambda` will need to modify themselves - all other
nodes will simply recursively call this method on their children. Let's jump
straight into implementing this method for `ast_let`:
{{< codelines "C++" "compiler/12/ast.cpp" 291 316 >}}
Since data type definitions don't really depend on anything else, we process
them first. This amounts to simply calling the `definition_data::into_globals`
method, which itself simply calls `global_scope::add_constructor`:
{{< codelines "C++" "compiler/12/definition.cpp" 86 92 >}}
Note how `into_globals` updates the mangled name of its constructor
via `set_mangled_name`. This will help us decide which global
function to call during code generation. More on that later.
Starting with line 295, we start processing the function definitions
in the `let/in` expression. We remember how many arguments were
explicitly added to the function definition, and then call the
definition's `into_global` method. This method is implemented
as follows:
{{< codelines "C++" "compiler/12/definition.cpp" 40 49 >}}
First, this method collects all the non-global free variables in
its body, which will need to be passed to the global definition
as arguments. It then combines this list with the arguments
the user explicitly added to it, recursively translates
its body, and creates a new global definition using `add_function`.
We return to `ast_let::translate` at line 299. Here,
we determine how many variables ended up being captured, by
subtracting the number of explicit parameters from the total
number of parameters the new global definition has. This number,
combined with the fact that we added all the 'implicit' arguments
to the function to the beginning of the list, will let us
iterate over all implicit arguments, creating a chain of partial
function applications.
But how do we build the application? We could use the mangled name
of the function, but this seems inelegant, especially since we
already keep track of mangling information in `type_env`. Instead,
we create a new, local environment, in which we place an updated
binding for the function, marking it global, and setting
its mangled name to the one generated by `global_scope`. This work is done
on lines 301-303. We create a reference to the global function
using the new environment on lines 305 and 306, and apply it to
all the implicit arguments on lines 307-313. Finally, we
add the new 'basic' equation into `translated_definitions`.
Let's take a look at translating `ast_lambda` next:
{{< codelines "C++" "compiler/12/ast.cpp" 368 392 >}}
Once again, on lines 369-375 we find all the arguments to the
global definition. On lines 377-382 we create a new global
function and a mangled environment, and start creating the
chain of function applications. On lines 384-390, we actually
create the arguments and apply the function to them. Finally,
on line 391, we store this new chain of applications in the
`translated` field.
#### Compilation
There's still another piece of the puzzle missing, and
that's how we're going to compile `let/in` expressions into
G-machine instructions. We have allowed these expressions
to be recursive, and maybe even mutually recursive. This
worked fine with global definitions; instead of specifying
where on the stack we can find the reference to a global
function, we just created a new global node, and called
it good. Things are different now, though, because the definitions
we're referencing aren't _just_ global functions; they are partial
applications of a global function. And to reference themselves,
or their neighbors, they have to have a handle on their own nodes. We do this
using an instruction that we foreshadowed in part 5, but didn't use
until just now: __Alloc__.
__Alloc__ creates placeholder nodes on the stack. These nodes
are indirections, the same kind that we use for lazy evaluation
and sharing elsewhere. We create an indirection node for every
definition that we then build; when an expression needs access
to a definition, we give it the indirection node. After
building the partial application graph for an expression,
we use __Update__, making the corresponding indirection
point to this new graph. This way, the 'handle' to a
definition is always accessible, and once the definition's expression
is built, the handle correctly points to it. Here's the implementation:
{{< codelines "C++" "compiler/12/ast.cpp" 319 332 >}}
First, we create the __Alloc__ instruction. Then, we update
our environment to map each definition name to a location
within the newly allocated batch of nodes. Since we iterate
the definitions in order, 'pushing' them into our environment,
we end up with the convention of having the later definitions
closer to the top of the G-machine stack. Thus, when we
iterate the definitions again, this time to compile their
bodies, we have to do so starting with the highest offset,
and working our way down to __Update__-ing the top of the stack.
Once the definitions have been compiled, we proceed to compiling
the `in` part of the expression as normal, using our updated
environment. Finally, we use __Slide__ to get rid of the definition
graphs, cleaning up the stack.
Compiling the `ast_lambda` is far more straightforward. We just
compile the resulting partial application as we normally would have:
{{< codelines "C++" "compiler/12/ast.cpp" 394 396 >}}
One more thing. Let's adopt the convention of storing __mangled__
names into the compilation environment. This way, rather than looking up
mangled names only for global functions, which would be a 'gotcha'
for anyone working on the compiler, we will always use the mangled
names during compilation. To make this change, we make sure that
`ast_case` also uses `mangled_name`:
{{< codelines "C++" "compiler/12/ast.cpp" 242 242 >}}
We also update the logic for `ast_lid::compile` to use the mangled
name information:
{{< codelines "C++" "compiler/12/ast.cpp" 52 58 >}}
#### Fixing Type Generalization
This is a rather serious bug that made its way into the codebase
since part 10. Recall that we can only generalize type variables
that are _not_ free in the environment. Thus far, we haven't checked that,
and we really should: I ran into incorrectly inferred types
in my first test of the `let/in` language feature.
We need to make our code capable of finding free variables in the
type environment. This requires the `type_mgr`, which associates
with type variables the real types they represent, if any. We
thus create methods with signatures as follows:
```C++
void type_env::find_free(const type_mgr& mgr, std::set<std::string>& into) const;
void type_env::find_free_except(const type_mgr& mgr, const group& avoid,
std::set<std::string>& into) const;
```
Why `find_free_except`? When generalizing a variable whose type was already
stored in the environment, all the type variables we could generalize would
not be 'free'. If they only occur in the type we're generalizing, though,
we shouldn't let that stop us! More generally, if we see type variables that
are only found in the same mutually recursive group as the binding we're
generalizing, we are free to generalize them too. Thus, we pass in
a reference to a `group`, and check if a variable is a member of that group
before searching it for free type variables. The implementations of the two
methods are straightforward:
{{< codelines "C++" "compiler/12/type_env.cpp" 4 18 >}}
Note that `find_free_except` calls `find_free` in its recursive call. This
is not a bug: we _do_ want to include free type variables from bindings
that have the same name as the variable we're generalizing, but aren't found
in the same scope. As far as we're concerned, they're different variables!
The two methods use another `find_free` method which we add to `type_mgr`:
{{< codelines "C++" "compiler/12/type.cpp" 206 219 >}}
This one is a bit of a hack. Typically, while running `find_free`, a
`type_mgr` will resolve any type variables. However, variables from the
`forall` quantifier of a type scheme should not be resolved, since they
are explicitly generic. To prevent the type manager from erroneously resolving
such type variables, we create a new type manager that does not have
these variables bound to anything, and thus marks them as free. We then
filter these variables out of the final list of free variables.
Finally, `generalize` makes sure not to use variables that it finds free:
{{< codelines "C++" "compiler/12/type_env.cpp" 68 81 >}}
#### Putting It All Together
All that's left is to tie the parts we've created into one coherent whole
in `main.cpp`. First of all, since we moved all of the LLVM-related
code into `global_scope`, we can safely replace that functionality
in `main.cpp` with a method call:
{{< codelines "C++" "compiler/12/main.cpp" 121 132 >}}
On the other hand, we need top-level logic to handle `definition_group`s.
This is pretty straightforward, and the main trick is to remember to
update the function's mangled name. Right now, depending on the choice
of mangling algorithm, it's possible even for top-level functions to
have their names changed, and we must account for that. The whole code is:
{{< codelines "C++" "compiler/12/main.cpp" 52 62 >}}
Finally, we call `global_scope`'s methods in `main()`:
{{< codelines "C++" "compiler/12/main.cpp" 148 151 >}}
That's it! Please note that I've mentioned or hinted at minor changes to the
codebase. Detailing every single change this late into the project is
needlessly time-consuming and verbose; Gitea reports that I've made 677
insertions into and 215 deletions from the code. As always, I provide
the [source code for the compiler](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/12), and you can also take a look at the
[Gitea-generated diff](https://dev.danilafe.com/Web-Projects/blog-static/compare/1905601aaa96d11c771eae9c56bb9fc105050cda...21851e3a9c552383ee8c4bc878ea06e7d28c333e)
at the time of writing. If you want to follow along, feel free to check
them out!
### Running Our Programs
It's important to test all the language features that we just added. This
includes recursive definitions, nested function dependency cycles, and
uses of lambda functions. Some of the following examples will be rather
silly, but they should do a good job of checking that everything works
as we expect. Let's start with a simple use of a recursive definition
inside a `let/in`. A classic definition in that form is of `fix`
(the fixpoint combinator):
```Haskell
fix f = let x = f x in x
```
This defines `x` to be `f x`, which by substitution becomes `f (f x)`, and then
`f (f (f x))` and so on. The fixpoint combinator allows one to write a
recursive function that doesn't use its own name in the body. Rather,
we write a function expecting to receive 'itself' as a value:
```Haskell
fix :: (a -> a) -> a
factRec :: (Int -> Int) -> Int -> Int
factRec f x = if x == 0 then 1 else x * f (x - 1)
fact :: Int -> Int
fact = fix factRec
```
Notice that `factRec` doesn't reference itself, but rather takes
as an argument a function `f` that it expects to be 'factorial',
and uses that in its recursive case. We can write something similar
in our language, perhaps to create an infinite list of ones:
{{< codeblock "text" "compiler/12/examples/fixpoint.txt" >}}
We want `sumTwo` to take the first two elements from the list,
and return their sum. For an infinite list of ones, we expect
this sum to be equal to 2, and it is:
```
Result: 2
```
Next, let's try to define a function which has a mutually recursive pair
of definitions inside of a `let/in`. Let's also make these expressions
reference a function from the global scope, so that we know our
dependency tracking works as expected:
{{< codeblock "text" "compiler/12/examples/letin.txt" >}}
Here, we have a function `mergeUntil` which, given two lists
and a predicate, combines the two lists as long as
the predicate returns `True`. It does so using a convoluted
pair of mutually recursive functions, one of which
unpacks the left list, and the other the right. Each of the
functions calls the global function `if`. We also use two
definitions inside of `main` to create the two lists we're
going to merge. The compiler outputs the following (correct)
types:
```
const: forall bb bc . bc -> bb -> bc
if: Bool* -> List* Int* -> List* Int* -> List* Int*
main: Int*
mergeUntil: List* Int* -> List* Int* -> (Int* -> Bool*) -> List* Int*
sum: List* Int* -> Int*
```
And the result is 21, as would be expected from the sum of the numbers 1-6:
```
Result: 21
```
Let's try lambda functions now. We can try using them for a higher-order function
like `map`:
{{< codeblock "text" "compiler/12/examples/lambda.txt" >}}
In this example, we first double every element in the list, then square it,
and finally take the sum. This should give us 4+16+36 = 56, and so it does:
```
Result: 56
```
Finally, let's do some magic with a locally-declared data type. We'll make a
"packer" that creates a wrapped instance of a type, `Packed a`. Since the
constructor of this data type is not globally visible, it's not possible
to get the value back out, except by using an 'unpacking' function that
we provide:
{{< codeblock "text" "compiler/12/examples/packed.txt" >}}
Here, the `packer` definition returns a pair of the 'packing'
and 'unpacking' functions. The 'packing' function simply applies
the constructor of `Packed` to its argument, while the 'unpacking'
function performs pattern matching (which is possible since the
data type is still in scope there). We expect `unpack (pack 3)` to
return 3, and it does:
```
Result: 3
```
Trying to pattern match, though, doesn't work, just like we would want!
This is enough to convince me that our changes do, indeed, work! Of
the 'major' components that I wanted to cover, only __Input/Output__
remains! Additionally, a [lobste.rs](https://lobste.rs) user suggested
that we also cover namespacing, and perhaps we will look into that as well.
Before either of those things, though, I think that I want to go through
the compiler and perform another round of improvements, similarly to
[part 4]({{< relref "04_compiler_improvements" >}}). It's hard to do a lot
of refactoring while covering new content, since major changes need to
be explained and presented for the post to make sense. I hope to see
you in these future posts!

View File

@@ -12,7 +12,7 @@ __py-starbound__, nicely enough, actually has a file named `FORMATS.md`. This fi
> This section will contain information on how to retrieve a value from a BTreeDB5 database. > This section will contain information on how to retrieve a value from a BTreeDB5 database.
Not very helpful. Before I go into what I managed to determine from the code, we may first take a look at one thing that we already know about the world format - it is a [B-Tree](https://en.wikipedia.org/wiki/B-tree). Not very helpful. Before I go into what I managed to determine from the code, we may first take a look at one thing that we already know about the world format - it is a [B-Tree](https://en.wikipedia.org/wiki/B-tree).
## Binary Search Trees ### Binary Search Trees
The B-Tree is a generalization of a Binary Search Tree, or BST for short. Binary Search trees (and B-Trees in general) operate on data that can be ordered consistently, the simplest example being numbers. For instance, as an example, I'll be using a BST that holds integers. A BST is made up of nodes, objects that actually hold the pieces of data that the tree itself organizes. The B-Tree is a generalization of a Binary Search Tree, or BST for short. Binary Search trees (and B-Trees in general) operate on data that can be ordered consistently, the simplest example being numbers. For instance, as an example, I'll be using a BST that holds integers. A BST is made up of nodes, objects that actually hold the pieces of data that the tree itself organizes.
In a BST, the nodes are organized in a simple way. Each node can have up to two _children_ (sub-nodes), and each of those can have up to two children, etc. The children are generally classified as _right_ and _left_. Conventionally, left children always have a value that is below (or comes before) the value of the node whose child they are (their _parent_), and right children have a bigger value. In a BST, the nodes are organized in a simple way. Each node can have up to two _children_ (sub-nodes), and each of those can have up to two children, etc. The children are generally classified as _right_ and _left_. Conventionally, left children always have a value that is below (or comes before) the value of the node whose child they are (their _parent_), and right children have a bigger value.
@@ -45,7 +45,7 @@ __Although the average efficiency of a Binary Search Tree is \\(O(\log n)\\), me
This isn't good enough, and many clever algorithms have been invented to speed up the lookup of the tree by making sure that it remains _balanced_ - that is, it _isn't_ arranged like a simple list. Some of these algorithms include [Red-Black Trees](https://en.wikipedia.org/wiki/Red%E2%80%93black_tree), [AVL Trees](https://en.wikipedia.org/wiki/AVL_tree), and, of course, B-Trees. This isn't good enough, and many clever algorithms have been invented to speed up the lookup of the tree by making sure that it remains _balanced_ - that is, it _isn't_ arranged like a simple list. Some of these algorithms include [Red-Black Trees](https://en.wikipedia.org/wiki/Red%E2%80%93black_tree), [AVL Trees](https://en.wikipedia.org/wiki/AVL_tree), and, of course, B-Trees.
## B-Trees ### B-Trees
B-Trees are a generalization of Binary Search Trees. That means that every Binary Search Tree is a B-Tree, but not all B-Trees are BSTs. The key difference lies in the fact that B-Trees' nodes aren't limited to having only two child nodes, and can also have more than one value. B-Trees are a generalization of Binary Search Trees. That means that every Binary Search Tree is a B-Tree, but not all B-Trees are BSTs. The key difference lies in the fact that B-Trees' nodes aren't limited to having only two child nodes, and can also have more than one value.
Each B-Tree node is a sorted array of values. That is, instead of a single number like the BST that we've looked at, it has multiple, and these numbers _must_ be sorted. Below are some examples of B-Tree nodes: Each B-Tree node is a sorted array of values. That is, instead of a single number like the BST that we've looked at, it has multiple, and these numbers _must_ be sorted. Below are some examples of B-Tree nodes:
@@ -64,7 +64,7 @@ This is solved using another property of B-Trees - the number of children of a n
If we were looking for the number 15, we'd look between the 10 and the 20, examining the 2nd node, and if we were looking for 45 we'd look past the 30, at the 4th node. If we were looking for the number 15, we'd look between the 10 and the 20, examining the 2nd node, and if we were looking for 45 we'd look past the 30, at the 4th node.
## Starbound B-Trees and BTreeDB5 ### Starbound B-Trees and BTreeDB5
The BTreeDB5 data structure uses something other than integers for its keys - it uses sequences of bytes. These bytes are compared in a very similar fashion to integers. The game first looks at the first number in the sequence of bytes (like the largest digit in an integer), and if that's the same, moves on to the next one. Also, Starbound B-Trees not only have the values, or _keys_, that they use to find data, but the data itself. The BTreeDB5 data structure uses something other than integers for its keys - it uses sequences of bytes. These bytes are compared in a very similar fashion to integers. The game first looks at the first number in the sequence of bytes (like the largest digit in an integer), and if that's the same, moves on to the next one. Also, Starbound B-Trees not only have the values, or _keys_, that they use to find data, but the data itself.
The "nodes" in the BTreeDB are called "blocks" and are one of three types - "index", "leaf", and "free" nodes. "Index" nodes are like the `(10, 20, 30)` node in the above example - they point to other nodes, but actually store no data themselves. The "leaf" nodes actually contain the data, and, if that data is longer than the maximum block size, "leaf" nodes contain the index of the next leaf node where the user might continue to read the data. The "free" nodes are simply free data, empty and ready for Starbound to fill them with something useful. The "nodes" in the BTreeDB are called "blocks" and are one of three types - "index", "leaf", and "free" nodes. "Index" nodes are like the `(10, 20, 30)` node in the above example - they point to other nodes, but actually store no data themselves. The "leaf" nodes actually contain the data, and, if that data is longer than the maximum block size, "leaf" nodes contain the index of the next leaf node where the user might continue to read the data. The "free" nodes are simply free data, empty and ready for Starbound to fill them with something useful.

View File

@@ -0,0 +1,47 @@
@import "variables.scss";
@import "mixins.scss";
// Width given to every block placed in the page margin (see `margin-content`).
$margin-width: 30rem;
// Extra spacing added between the container and margin content.
$margin-inner-offset: 0.5rem;
// Extra spacing counted on the far side of margin content when computing
// the media-query breakpoints below.
$margin-outer-offset: 1rem;
// Applies the passed content block only on screens too narrow to fit the
// container together with a margin column on both sides.
@mixin below-two-margins {
    $single-margin-span: $margin-width + $margin-inner-offset + $margin-outer-offset;
    $two-margin-breakpoint: $container-width-threshold + 2 * $single-margin-span;

    @media screen and (max-width: $two-margin-breakpoint) {
        @content;
    }
}
// Applies the passed content block only on screens too narrow to fit the
// container together with a single margin column.
@mixin below-one-margin {
    $single-margin-span: $margin-width + $margin-inner-offset + $margin-outer-offset;
    $one-margin-breakpoint: $container-width-threshold + $single-margin-span;

    @media screen and (max-width: $one-margin-breakpoint) {
        @content;
    }
}
// Base styling shared by anything placed in the page margin: an absolutely
// positioned block of fixed width whose padding and border count towards
// that width.
@mixin margin-content {
    position: absolute;
    display: block;
    box-sizing: border-box;
    width: $margin-width;
}
// Pushes margin content out past the left edge of its positioning parent,
// and hides it once the screen cannot fit two margin columns.
@mixin margin-content-left {
    $left-margin-shift: $margin-width + $container-min-padding + $margin-inner-offset;

    left: 0;
    margin-left: -$left-margin-shift;

    @include below-two-margins {
        display: none;
    }
}
// Pushes margin content out past the right edge of its positioning parent,
// and hides it once the screen cannot fit even one margin column.
@mixin margin-content-right {
    $right-margin-shift: $margin-width + $container-min-padding + $margin-inner-offset;

    right: 0;
    margin-right: -$right-margin-shift;

    @include below-one-margin {
        display: none;
    }
}

View File

@@ -6,7 +6,7 @@
} }
@mixin below-container-width { @mixin below-container-width {
@media screen and (max-width: $container-width){ @media screen and (max-width: $container-width-threshold){
@content; @content;
} }
} }

View File

@@ -1,28 +1,10 @@
@import "variables.scss"; @import "variables.scss";
@import "mixins.scss"; @import "mixins.scss";
@import "margin.scss";
$sidenote-accommodate-shrink: 10rem;
$sidenote-width: 30rem;
$sidenote-offset: 1.5rem;
$sidenote-padding: 1rem; $sidenote-padding: 1rem;
$sidenote-highlight-border-width: .2rem; $sidenote-highlight-border-width: .2rem;
@mixin below-two-sidenotes {
@media screen and
(max-width: $container-width +
2 * ($sidenote-width + 2 * $sidenote-offset)) {
@content;
}
}
@mixin below-one-sidenote {
@media screen and
(max-width: $container-width +
($sidenote-width + 3 * $sidenote-offset)) {
@content;
}
}
.sidenote { .sidenote {
&:hover { &:hover {
.sidenote-label { .sidenote-label {
@@ -48,25 +30,19 @@ $sidenote-highlight-border-width: .2rem;
} }
.sidenote-content { .sidenote-content {
display: block; @include margin-content;
position: absolute; @include bordered-block;
width: $sidenote-width;
margin-top: -1.5rem; margin-top: -1.5rem;
padding: $sidenote-padding;
text-align: left;
&.sidenote-right { &.sidenote-right {
right: 0; @include margin-content-right;
margin-right: -($sidenote-width + $sidenote-offset);
} }
&.sidenote-left { &.sidenote-left {
left: 0; @include margin-content-left;
margin-left: -($sidenote-width + $sidenote-offset);
} }
@include bordered-block;
padding: $sidenote-padding;
box-sizing: border-box;
text-align: left;
} }
.sidenote-delimiter { .sidenote-delimiter {
@@ -78,36 +54,22 @@ $sidenote-highlight-border-width: .2rem;
margin-top: 1rem; margin-top: 1rem;
margin-bottom: 1rem; margin-bottom: 1rem;
width: 100%; width: 100%;
display: none;
.sidenote-checkbox:checked ~ & { .sidenote-checkbox:checked ~ & {
display: block; display: block;
} }
} }
@include below-two-sidenotes { @include below-two-margins {
.sidenote-content.sidenote-left { .sidenote-content.sidenote-left {
@include hidden-sidenote; @include hidden-sidenote;
margin-left: 0rem; margin-left: 0rem;
} }
.container {
left: -$sidenote-width/2
}
}
@include below-one-sidenote {
.post-content {
max-width: 100%;
} }
@include below-one-margin {
.sidenote-content.sidenote-right { .sidenote-content.sidenote-right {
@include hidden-sidenote; @include hidden-sidenote;
margin-right: 0rem; margin-right: 0rem;
} }
.container {
position: initial;
} }
}

View File

@@ -1,5 +1,7 @@
@import "variables.scss"; @import "variables.scss";
@import "mixins.scss"; @import "mixins.scss";
@import "margin.scss";
@import "toc.scss";
body { body {
font-family: $font-body; font-family: $font-body;
@@ -38,13 +40,22 @@ pre code {
display: block; display: block;
padding: 0.5rem; padding: 0.5rem;
overflow-x: auto; overflow-x: auto;
background-color: $code-color; border: $code-border;
} }
div.highlight table pre { div.highlight table {
border: $code-border !important;
border-radius: 0px;
pre {
margin: 0; margin: 0;
} }
code {
border: none;
}
}
.container { .container {
position: relative; position: relative;
margin: auto; margin: auto;
@@ -53,7 +64,17 @@ div.highlight table pre {
box-sizing: border-box; box-sizing: border-box;
@include below-container-width { @include below-container-width {
padding: 0rem 1rem 0rem 1rem; padding: 0 $container-min-padding 0 $container-min-padding;
margin: 0;
max-width: $container-width + 2 * $container-min-padding;
}
@include below-two-margins {
left: -($margin-width + $margin-inner-offset + $margin-outer-offset)/2;
}
@include below-one-margin {
left: 0;
} }
} }
@@ -62,8 +83,7 @@ div.highlight table pre {
background-color: $primary-color; background-color: $primary-color;
border: none; border: none;
color: white; color: white;
transition: color 0.25s; transition: color 0.25s, background-color 0.25s;
transition: background-color 0.25s;
text-align: left; text-align: left;
&:focus { &:focus {

View File

@@ -0,0 +1,49 @@
@import "variables.scss";
@import "mixins.scss";
// The table of contents reuses the code block palette for its background
// and border.
$toc-color: $code-color;
$toc-border-color: $code-border-color;
// Table of contents, rendered as margin content to the left of the post.
// It is laid out as a flex column so that the bordered wrapper hugs the
// side nearest to the post text.
.table-of-contents {
    @include margin-content;
    // Overrides the `display: block` set by `margin-content`.
    display: flex;
    flex-direction: column;
    // `flex-end` rather than `end`: the bare `end` keyword has weaker
    // browser support inside flex containers.
    align-items: flex-end;
    margin-bottom: 1rem;
    // Included after the plain declarations on purpose: this mixin emits a
    // nested media query that sets `display: none`, and declarations placed
    // after it could override that rule when Sass preserves source order.
    @include margin-content-left;

    // The "Table of Contents" heading.
    em {
        font-style: normal;
        font-weight: bold;
        font-size: 1.2em;
        display: block;
        margin-bottom: 0.5rem;
    }

    // The top-level list is not indented; nested lists keep the 2rem below.
    #TableOfContents > ul {
        padding-left: 0;
    }

    nav {
        margin: 0px;
    }

    ul {
        list-style: none;
        padding-left: 2rem;
        margin: 0px;
    }

    a {
        padding: 0;
    }

    // Bordered box drawn around the heading and the list.
    div.wrapper {
        @include bordered-block;
        padding: 1rem;
        background-color: $toc-color;
        border-color: $toc-border-color;
        box-sizing: border-box;
        max-width: 100%;
    }
}

View File

@@ -1,14 +1,16 @@
$container-width: 45rem; $container-width: 45rem;
$container-min-padding: 1rem;
$container-width-threshold: $container-width + 2 * $container-min-padding;
$standard-border-width: .075rem; $standard-border-width: .075rem;
$primary-color: #36e281; $primary-color: #36e281;
$primary-color-dark: darken($primary-color, 10%);
$code-color: #f0f0f0;
$code-color-dark: darken($code-color, 10%);
$border-color: #bfbfbf; $border-color: #bfbfbf;
$code-color: #f0f0f0;
$code-border-color: darken($code-color, 10%);
$font-heading: "Lora", serif; $font-heading: "Lora", serif;
$font-body: "Raleway", serif; $font-body: "Raleway", serif;
$font-code: "Inconsolata", monospace; $font-code: "Inconsolata", monospace;
$standard-border: $standard-border-width solid $border-color; $standard-border: $standard-border-width solid $border-color;
$code-border: $standard-border-width solid $code-border-color;

View File

@@ -10,6 +10,14 @@
</div> </div>
<div class="post-content"> <div class="post-content">
{{/* Render the table of contents box only when .TableOfContents is something other than an empty <nav> element. */}}
{{ if not (eq .TableOfContents "<nav id=\"TableOfContents\"></nav>") }}
<div class="table-of-contents">
<div class="wrapper">
<em>Table of Contents</em>
{{ .TableOfContents }}
</div>
</div>
{{ end }}
{{ .Content }} {{ .Content }}
</div> </div>
{{ end }} {{ end }}