Start work on algorithms in compiler post 10
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Danila Fedorin 2020-03-14 17:18:06 -07:00
parent 6966973497
commit 1bdb4a650e
2 changed files with 242 additions and 0 deletions

160
code/compiler/10/graph.hpp Normal file
View File

@ -0,0 +1,160 @@
#pragma once
#include <algorithm>
#include <cstddef>
#include <queue>
#include <set>
#include <string>
#include <map>
#include <memory>
#include <vector>
#include <iostream>
// Label that identifies a function (a vertex) in the dependency graph.
using function = std::string;

// A set of functions that is handed back to the caller as a single unit
// of the computed ordering.
struct group {
    std::set<function> members;
};

// Owning handle used to carry group instances around without copying them.
using group_ptr = std::unique_ptr<group>;
// Directed graph whose vertices are functions. Edges are recorded with
// add_edge(); compute_order() partitions the functions into groups and
// returns those groups in the order produced by generate_order().
class function_graph {
    using group_id = size_t;

    // Bookkeeping for one group while the ordering is being computed.
    struct group_data {
        // Functions that belong to this group.
        std::set<function> functions;
        // Ids of the groups this group has an edge to.
        std::set<group_id> adjacency_list;
        // Number of edges arriving at this group from other groups.
        // Explicitly zeroed: group_data is created with a plain `new`,
        // which would otherwise leave this counter indeterminate.
        size_t indegree = 0;
    };

    using edge = std::pair<function, function>;
    using data_ptr = std::shared_ptr<group_data>;
    using group_edge = std::pair<group_id, group_id>;

    // For every function registered as a key, the functions it has an edge to.
    std::map<function, std::set<function>> adjacency_lists;
    // Every (from, to) pair ever passed to add_edge().
    std::set<edge> edges;

    // Returns the recorded edges plus the edges implied by transitivity.
    std::set<edge> compute_transitive_edges();

    // Fills the function -> group id map and the group id -> group data map
    // from the transitive edge set.
    void create_groups(
        const std::set<edge>&,
        std::map<function, group_id>&,
        std::map<group_id, data_ptr>&);

    // Translates function-level edges into group-level edges and indegrees.
    void create_edges(
        std::map<function, group_id>&,
        std::map<group_id, data_ptr>&);

    // Emits the groups in queue-based topological order.
    std::vector<group_ptr> generate_order(
        std::map<function, group_id>&,
        std::map<group_id, data_ptr>&);

public:
    // Records a directed edge from `from` to `to`.
    void add_edge(const function& from, const function& to);

    // Groups the recorded functions and returns the groups in order.
    std::vector<group_ptr> compute_order();
};
std::set<function_graph::edge> function_graph::compute_transitive_edges() {
std::set<edge> transitive_edges;
transitive_edges.insert(edges.begin(), edges.end());
for(auto& connector : adjacency_lists) {
for(auto& from : adjacency_lists) {
edge to_connector { from.first, connector.first };
for(auto& to : adjacency_lists) {
edge full_jump { from.first, to.first };
if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
edge from_connector { connector.first, to.first };
if(transitive_edges.find(to_connector) != transitive_edges.end() &&
transitive_edges.find(from_connector) != transitive_edges.end())
transitive_edges.insert(std::move(full_jump));
}
}
}
return transitive_edges;
}
void function_graph::create_groups(
const std::set<edge>& transitive_edges,
std::map<function, group_id>& group_ids,
std::map<group_id, data_ptr>& group_data_map) {
group_id id_counter = 0;
for(auto& vertex : adjacency_lists) {
if(group_ids.find(vertex.first) != group_ids.end())
continue;
data_ptr new_group(new group_data);
new_group->functions.insert(vertex.first);
group_data_map[id_counter] = new_group;
group_ids[vertex.first] = id_counter;
for(auto& other_vertex : adjacency_lists) {
if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
group_ids[other_vertex.first] = id_counter;
new_group->functions.insert(other_vertex.first);
}
}
id_counter++;
}
}
void function_graph::create_edges(
std::map<function, group_id>& group_ids,
std::map<group_id, data_ptr>& group_data_map) {
std::set<std::pair<group_id, group_id>> group_edges;
for(auto& vertex : adjacency_lists) {
auto vertex_id = group_ids[vertex.first];
auto& vertex_data = group_data_map[vertex_id];
for(auto& other_vertex : vertex.second) {
auto other_id = group_ids[other_vertex];
if(vertex_id == other_id) continue;
if(group_edges.find({vertex_id, other_id}) != group_edges.end())
continue;
group_edges.insert({vertex_id, other_id});
vertex_data->adjacency_list.insert(other_id);
group_data_map[other_id]->indegree++;
}
}
}
std::vector<group_ptr> function_graph::generate_order(
std::map<function, group_id>& group_ids,
std::map<group_id, data_ptr>& group_data_map) {
std::queue<group_id> id_queue;
std::vector<group_ptr> output;
for(auto& group : group_data_map) {
if(group.second->indegree == 0) id_queue.push(group.first);
}
while(!id_queue.empty()) {
auto new_id = id_queue.front();
auto& group_data = group_data_map[new_id];
group_ptr output_group(new group);
output_group->members = std::move(group_data->functions);
id_queue.pop();
for(auto& adjacent_group : group_data->adjacency_list) {
if(--group_data_map[adjacent_group]->indegree == 0)
id_queue.push(adjacent_group);
}
output.push_back(std::move(output_group));
}
return output;
}
void function_graph::add_edge(const function& from, const function& to) {
auto adjacency_list_it = adjacency_lists.find(from);
if(adjacency_list_it != adjacency_lists.end()) {
adjacency_list_it->second.insert(to);
} else {
adjacency_lists[from] = { to };
}
edges.insert({ from, to });
}
// Runs the whole pipeline on the edges recorded so far:
//   1. compute the transitive edge set,
//   2. partition the functions into groups,
//   3. derive group-level edges and indegrees,
//   4. emit the groups in topological order.
//
// Returns the groups in the order produced by generate_order(), i.e.
// groups with no incoming group edges come first.
//
// Declared inline because the definition lives in a header.
inline std::vector<group_ptr> function_graph::compute_order() {
    const std::set<edge> transitive_edges = compute_transitive_edges();

    std::map<function, group_id> group_ids;
    std::map<group_id, data_ptr> group_data_map;

    create_groups(transitive_edges, group_ids, group_data_map);
    create_edges(group_ids, group_data_map);
    return generate_order(group_ids, group_data_map);
}

View File

@ -261,3 +261,85 @@ within a group does not matter.
4. We typecheck the function groups, and functions within them, following the above topological order.
To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf).
This algorithm, with complexity \\(O(|V|^3)\\), goes as follows:
{{< latex >}}
\begin{aligned}
& A, R^{(i)} \in \mathbb{B}^{n \times n} \\
& \\
& R^{(0)} \leftarrow A \\
& \textbf{for} \; k \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\
& \quad \textbf{for} \; i \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\
& \quad \quad \textbf{for} \; j \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\
& \quad \quad \quad R^{(k)}[i,j] \leftarrow R^{(k-1)}[i,j] \; \textbf{or} \; R^{(k-1)}[i,k] \; \textbf{and} \; R^{(k-1)}[k,j] \\
& \textbf{return} \; R^{(n)}
\end{aligned}
{{< /latex >}}
In the above notation, \\(R^{(i)}\\) is the \\(i\\)th matrix \\(R\\), and \\(A\\) is the adjacency
matrix of the graph in question. All matrices in the algorithm are from \\(\\mathbb{B}^{n \times n}\\),
the set of \\(n\\) by \\(n\\) boolean matrices. Once this algorithm is complete, we get as output a
transitive closure adjacency matrix \\(R^{(n)}\\). Mutually dependent functions will be pretty easy to
isolate from this matrix. If \\(R^{(n)}[i,j]\\) and \\(R^{(n)}[j,i]\\), then the functions represented by vertices
\\(i\\) and \\(j\\) depend on each other.
Once we've identified the groups, and
{{< sidenote "right" "group-graph-note" "constructed a group graph," >}}
This might seem like a "draw the rest of the owl" situation, but it really isn't.
We'll follow a naive algorithm for finding groups, and for translating function dependencies
into group dependencies. This algorithm, in C++, will be presented later on.
{{< /sidenote >}} it is time to compute the topological order. For this, we will use
[Kahn's Algorithm](https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm).
The algorithm goes as follows:
{{< latex >}}
\begin{aligned}
& L \leftarrow \text{empty list} \\
& S \leftarrow \text{set of all nodes with no incoming edges} \\
& \\
& \textbf{while} \; S \; \text{is non-empty} \; \textbf{do} \\
& \quad \text{remove a node} \; n \; \text{from} \; S \\
& \quad \text{add} \; n \; \text{to the end of} \; L \\
& \quad \textbf{for each} \; \text{node} \; m \; \text{with edge} \;
e \; \text{from} \; n \; \text{to} \; m \; \textbf{do} \\
& \quad \quad \text{remove edge} \; e \; \text{from the graph} \\
& \quad \quad \textbf{if} \; m \; \text{has no other incoming edges} \; \textbf{then} \\
& \quad \quad \quad \text{insert} \; m \; \text{into} \; S \\
& \\
& \textbf{if} \; \text{the graph has edges} \; \textbf{then} \\
& \quad \textbf{return} \; \text{error} \quad \textit{(graph has at least one cycle)} \\
& \textbf{else} \\
& \quad \textbf{return} \; L \quad \textit{(a topologically sorted order)}
\end{aligned}
{{< /latex >}}
Note that since we've already isolated all mutually dependent functions into
groups, our graph will never have cycles, and this algorithm will always succeed.
Also note that since we start with nodes with no incoming edges, our list will
__begin with the groups that should be checked last__. This is because a node
with no incoming edges might (and probably does) still have outgoing edges,
and thus depends on other functions / groups. Like in our successful example,
we want to __typecheck functions that are depended on first__.
### Implementation
Let's start working on a C++ implementation of all of this now. First,
I think that we should create a C++ class that will represent our function
dependency graph. Let's call it `function_graph`. I propose the following
definition:
{{< codelines "C++" "compiler/10/graph.hpp" 12 51 >}}
There's a lot to unpack here. First of all, we create a type alias `function` that
represents the label of a function in our graph. It is probably most convenient
to work with `std::string` instances, so we settle for that. Next, we define
a struct that will represent a single group of mutually dependent functions.
Passing this struct by value seems wrong, so we'll settle for a C++ `unique_ptr`
to help carry instances around.
Finally, we arrive at the definition of `function_graph`. Inside this class,
we define a helper struct, `group_data`, which holds information
about an individual group as it is being constructed. This information
includes the group's adjacency list and
[indegree](https://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree)
(both used for Kahn's topological sorting algorithm), as well as the set
of functions in the group (which we will eventually return).