Start work on algorithms in compiler post 10
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
6966973497
commit
1bdb4a650e
160
code/compiler/10/graph.hpp
Normal file
160
code/compiler/10/graph.hpp
Normal file
@ -0,0 +1,160 @@
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
// Label that identifies a function vertex in the dependency graph.
using function = std::string;

// A set of function labels that are handled together as a single unit.
struct group {
    // Labels of every function that belongs to this group.
    std::set<function> members;
};

// Owning handle used to carry a group around without copying it.
using group_ptr = std::unique_ptr<group>;
|
||||
|
||||
class function_graph {
|
||||
using group_id = size_t;
|
||||
|
||||
struct group_data {
|
||||
std::set<function> functions;
|
||||
std::set<group_id> adjacency_list;
|
||||
size_t indegree;
|
||||
};
|
||||
|
||||
using edge = std::pair<function, function>;
|
||||
using data_ptr = std::shared_ptr<group_data>;
|
||||
using group_edge = std::pair<group_id, group_id>;
|
||||
|
||||
std::map<function, std::set<function>> adjacency_lists;
|
||||
std::set<edge> edges;
|
||||
|
||||
std::set<edge> compute_transitive_edges();
|
||||
void create_groups(
|
||||
const std::set<edge>&,
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
void create_edges(
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
std::vector<group_ptr> generate_order(
|
||||
std::map<function, group_id>&,
|
||||
std::map<group_id, data_ptr>&);
|
||||
|
||||
public:
|
||||
void add_edge(const function& from, const function& to);
|
||||
std::vector<group_ptr> compute_order();
|
||||
};
|
||||
|
||||
std::set<function_graph::edge> function_graph::compute_transitive_edges() {
|
||||
std::set<edge> transitive_edges;
|
||||
transitive_edges.insert(edges.begin(), edges.end());
|
||||
for(auto& connector : adjacency_lists) {
|
||||
for(auto& from : adjacency_lists) {
|
||||
edge to_connector { from.first, connector.first };
|
||||
for(auto& to : adjacency_lists) {
|
||||
edge full_jump { from.first, to.first };
|
||||
if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;
|
||||
|
||||
edge from_connector { connector.first, to.first };
|
||||
if(transitive_edges.find(to_connector) != transitive_edges.end() &&
|
||||
transitive_edges.find(from_connector) != transitive_edges.end())
|
||||
transitive_edges.insert(std::move(full_jump));
|
||||
}
|
||||
}
|
||||
}
|
||||
return transitive_edges;
|
||||
}
|
||||
|
||||
void function_graph::create_groups(
|
||||
const std::set<edge>& transitive_edges,
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
group_id id_counter = 0;
|
||||
for(auto& vertex : adjacency_lists) {
|
||||
if(group_ids.find(vertex.first) != group_ids.end())
|
||||
continue;
|
||||
data_ptr new_group(new group_data);
|
||||
new_group->functions.insert(vertex.first);
|
||||
group_data_map[id_counter] = new_group;
|
||||
group_ids[vertex.first] = id_counter;
|
||||
for(auto& other_vertex : adjacency_lists) {
|
||||
if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
|
||||
transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
|
||||
group_ids[other_vertex.first] = id_counter;
|
||||
new_group->functions.insert(other_vertex.first);
|
||||
}
|
||||
}
|
||||
id_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
void function_graph::create_edges(
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
std::set<std::pair<group_id, group_id>> group_edges;
|
||||
for(auto& vertex : adjacency_lists) {
|
||||
auto vertex_id = group_ids[vertex.first];
|
||||
auto& vertex_data = group_data_map[vertex_id];
|
||||
for(auto& other_vertex : vertex.second) {
|
||||
auto other_id = group_ids[other_vertex];
|
||||
if(vertex_id == other_id) continue;
|
||||
if(group_edges.find({vertex_id, other_id}) != group_edges.end())
|
||||
continue;
|
||||
group_edges.insert({vertex_id, other_id});
|
||||
vertex_data->adjacency_list.insert(other_id);
|
||||
group_data_map[other_id]->indegree++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<group_ptr> function_graph::generate_order(
|
||||
std::map<function, group_id>& group_ids,
|
||||
std::map<group_id, data_ptr>& group_data_map) {
|
||||
std::queue<group_id> id_queue;
|
||||
std::vector<group_ptr> output;
|
||||
for(auto& group : group_data_map) {
|
||||
if(group.second->indegree == 0) id_queue.push(group.first);
|
||||
}
|
||||
|
||||
while(!id_queue.empty()) {
|
||||
auto new_id = id_queue.front();
|
||||
auto& group_data = group_data_map[new_id];
|
||||
group_ptr output_group(new group);
|
||||
output_group->members = std::move(group_data->functions);
|
||||
id_queue.pop();
|
||||
|
||||
for(auto& adjacent_group : group_data->adjacency_list) {
|
||||
if(--group_data_map[adjacent_group]->indegree == 0)
|
||||
id_queue.push(adjacent_group);
|
||||
}
|
||||
|
||||
output.push_back(std::move(output_group));
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
void function_graph::add_edge(const function& from, const function& to) {
|
||||
auto adjacency_list_it = adjacency_lists.find(from);
|
||||
if(adjacency_list_it != adjacency_lists.end()) {
|
||||
adjacency_list_it->second.insert(to);
|
||||
} else {
|
||||
adjacency_lists[from] = { to };
|
||||
}
|
||||
edges.insert({ from, to });
|
||||
}
|
||||
|
||||
std::vector<group_ptr> function_graph::compute_order() {
|
||||
std::set<edge> transitive_edges = compute_transitive_edges();
|
||||
std::map<function, group_id> group_ids;
|
||||
std::map<group_id, data_ptr> group_data_map;
|
||||
|
||||
create_groups(transitive_edges, group_ids, group_data_map);
|
||||
create_edges(group_ids, group_data_map);
|
||||
return generate_order(group_ids, group_data_map);
|
||||
}
|
@ -261,3 +261,85 @@ within a group does not matter.
|
||||
4. We typecheck the function groups, and functions within them, following the above topological order.
|
||||
|
||||
To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf).
|
||||
This algorithm, with complexity \\(O(|V|^3)\\), goes as follows:
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
& A, R^{(i)} \in \mathbb{B}^{n \times n} \\
|
||||
& \\
|
||||
& R^{(0)} \leftarrow A \\
|
||||
& \textbf{for} \; k \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\
|
||||
& \quad \textbf{for} \; i \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\
|
||||
& \quad \quad \textbf{for} \; j \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\
|
||||
& \quad \quad \quad R^{(k)}[i,j] \leftarrow R^{(k-1)}[i,j] \; \textbf{or} \; R^{(k-1)}[i,k] \; \textbf{and} \; R^{(k-1)}[k,j] \\
|
||||
& \textbf{return} \; R^{(n)}
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
In the above notation, \\(R^{(i)}\\) is the \\(i\\)th matrix \\(R\\), and \\(A\\) is the adjacency
|
||||
matrix of the graph in question. All matrices in the algorithm are from \\(\\mathbb{B}^{n \times n}\\),
|
||||
the set of \\(n\\) by \\(n\\) boolean matrices. Once this algorithm is complete, we get as output a
|
||||
transitive closure adjacency matrix \\(R^{(n)}\\). Mutually dependent functions will be pretty easy to
|
||||
isolate from this matrix. If \\(R^{(n)}[i,j]\\) and \\(R^{(n)}[j,i]\\), then the functions represented by vertices
|
||||
\\(i\\) and \\(j\\) depend on each other.
|
||||
|
||||
Once we've identified the groups, and
|
||||
{{< sidenote "right" "group-graph-note" "constructed a group graph," >}}
|
||||
This might seem like a "draw the rest of the owl" situation, but it really isn't.
|
||||
We'll follow a naive algorithm for finding groups, and for translating function dependencies
|
||||
into group dependencies. This algorithm, in C++, will be presented later on.
|
||||
{{< /sidenote >}} it is time to compute the topological order. For this, we will use
|
||||
[Kahn's Algorithm](https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm).
|
||||
The algorithm goes as follows:
|
||||
|
||||
{{< latex >}}
|
||||
\begin{aligned}
|
||||
& L \leftarrow \text{empty list} \\
|
||||
& S \leftarrow \text{set of all nodes with no incoming edges} \\
|
||||
& \\
|
||||
& \textbf{while} \; S \; \text{is non-empty} \; \textbf{do} \\
|
||||
& \quad \text{remove a node} \; n \; \text{from} \; S \\
|
||||
& \quad \text{add} \; n \; \text{to the end of} \; L \\
|
||||
& \quad \textbf{for each} \; \text{node} \; m \; \text{with edge} \;
|
||||
e \; \text{from} \; n \; \text{to} \; m \; \textbf{do} \\
|
||||
& \quad \quad \text{remove edge} \; e \; \text{from the graph} \\
|
||||
& \quad \quad \textbf{if} \; m \; \text{has no other incoming edges} \; \textbf{then} \\
|
||||
& \quad \quad \quad \text{insert} \; m \; \text{into} \; S \\
|
||||
& \\
|
||||
& \textbf{if} \; \text{the graph has edges} \; \textbf{then} \\
|
||||
& \quad \textbf{return} \; \text{error} \quad \textit{(graph has at least one cycle)} \\
|
||||
& \textbf{else} \\
|
||||
& \quad \textbf{return} \; L \quad \textit{(a topologically sorted order)}
|
||||
\end{aligned}
|
||||
{{< /latex >}}
|
||||
|
||||
Note that since we've already isolated all mutually dependent functions into
|
||||
groups, our graph will never have cycles, and this algorithm will always succeed.
|
||||
Also note that since we start with nodes with no incoming edges, our list will
|
||||
__begin with the groups that should be checked last__. This is because a node
|
||||
with no incoming edges might (and probably does) still have outgoing edges,
|
||||
and thus depends on other functions / groups. Like in our successful example,
|
||||
we want to __typecheck functions that are depended on first__.
|
||||
|
||||
### Implementation
|
||||
Let's start working on a C++ implementation of all of this now. First,
|
||||
I think that we should create a C++ class that will represent our function
|
||||
dependency graph. Let's call it `function_graph`. I propose the following
|
||||
definition:
|
||||
|
||||
{{< codelines "C++" "compiler/10/graph.hpp" 12 51 >}}
|
||||
|
||||
There's a lot to unpack here. First of all, we create a type alias `function` that
|
||||
represents the label of a function in our graph. It is probably most convenient
|
||||
to work with `std::string` instances, so we settle for that. Next, we define
|
||||
a struct that will represent a single group of mutually dependent functions.
|
||||
Passing this struct by value seems wrong, so we'll settle for a C++ `unique_ptr`
|
||||
to help carry instances around.
|
||||
|
||||
Finally, we arrive at the definition of `function_graph`. Inside this class,
|
||||
we define a helper struct, `group_data`, which holds information
|
||||
about an individual group as it is being constructed. This information
|
||||
includes the group's adjacency list and
|
||||
[indegree](https://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree)
|
||||
(both used for Kahn's topological sorting algorithm), as well as the set
|
||||
of functions in the group (which we will eventually return).
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user