From 1bdb4a650eece88b8dd2e1f3d068789fb0e6fa86 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Sat, 14 Mar 2020 17:18:06 -0700 Subject: [PATCH] Start work on algorithms in compiler post 10 --- code/compiler/10/graph.hpp | 160 +++++++++++++++++++++++ content/blog/10_compiler_polymorphism.md | 82 ++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 code/compiler/10/graph.hpp diff --git a/code/compiler/10/graph.hpp b/code/compiler/10/graph.hpp new file mode 100644 index 0000000..a15b2ac --- /dev/null +++ b/code/compiler/10/graph.hpp @@ -0,0 +1,160 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using function = std::string; + +struct group { + std::set members; +}; + +using group_ptr = std::unique_ptr; + +class function_graph { + using group_id = size_t; + + struct group_data { + std::set functions; + std::set adjacency_list; + size_t indegree; + }; + + using edge = std::pair; + using data_ptr = std::shared_ptr; + using group_edge = std::pair; + + std::map> adjacency_lists; + std::set edges; + + std::set compute_transitive_edges(); + void create_groups( + const std::set&, + std::map&, + std::map&); + void create_edges( + std::map&, + std::map&); + std::vector generate_order( + std::map&, + std::map&); + + public: + void add_edge(const function& from, const function& to); + std::vector compute_order(); +}; + +std::set function_graph::compute_transitive_edges() { + std::set transitive_edges; + transitive_edges.insert(edges.begin(), edges.end()); + for(auto& connector : adjacency_lists) { + for(auto& from : adjacency_lists) { + edge to_connector { from.first, connector.first }; + for(auto& to : adjacency_lists) { + edge full_jump { from.first, to.first }; + if(transitive_edges.find(full_jump) != transitive_edges.end()) continue; + + edge from_connector { connector.first, to.first }; + if(transitive_edges.find(to_connector) != transitive_edges.end() && + transitive_edges.find(from_connector) != 
transitive_edges.end()) + transitive_edges.insert(std::move(full_jump));/* from reaches connector and connector reaches to, so the pair (from, to) is added to the closure */ + } + } + } + return transitive_edges; +} +/* Assigns every key of adjacency_lists to a group. A vertex that has no id yet opens a new group, and every vertex for which transitive_edges holds both (vertex, other) and (other, vertex) receives the same id. Fills group_ids (vertex to id) and group_data_map (id to group_data). */ +void function_graph::create_groups( + const std::set& transitive_edges, + std::map& group_ids, + std::map& group_data_map) { + group_id id_counter = 0; + for(auto& vertex : adjacency_lists) { + if(group_ids.find(vertex.first) != group_ids.end()) + continue; + data_ptr new_group(new group_data);/* NOTE(review): group_data::indegree has no initializer in the struct, so it appears to start indeterminate here while create_edges later increments it. Confirm, and consider zero-initializing. */ + new_group->functions.insert(vertex.first); + group_data_map[id_counter] = new_group; + group_ids[vertex.first] = id_counter; + for(auto& other_vertex : adjacency_lists) { + if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() && + transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) { + group_ids[other_vertex.first] = id_counter; + new_group->functions.insert(other_vertex.first); + } + } + id_counter++; + } +} +/* Turns direct function edges into edges between groups. For each direct edge whose endpoints carry different group ids, the first occurrence adds the target id to the source group adjacency_list and increments the target group indegree; group_edges filters out repeats of the same id pair. */ +void function_graph::create_edges( + std::map& group_ids, + std::map& group_data_map) { + std::set> group_edges; + for(auto& vertex : adjacency_lists) { + auto vertex_id = group_ids[vertex.first]; + auto& vertex_data = group_data_map[vertex_id]; + for(auto& other_vertex : vertex.second) { + auto other_id = group_ids[other_vertex];/* NOTE(review): create_groups only assigns ids to keys of adjacency_lists, and add_edge creates a key only for from. For a vertex that only ever appears as to, this operator[] presumably inserts a default id. Confirm this is intended. */ + if(vertex_id == other_id) continue;/* edge stays inside one group, so it is not a group edge */ + if(group_edges.find({vertex_id, other_id}) != group_edges.end()) + continue; + group_edges.insert({vertex_id, other_id}); + vertex_data->adjacency_list.insert(other_id); + group_data_map[other_id]->indegree++; + } + } +} +/* Orders the groups with Kahn-style topological sorting: every group whose indegree is 0 is queued, then groups are popped one at a time, emitted as a group object, and each adjacent group has its indegree decremented and is queued once it reaches 0. The group_ids parameter is not read in this function. */ +std::vector function_graph::generate_order( + std::map& group_ids, + std::map& group_data_map) { + std::queue id_queue; + std::vector output; + for(auto& group : group_data_map) { + if(group.second->indegree == 0) id_queue.push(group.first); + } + + while(!id_queue.empty()) { + auto new_id = id_queue.front(); + auto& group_data = group_data_map[new_id]; + group_ptr output_group(new group); + output_group->members = std::move(group_data->functions);/* the function set is moved out of the bookkeeping entry into the returned group */ + id_queue.pop(); + + for(auto& adjacent_group : 
group_data->adjacency_list) { + if(--group_data_map[adjacent_group]->indegree == 0) + id_queue.push(adjacent_group);/* every counted incoming group edge has now been consumed */ + } + + output.push_back(std::move(output_group)); + } + + return output; +} +/* Records the directed edge (from, to): inserts to into the adjacency list of from, creating that list when from has none yet, and stores the pair in edges. No adjacency list entry is created for to. */ +void function_graph::add_edge(const function& from, const function& to) { + auto adjacency_list_it = adjacency_lists.find(from); + if(adjacency_list_it != adjacency_lists.end()) { + adjacency_list_it->second.insert(to); + } else { + adjacency_lists[from] = { to }; + } + edges.insert({ from, to }); +} +/* Runs the whole pipeline on the edges added so far: transitive closure, grouping, group edges, then ordering. Returns the groups in the order produced by generate_order, which starts with groups that have no incoming group edges. */ +std::vector function_graph::compute_order() { + std::set transitive_edges = compute_transitive_edges(); + std::map group_ids; + std::map group_data_map; + + create_groups(transitive_edges, group_ids, group_data_map); + create_edges(group_ids, group_data_map); + return generate_order(group_ids, group_data_map); +} diff --git a/content/blog/10_compiler_polymorphism.md b/content/blog/10_compiler_polymorphism.md index aa8f9dc..bb428e3 100644 --- a/content/blog/10_compiler_polymorphism.md +++ b/content/blog/10_compiler_polymorphism.md @@ -261,3 +261,85 @@ within a group does not matter. 4. We typecheck the function groups, and functions within them, following the above topological order. To find the transitive closure of a graph, we can use [Warshall's Algorithm](https://cs.winona.edu/lin/cs440/ch08-2.pdf). 
+This algorithm, with complexity \\(O(|V|^3)\\), goes as follows: +{{< latex >}} +\begin{aligned} +& A, R^{(i)} \in \mathbb{B}^{n \times n} \\ +& \\ +& R^{(0)} \leftarrow A \\ +& \textbf{for} \; k \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\ +& \quad \textbf{for} \; i \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\ +& \quad \quad \textbf{for} \; j \leftarrow 1 \; \textbf{to} \; n \; \textbf{do} \\ +& \quad \quad \quad R^{(k)}[i,j] \leftarrow R^{(k-1)}[i,j] \; \textbf{or} \; R^{(k-1)}[i,k] \; \textbf{and} \; R^{(k-1)}[k,j] \\ +& \textbf{return} \; R^{(n)} +\end{aligned} +{{< /latex >}} + +In the above notation, \\(R^{(i)}\\) is the \\(i\\)th matrix \\(R\\), and \\(A\\) is the adjacency +matrix of the graph in question. All matrices in the algorithm are from \\(\\mathbb{B}^{n \times n}\\), +the set of \\(n\\) by \\(n\\) boolean matrices. Once this algorithm is complete, we get as output a +transitive closure adjacency matrix \\(R^{(n)}\\). Mutually dependent functions will be pretty easy to +isolate from this matrix. If \\(R^{(n)}[i,j]\\) and \\(R^{(n)}[j,i]\\), then the functions represented by vertices +\\(i\\) and \\(j\\) depend on each other. + +Once we've identified the groups, and +{{< sidenote "right" "group-graph-note" "constructed a group graph," >}} +This might seem like a "draw the rest of the owl" situation, but it really isn't. +We'll follow a naive algorithm for finding groups, and for translating function dependencies +into group dependencies. This algorithm, in C++, will be presented later on. +{{< /sidenote >}} it is time to compute the topological order. For this, we will use +[Kahn's Algorithm](https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm). 
+The algorithm goes as follows: + +{{< latex >}} +\begin{aligned} +& L \leftarrow \text{empty list} \\ +& S \leftarrow \text{set of all nodes with no incoming edges} \\ +& \\ +& \textbf{while} \; S \; \text{is non-empty} \; \textbf{do} \\ +& \quad \text{remove a node} \; n \; \text{from} \; S \\ +& \quad \text{add} \; n \; \text{to the end of} \; L \\ +& \quad \textbf{for each} \; \text{node} \; m \; \text{with edge} \; + e \; \text{from} \; n \; \text{to} \; m \; \textbf{do} \\ +& \quad \quad \text{remove edge} \; e \; \text{from the graph} \\ +& \quad \quad \textbf{if} \; m \; \text{has no other incoming edges} \; \textbf{then} \\ +& \quad \quad \quad \text{insert} \; m \; \text{into} \; S \\ +& \\ +& \textbf{if} \; \text{the graph has edges} \; \textbf{then} \\ +& \quad \textbf{return} \; \text{error} \quad \textit{(graph has at least one cycle)} \\ +& \textbf{else} \\ +& \quad \textbf{return} \; L \quad \textit{(a topologically sorted order)} +\end{aligned} +{{< /latex >}} + +Note that since we've already isolated all mutually dependent functions into +groups, our graph will never have cycles, and this algorithm will always succeed. +Also note that since we start with nodes with no incoming edges, our list will +__begin with the groups that should be checked last__. This is because a node +with no incoming edges might (and probably does) still have outgoing edges, +and thus depends on other functions / groups. Like in our successful example, +we want to __typecheck functions that are depended on first__. + +### Implementation +Let's start working on a C++ implementation of all of this now. First, +I think that we should create a C++ class that will represent our function +dependency graph. Let's call it `function_graph`. I propose the following +definition: + +{{< codelines "C++" "compiler/10/graph.hpp" 12 51 >}} + +There's a lot to unpack here. First of all, we create a type alias `function` that +represents the label of a function in our graph. 
It is probably most convenient +to work with `std::string` instances, so we settle for that. Next, we define +a struct that will represent a single group of mutually dependent functions. +Passing this struct by value seems wrong, so we'll settle for a C++ `unique_ptr` +to help carry instances around. + +Finally, we arrive at the definition of `function_graph`. Inside this class, +we define a helper struct, `group_data`, which holds information +about an individual group as it is being constructed. This information +includes the group's adjacency list and +[indegree](https://en.wikipedia.org/wiki/Directed_graph#Indegree_and_outdegree) +(both used for Kahn's topological sorting algorithm), as well as the set +of functions in the group (which we will eventually return). +