2026-06-09 19:30:42 -07:00
|
|
|
|
import Spa.Language.Base
|
|
|
|
|
|
import Mathlib.Data.Fin.Tuple.Basic
|
|
|
|
|
|
import Mathlib.Data.List.ProdSigma
|
|
|
|
|
|
import Mathlib.Data.List.FinRange
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-!
|
|
|
|
|
|
|
|
|
|
|
|
Algebraic Control Flow Graphs.
|
|
|
|
|
|
|
|
|
|
|
|
This file defines control flow graphs and operations to naturally compose them,
|
|
|
|
|
|
making it possible to inductively convert a program in the object language
|
|
|
|
|
|
(see `Spa.Stmt` in `Spa/Language/Base.lean`) into its corresponding graph.
|
|
|
|
|
|
|
|
|
|
|
|
Graphs are, in general, parameterized by their "payload" (the per-node data); see `GGraph`.
|
|
|
|
|
|
This is useful because other operations, such as finding the CFG node corresponding
|
|
|
|
|
|
to an AST node, are performed by embellishing a graph's basic blocks with their AST
|
|
|
|
|
|
identifiers.
|
|
|
|
|
|
|
|
|
|
|
|
The operations are deliberately a little bit sloppy here, creating empty / statement-less
|
|
|
|
|
|
CFG nodes. Additionally, the current CFG construction algorithm doesn't group
|
|
|
|
|
|
consecutive statements in a single notional basic block into one node.
|
|
|
|
|
|
This makes graph construction much easier to define, and might save us the
|
|
|
|
|
|
trouble of (when trying to find the CFG node for an AST node) doing
|
|
|
|
|
|
indexing into a list.
|
|
|
|
|
|
|
|
|
|
|
|
-/
|
|
|
|
|
|
|
|
|
|
|
|
/-- Reinterpret every element of `l : List (Fin n)` as an element of `Fin (n + m)`.
Only the upper bound grows; the numeric value of each element stays the same
(this is `Fin.castAdd` applied element-wise). -/
def List.finCastAdd {n : ℕ} (l : List (Fin n)) (m : ℕ) : List (Fin (n + m)) :=
  List.map (Fin.castAdd m) l
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Shift every element of `l : List (Fin m)` up by `n`, producing elements of
`Fin (n + m)` (this is `Fin.natAdd` applied element-wise). -/
def List.finNatAdd {m : ℕ} (l : List (Fin m)) (n : ℕ) : List (Fin (n + m)) :=
  List.map (Fin.natAdd n) l
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Pairwise analogue of `List.finCastAdd`: widen the bound of both components of
every pair from `n` to `n + m`, leaving their values unchanged. -/
def List.finCastAddProd {n : ℕ} (l : List (Fin n × Fin n)) (m : ℕ) :
    List (Fin (n + m) × Fin (n + m)) :=
  List.map (fun e => (Fin.castAdd m e.1, Fin.castAdd m e.2)) l
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Pairwise analogue of `List.finNatAdd`: shift both components of every pair up
by `n`, producing pairs over `Fin (n + m)`. -/
def List.finNatAddProd {m : ℕ} (l : List (Fin m × Fin m)) (n : ℕ) :
    List (Fin (n + m) × Fin (n + m)) :=
  List.map (fun e => (Fin.natAdd n e.1, Fin.natAdd n e.2)) l
|
|
|
|
|
|
|
2026-06-09 19:30:42 -07:00
|
|
|
|
namespace Spa
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Graph with general (`α`-labeled) nodes. By using a tuple `Fin size → α`
|
|
|
|
|
|
and writing `edges` over the `Fin size`, guarantees all edges are between real nodes.
|
|
|
|
|
|
|
|
|
|
|
|
To make graph composition via operations not force a
|
|
|
|
|
|
[`alga`](https://hackage.haskell.org/package/algebraic-graphs)-style "connect"-based
|
|
|
|
|
|
algebra, explicitly defines `inputs` and `outputs`, which are the only nodes that
|
|
|
|
|
|
get connected when graphs are sequenced. This makes the graph construction
|
|
|
|
|
|
operations more naturally fit with how CFGs are created from `Stmt`s. -/
|
2026-06-24 16:02:49 -05:00
|
|
|
|
structure GGraph (α : Type) where
  /-- Number of nodes in the graph. -/
  size : ℕ
  /-- The payload stored at each node. -/
  nodes : Fin size → α
  /-- Directed edges, as `(source, target)` pairs of node indices. -/
  edges : List (Fin size × Fin size)
  /-- Nodes that receive the connecting edges when this graph is sequenced after another. -/
  inputs : List (Fin size)
  /-- Nodes that emit the connecting edges when this graph is sequenced before another. -/
  outputs : List (Fin size)
|
|
|
|
|
|
|
2026-06-24 16:02:49 -05:00
|
|
|
|
namespace GGraph
|
|
|
|
|
|
|
|
|
|
|
|
-- Payload types shared (as implicit arguments) by the declarations in this namespace.
variable {α β : Type}
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The type of node indices of the graph `g`, i.e. `Fin g.size`. -/
abbrev Index (g : GGraph α) : Type :=
  Fin g.size
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The type of edges of the graph `g`: a pair of node indices of `g`. -/
abbrev Edge (g : GGraph α) : Type :=
  Index g × Index g
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- `GGraph` is a functor in its payload: mapping `f` rewrites the data stored at every
node and leaves the size, edges, inputs and outputs untouched. -/
instance : Functor GGraph where
  map {α β : Type} (f : α → β) (g : GGraph α) : GGraph β :=
    { size := g.size,
      -- Every field is comma-separated, so parsing does not depend on column alignment.
      nodes := f ∘ g.nodes,
      edges := g.edges,
      inputs := g.inputs,
      outputs := g.outputs }
|
|
|
|
|
|
|
|
|
|
|
|
/-- Mapping over the payload does not change the number of nodes. -/
@[simp]
lemma map_size (f : α → β) (g : GGraph α) : (f <$> g).size = g.size :=
  rfl
|
|
|
|
|
|
/-- Mapping over the payload does not change the edges. -/
@[simp]
lemma map_edges (f : α → β) (g : GGraph α) : (f <$> g).edges = g.edges :=
  rfl
|
|
|
|
|
|
/-- Mapping over the payload does not change the input nodes. -/
@[simp]
lemma map_inputs (f : α → β) (g : GGraph α) : (f <$> g).inputs = g.inputs :=
  rfl
|
|
|
|
|
|
/-- Mapping over the payload does not change the output nodes. -/
@[simp]
lemma map_outputs (f : α → β) (g : GGraph α) : (f <$> g).outputs = g.outputs :=
  rfl
|
|
|
|
|
|
|
|
|
|
|
|
/-- Overlay two graphs: the result holds the nodes of `g₁` followed by the nodes of `g₂`
(the latter re-indexed by adding `g₁.size`), together with the edges of both graphs.
No additional edges are inserted. The input and output node sets of the two graphs
are combined as well. -/
def overlay (g₁ g₂ : GGraph α) : GGraph α where
  size := g₁.size + g₂.size
  nodes := Fin.append g₁.nodes g₂.nodes
  -- Left-graph indices are only widened; right-graph indices are shifted by `g₁.size`.
  edges := List.finCastAddProd g₁.edges g₂.size ++ List.finNatAddProd g₂.edges g₁.size
  inputs := List.finCastAdd g₁.inputs g₂.size ++ List.finNatAdd g₂.inputs g₁.size
  outputs := List.finCastAdd g₁.outputs g₂.size ++ List.finNatAdd g₂.outputs g₁.size
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
-- Scoped, right-associative infix notation (precedence 70) for `GGraph.overlay`.
@[inherit_doc] scoped infixr:70 " ∙ " => GGraph.overlay
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Sequence two CFGs: create a combined graph whose nodes and edges come
from two subgraphs, __and__ make all the outputs of the left graph have edges to
all the inputs of the right graph. The inputs of the result are the inputs of the left
graph, and its outputs are the outputs of the right graph. By the semantics of CFGs, this
encodes the fact that code first traverses the basic blocks in the left
graph, and then does the same for the right graph. -/
def sequence (g₁ g₂ : GGraph α) : GGraph α where
  size := g₁.size + g₂.size
  nodes := Fin.append g₁.nodes g₂.nodes
  edges :=
    List.finCastAddProd g₁.edges g₂.size ++ List.finNatAddProd g₂.edges g₁.size ++
      -- the connecting edges: every output of `g₁` to every input of `g₂`
      List.product (List.finCastAdd g₁.outputs g₂.size) (List.finNatAdd g₂.inputs g₁.size)
  inputs := List.finCastAdd g₁.inputs g₂.size
  outputs := List.finNatAdd g₂.outputs g₁.size
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
-- Scoped, right-associative infix notation (precedence 70) for `GGraph.sequence`.
@[inherit_doc] scoped infixr:70 " ⤳ " => GGraph.sequence
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The index, inside `loop g`, of the fresh node that serves as the loop's input.
It is index `0` of the two nodes that `loop` places in front of the nodes of `g`. -/
def loopIn (g : GGraph α) : Fin (2 + g.size) :=
  Fin.castAdd g.size (0 : Fin 2)
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The index, inside `loop g`, of the fresh node that serves as the loop's output.
It is index `1` of the two nodes that `loop` places in front of the nodes of `g`. -/
def loopOut (g : GGraph α) : Fin (2 + g.size) :=
  Fin.castAdd g.size (1 : Fin 2)
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Creates a zero-or-more loop in the CFG: connects all the output
nodes of the CFG back to the graph's beginning, and also introduces a path
to a new ending node (see `loopOut`) which bypasses the entire graph.

Notably, both the new input (`loopIn`) and new output (`loopOut`)
nodes are necessary for correctness: adding a path from inputs to a
hypothetical no-op end node encodes something like "just the first statement is executed".
Similarly, just adding a path from a hypothetical no-op beginning node
to the outputs encodes "just the last statement is executed".

This is technically sloppy (see module comment), but it's simple.
-/
def loop (g : GGraph (List β)) : GGraph (List β) where
  size := 2 + g.size
  -- The two fresh nodes come first and carry no statements.
  nodes := Fin.append (fun _ : Fin 2 => []) g.nodes
  edges :=
    -- the edges of `g` itself, shifted past the two fresh nodes
    List.finNatAddProd g.edges 2 ++
      -- `loopIn` to every input of `g`
      ((g.loopIn, ·) <$> List.finNatAdd g.inputs 2) ++
      -- every output of `g` to `loopOut`
      ((·, g.loopOut) <$> List.finNatAdd g.outputs 2) ++
      -- the back edge, and the edge that bypasses `g` entirely
      [(g.loopOut, g.loopIn), (g.loopIn, g.loopOut)]
  inputs := [g.loopIn]
  outputs := [g.loopOut]
|
|
|
|
|
|
|
2026-06-25 13:59:08 -05:00
|
|
|
|
/-- The only input of `loop g` is the fresh node `g.loopIn`. -/
@[simp]
lemma loop_inputs (g : GGraph (List β)) : (loop g).inputs = [loopIn g] :=
  rfl
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 13:59:08 -05:00
|
|
|
|
/-- The only output of `loop g` is the fresh node `g.loopOut`. -/
@[simp]
lemma loop_outputs (g : GGraph (List β)) : (loop g).outputs = [loopOut g] :=
  rfl
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The graph with exactly one node, carrying the payload `a`, and no edges.
That node is both the sole input and the sole output. -/
def singleton (a : α) : GGraph α where
  size := 1
  nodes _ := a
  edges := []
  inputs := [0]
  outputs := [0]
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Surround `g` with an empty node on each side, sequenced before and after it.
The result has a single input and a single output node, which is useful to ensure
there's a single point of entry and a single point of exit. -/
def wrap (g : GGraph (List β)) : GGraph (List β) :=
  -- `⤳` is right-associative; the parentheses only make the grouping explicit.
  singleton [] ⤳ (g ⤳ singleton [])
|
|
|
|
|
|
|
2026-06-25 13:59:08 -05:00
|
|
|
|
/-- Mapping `f` over a single-node graph yields the single-node graph on `f a`. -/
@[simp]
lemma map_singleton (f : α → β) (a : α) : f <$> singleton a = singleton (f a) :=
  rfl
|
2026-06-25 09:25:35 -05:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Mapping over the payload commutes with `overlay`. -/
@[simp]
lemma map_overlay (f : α → β) (g₁ g₂ : GGraph α) :
    f <$> (g₁ ∙ g₂) = (f <$> g₁) ∙ (f <$> g₂) := by
  obtain ⟨n₁, nd₁, e₁, i₁, o₁⟩ := g₁
  obtain ⟨n₂, nd₂, e₂, i₂, o₂⟩ := g₂
  simp only [Functor.map, GGraph.overlay]
  congr 1
  -- Only the node payloads remain: compare them index by index, split by side.
  funext i
  refine Fin.addCases ?_ ?_ i <;> intro j <;> simp [Fin.append_left, Fin.append_right]
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Mapping over the payload commutes with `sequence`. -/
@[simp]
lemma map_sequence (f : α → β) (g₁ g₂ : GGraph α) :
    f <$> (g₁ ⤳ g₂) = (f <$> g₁) ⤳ (f <$> g₂) := by
  obtain ⟨n₁, nd₁, e₁, i₁, o₁⟩ := g₁
  obtain ⟨n₂, nd₂, e₂, i₂, o₂⟩ := g₂
  simp only [Functor.map, GGraph.sequence]
  congr 1
  -- Only the node payloads remain: compare them index by index, split by side.
  funext i
  refine Fin.addCases ?_ ?_ i <;> intro j <;> simp [Fin.append_left, Fin.append_right]
|
|
|
|
|
|
|
2026-06-25 13:59:08 -05:00
|
|
|
|
/-- Mapping a function over the statement list of every node commutes with `loop`.
`γ` is bound explicitly, so the statement does not rely on auto-bound implicits. -/
@[simp] lemma map_loop {γ : Type} (h : β → γ) (g : GGraph (List β)) :
    (List.map h) <$> (loop g) = loop (List.map h <$> g) := by
  rcases g with ⟨n, nd, e, i, o⟩
  simp only [Functor.map, GGraph.loop]
  congr 1
  -- Only the node payloads remain: compare them index by index, treating the two
  -- fresh (empty) nodes and the nodes of `g` separately.
  funext i
  refine Fin.addCases ?_ ?_ i <;> intro j <;> simp [Fin.append_left, Fin.append_right]
|
|
|
|
|
|
|
2026-06-25 13:59:08 -05:00
|
|
|
|
/-- Mapping a function over the statement list of every node commutes with `wrap`.
`γ` is bound explicitly, so the statement does not rely on auto-bound implicits. -/
@[simp] lemma map_wrap {γ : Type} (h : β → γ) (g : GGraph (List β)) :
    (List.map h) <$> wrap g = wrap (List.map h <$> g) := by
  simp [GGraph.wrap, GGraph.map_sequence, GGraph.map_singleton]
|
2026-06-25 09:25:35 -05:00
|
|
|
|
|
2026-06-24 16:02:49 -05:00
|
|
|
|
-- From here on, `g` is an explicit graph argument of the declarations that mention it.
variable (g : GGraph α)
|
2026-06-09 19:30:42 -07:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The list of all node indices of `g`, namely `List.finRange g.size`. -/
def indices : List (Index g) :=
  List.finRange g.size
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Every index of `g` occurs in `g.indices`. -/
lemma mem_indices (idx : g.Index) : idx ∈ indices g := by
  exact List.mem_finRange idx
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- No index is listed twice in `g.indices`. -/
lemma nodup_indices : List.Nodup (indices g) := by
  exact List.nodup_finRange g.size
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Predecessors of a particular node in the graph: all indices `src` such that
`(src, idx)` is an edge of `g`. -/
def predecessors (idx : g.Index) : List g.Index :=
  g.indices.filter fun src => (src, idx) ∈ g.edges
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- If there's an edge from node `idx₁` to node `idx₂`,
then `idx₁` is a predecessor of `idx₂`. -/
lemma mem_predecessors_of_edge {idx₁ idx₂ : g.Index}
    (h : (idx₁, idx₂) ∈ g.edges) : idx₁ ∈ g.predecessors idx₂ := by
  -- Membership in a filtered list: in the base list, and satisfying the predicate.
  refine List.mem_filter.mpr ⟨g.mem_indices idx₁, ?_⟩
  simpa using h
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- Conversely, if `idx₁` is listed among the predecessors of `idx₂`, then
`(idx₁, idx₂)` is an edge of the graph. -/
lemma edge_of_mem_predecessors {idx₁ idx₂ : g.Index}
    (h : idx₁ ∈ g.predecessors idx₂) : (idx₁, idx₂) ∈ g.edges := by
  -- The second component of filter membership is the (decided) edge predicate.
  have hEdge := (List.mem_filter.mp h).2
  simpa using hEdge
|
|
|
|
|
|
|
2026-06-24 16:02:49 -05:00
|
|
|
|
end GGraph
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
/-- The graphs used by the analyses in this framework: a `GGraph` whose node payload
is a basic block, i.e. a list of `BasicStmt`s, and nothing else. -/
abbrev Graph : Type :=
  GGraph (List BasicStmt)
|
|
|
|
|
|
|
|
|
|
|
|
namespace Graph
|
|
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
-- Make the listed `GGraph` declarations available under the `Graph` namespace as well.
export GGraph (overlay sequence loop singleton wrap loop_inputs loop_outputs)
|
2026-06-24 16:02:49 -05:00
|
|
|
|
|
2026-06-25 17:01:27 -05:00
|
|
|
|
-- Scoped to the `Graph` namespace, so `open Graph` also enables the `∙` notation.
@[inherit_doc] scoped infixr:70 " ∙ " => GGraph.overlay
|
|
|
|
|
|
-- Scoped to the `Graph` namespace, so `open Graph` also enables the `⤳` notation.
@[inherit_doc] scoped infixr:70 " ⤳ " => GGraph.sequence
|
2026-06-24 16:02:49 -05:00
|
|
|
|
|
2026-06-09 19:30:42 -07:00
|
|
|
|
end Graph
|
|
|
|
|
|
|
2026-06-24 13:54:37 -05:00
|
|
|
|
open Graph in
/-- Build the control flow graph of a statement by recursion on its structure,
using one graph operation per statement form. -/
def Stmt.cfg : Stmt → Graph
  -- A basic statement becomes a single node holding just that statement.
  | .basic bs => singleton [bs]
  -- Statement sequencing maps directly onto CFG sequencing.
  | .andThen s₁ s₂ => s₁.cfg ⤳ s₂.cfg
  -- An if executes one branch or the other, so the two branch graphs are overlaid.
  -- Forks and joins appear later, when the result is sequenced with its neighbours.
  | .ifElse _ s₁ s₂ => s₁.cfg ∙ s₂.cfg
  -- `loop` was designed for exactly this kind of zero-or-more loop.
  | .whileLoop _ s => loop s.cfg
|
2026-06-24 13:54:37 -05:00
|
|
|
|
|
2026-06-09 19:30:42 -07:00
|
|
|
|
end Spa
|