This requires a few pieces: * Make node tags use `Fin n` intead of natural numbers. This makes it possible to build a finite lattice over AST nodes, and also ensure automatic, total indexing from CFG nodes into the AST that created them. For this, use the elaborator to derive the ordering statements etc. where possible. * Adjust the forward framework to enable proofs that don't just state correctness on the environment, but also on an arbitrary additional state accumulated from traversing the trace. * State the reaching definition analysis's correctness in terms of this new framework. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
248 lines
10 KiB
Lean4
248 lines
10 KiB
Lean4
import Spa.Language.Base
|
||
import Mathlib.Data.Fin.Tuple.Basic
|
||
import Mathlib.Data.List.ProdSigma
|
||
import Mathlib.Data.List.FinRange
|
||
|
||
/-!
|
||
|
||
# Algebraic Control Flow Graphs
|
||
|
||
This file defines control flow graphs and operations to naturally compose them,
|
||
making it possible to inductively covnert a program in the object language
|
||
(see `Spa.Stmt` in `Spa/Language/Base.lean`) into its corresponding graph.
|
||
|
||
Graphs are, in general, parameterized by their "payload" (the per-node data); see `GGraph`.
|
||
This is useful because other operations, such as finding the CFG node corresponding
|
||
to an AST node, are performed by embellishing a graph's basic blocks with their AST
|
||
identifiers.
|
||
|
||
The operations are deliberately a little bit sloppy here, creating empty / statement-less
|
||
CFG nodes. Additionally, the current CFG construction algorithm doesn't group
|
||
consecutive statements in a single notional basic block into one node.
|
||
This makes graph construction much easier to define, and might save us the
|
||
trouble of (when trying to find the CFG node for an AST node) doing
|
||
indexing into a list.
|
||
|
||
-/
|
||
|
||
/-- Bump the upper bound of a list of `Fin`s without changing their value. -/
|
||
def List.finCastAdd {n : ℕ} (l : List (Fin n)) (m : ℕ) : List (Fin (n + m)) :=
|
||
l.map (Fin.castAdd m)
|
||
|
||
/-- Bump the upper bound of a list of `Fin`s by adding the amount to their value. -/
|
||
def List.finNatAdd {m : ℕ} (l : List (Fin m)) (n : ℕ) : List (Fin (n + m)) :=
|
||
l.map (Fin.natAdd n)
|
||
|
||
/-- Bump the upper bound of a list of `Fin` pairs without changing their value. -/
|
||
def List.finCastAddProd {n : ℕ} (l : List (Fin n × Fin n)) (m : ℕ) :
|
||
List (Fin (n + m) × Fin (n + m)) :=
|
||
l.map (fun e => (e.1.castAdd m, e.2.castAdd m))
|
||
|
||
/-- Bump the upper bound of a list of `Fin` pairs by adding the amount to their value. -/
|
||
def List.finNatAddProd {m : ℕ} (l : List (Fin m × Fin m)) (n : ℕ) :
|
||
List (Fin (n + m) × Fin (n + m)) :=
|
||
l.map (fun e => (e.1.natAdd n, e.2.natAdd n))
|
||
|
||
namespace Spa
|
||
|
||
/-- Graph with general (`α`-labeled) nodes. By using a tuple `Fin size → α`
|
||
and writing `edges` over the `Fin size`, guarantees all edges are between real nodes.
|
||
|
||
To make graph composition via operations not force a
|
||
[`alga`](https://hackage.haskell.org/package/algebraic-graphs)-style "connect"-based
|
||
algebra, explicitly defines `inputs` and `outputs`, which are the only nodes that
|
||
get connected when graphs are sequenced. This makes the graph construction
|
||
operations more naturally fit with how CFGs are created from `Stmt`s. -/
|
||
structure GGraph (α : Type) where
|
||
size : ℕ
|
||
nodes : Fin size → α
|
||
edges : List (Fin size × Fin size)
|
||
inputs : List (Fin size)
|
||
outputs : List (Fin size)
|
||
|
||
namespace GGraph
|
||
|
||
variable {α β : Type}
|
||
|
||
/-- An index (node) in the CFG. -/
|
||
abbrev Index (g : GGraph α) : Type := Fin g.size
|
||
|
||
/-- An edge in the CFG. -/
|
||
abbrev Edge (g : GGraph α) : Type := g.Index × g.Index
|
||
|
||
instance : Functor GGraph where
|
||
map {α β : Type} (f : α → β) (g : GGraph α) : GGraph β :=
|
||
{ size := g.size,
|
||
nodes := f ∘ g.nodes
|
||
edges := g.edges,
|
||
inputs := g.inputs,
|
||
outputs := g.outputs }
|
||
|
||
@[simp] lemma map_size (f : α → β) (g : GGraph α) : (f <$> g).size = g.size := rfl
|
||
@[simp] lemma map_edges (f : α → β) (g : GGraph α) : (f <$> g).edges = g.edges := rfl
|
||
@[simp] lemma map_inputs (f : α → β) (g : GGraph α) : (f <$> g).inputs = g.inputs := rfl
|
||
@[simp] lemma map_outputs (f : α → β) (g : GGraph α) : (f <$> g).outputs = g.outputs := rfl
|
||
|
||
/-- Overlay two graphs: create a new graph whose nodes and edges come from two
|
||
sub-graphs, without inserting any additional edges. Also combines the
|
||
input and output node sets. -/
|
||
def overlay (g₁ g₂ : GGraph α) : GGraph α where
|
||
size := g₁.size + g₂.size
|
||
nodes := Fin.append g₁.nodes g₂.nodes
|
||
edges := g₁.edges.finCastAddProd g₂.size ++ g₂.edges.finNatAddProd g₁.size
|
||
inputs := g₁.inputs.finCastAdd g₂.size ++ g₂.inputs.finNatAdd g₁.size
|
||
outputs := g₁.outputs.finCastAdd g₂.size ++ g₂.outputs.finNatAdd g₁.size
|
||
|
||
@[inherit_doc] scoped infixr:70 " ∙ " => GGraph.overlay
|
||
|
||
/-- Sequence two CFGs: create a combined graph whose nodes and edges come
|
||
from two subgraphs, __and__ make all the outputs of the left graph have edges to
|
||
all the inputs of the right graph. By the semantics of CFGs, this
|
||
encodes the fact that code first traverses the basic blocks in theleft
|
||
graph, and does the same for the right graph. -/
|
||
def sequence (g₁ g₂ : GGraph α) : GGraph α where
|
||
size := g₁.size + g₂.size
|
||
nodes := Fin.append g₁.nodes g₂.nodes
|
||
edges := g₁.edges.finCastAddProd g₂.size ++ g₂.edges.finNatAddProd g₁.size ++
|
||
(g₁.outputs.finCastAdd g₂.size).product (g₂.inputs.finNatAdd g₁.size)
|
||
inputs := g₁.inputs.finCastAdd g₂.size
|
||
outputs := g₂.outputs.finNatAdd g₁.size
|
||
|
||
@[inherit_doc] scoped infixr:70 " ⤳ " => GGraph.sequence
|
||
|
||
/-- When a graph `g` is wrapped in a `loop`, the index / node corresponding
|
||
to the input of the new loop. -/
|
||
def loopIn (g : GGraph α) : Fin (2 + g.size) := (0 : Fin 2).castAdd g.size
|
||
|
||
/-- When a graph `g` is wrapped in a `loop`, the index / node corresponding
|
||
to the output of the new loop. -/
|
||
def loopOut (g : GGraph α) : Fin (2 + g.size) := (1 : Fin 2).castAdd g.size
|
||
|
||
/-- Creates a zero-or-more loop loop in the CFG: connects all the output
|
||
nodes of the CFG back to the graph's beginning, and also introduces a path
|
||
to a new ending node (see `loopOut`) which bypasses the entire graph.
|
||
|
||
Notably, both the new input (`loopIn`) and new output (`loopOut`)
|
||
nodes are necessary for correctness: adding a path from inputs to a
|
||
hypothetical no-op end node encodes something like "just the first statement is executed".
|
||
Similarly, just adding a path from a a hypothetical no-op beginning node
|
||
to the outputs encodes "just the last statement is executed".
|
||
|
||
This is technically sloppy (see module comment), but it's simple.
|
||
-/
|
||
def loop (g : GGraph (Option β)) : GGraph (Option β) where
|
||
size := 2 + g.size
|
||
nodes := Fin.append (fun _ : Fin 2 => none) g.nodes
|
||
edges := g.edges.finNatAddProd 2 ++
|
||
((g.loopIn, ·) <$> g.inputs.finNatAdd 2) ++
|
||
((·, g.loopOut) <$> g.outputs.finNatAdd 2) ++
|
||
[(g.loopOut, g.loopIn), (g.loopIn, g.loopOut)]
|
||
inputs := [g.loopIn]
|
||
outputs := [g.loopOut]
|
||
|
||
@[simp] lemma loop_inputs (g : GGraph (Option β)) : (loop g).inputs = [g.loopIn] := rfl
|
||
|
||
@[simp] lemma loop_outputs (g : GGraph (Option β)) : (loop g).outputs = [g.loopOut] := rfl
|
||
|
||
/-- Creates a single-node graph whose node contains the given value. -/
|
||
def singleton (a : α) : GGraph α where
|
||
size := 1
|
||
nodes := fun _ => a
|
||
edges := []
|
||
inputs := [0]
|
||
outputs := [0]
|
||
|
||
/-- Creates a new graph with a single input and single output node. Useful to ensure there's
|
||
a single point of entry and single point of exit. -/
|
||
def wrap (g : GGraph (Option β)) : GGraph (Option β) :=
|
||
singleton none ⤳ g ⤳ singleton none
|
||
|
||
@[simp] lemma map_singleton (f : α → β) (a : α) :
|
||
f <$> singleton a = singleton (f a) := rfl
|
||
|
||
@[simp] lemma map_overlay (f : α → β) (g₁ g₂ : GGraph α) :
|
||
f<$> (g₁ ∙ g₂) = f <$> g₁ ∙ f <$> g₂ := by
|
||
rcases g₁ with ⟨n₁, nd₁, e₁, i₁, o₁⟩; rcases g₂ with ⟨n₂, nd₂, e₂, i₂, o₂⟩
|
||
simp only [Functor.map, GGraph.overlay]
|
||
congr 1
|
||
funext i
|
||
refine Fin.addCases ?_ ?_ i <;> intro j <;> simp [Fin.append_left, Fin.append_right]
|
||
|
||
@[simp] lemma map_sequence (f : α → β) (g₁ g₂ : GGraph α) :
|
||
f <$> (g₁ ⤳ g₂) = (f <$> g₁) ⤳ (f <$> g₂) := by
|
||
rcases g₁ with ⟨n₁, nd₁, e₁, i₁, o₁⟩; rcases g₂ with ⟨n₂, nd₂, e₂, i₂, o₂⟩
|
||
simp only [Functor.map, GGraph.sequence]
|
||
congr 1
|
||
funext i
|
||
refine Fin.addCases ?_ ?_ i <;> intro j <;> simp [Fin.append_left, Fin.append_right]
|
||
|
||
@[simp] lemma map_loop (h : β → γ) (g : GGraph (Option β)) :
|
||
(Option.map h) <$> (loop g) = loop (Option.map h <$> g) := by
|
||
rcases g with ⟨n, nd, e, i, o⟩
|
||
simp only [Functor.map, GGraph.loop]
|
||
congr 1
|
||
funext i
|
||
refine Fin.addCases ?_ ?_ i <;> intro j <;> simp [Fin.append_left, Fin.append_right]
|
||
|
||
@[simp] lemma map_wrap (h : β → γ) (g : GGraph (Option β)) :
|
||
(Option.map h) <$> wrap g = wrap (Option.map h <$> g) := by
|
||
simp [GGraph.wrap, GGraph.map_sequence, GGraph.map_singleton]
|
||
|
||
variable (g : GGraph α)
|
||
|
||
/-- All the nodes in the graph. -/
|
||
def indices : List g.Index := List.finRange g.size
|
||
|
||
/-- All of the graph's indices are listed in `indices`. -/
|
||
lemma mem_indices (idx : g.Index) : idx ∈ g.indices :=
|
||
List.mem_finRange idx
|
||
|
||
/-- `indices` does not have duplicates. -/
|
||
lemma nodup_indices : g.indices.Nodup :=
|
||
List.nodup_finRange g.size
|
||
|
||
/-- Predecessors of a particular node in the graph. --/
|
||
def predecessors (idx : g.Index) : List g.Index :=
|
||
g.indices.filter (fun idx' => (idx', idx) ∈ g.edges)
|
||
|
||
/-- There's there's an edge between two nodes `idx₁` and `idx₂`,
|
||
then `idx₁` is the predecessor of `idx₂`. -/
|
||
lemma mem_predecessors_of_edge {idx₁ idx₂ : g.Index}
|
||
(h : (idx₁, idx₂) ∈ g.edges) : idx₁ ∈ g.predecessors idx₂ :=
|
||
List.mem_filter.mpr ⟨g.mem_indices idx₁, by simpa using h⟩
|
||
|
||
/-- A node is a predecessor of another node only if there's an
|
||
edge between them. -/
|
||
lemma edge_of_mem_predecessors {idx₁ idx₂ : g.Index}
|
||
(h : idx₁ ∈ g.predecessors idx₂) : (idx₁, idx₂) ∈ g.edges := by
|
||
simpa using (List.mem_filter.mp h).2
|
||
|
||
end GGraph
|
||
|
||
/-- "Normal" graphs, for the purposes of the analyses in this
|
||
framework, have basic statements in their nodes, and nothing else. -/
|
||
abbrev Graph : Type := GGraph (Option BasicStmt)
|
||
|
||
namespace Graph
|
||
|
||
export GGraph (overlay sequence loop singleton wrap loop_inputs loop_outputs)
|
||
|
||
@[inherit_doc] scoped infixr:70 " ∙ " => GGraph.overlay
|
||
@[inherit_doc] scoped infixr:70 " ⤳ " => GGraph.sequence
|
||
|
||
end Graph
|
||
|
||
open Graph in
|
||
def Stmt.cfg : Stmt → Graph
|
||
-- A basic statement goes into a single basic block
|
||
| .basic bs => singleton (some bs)
|
||
-- Sequencing of statements corresponds naturally to CFG sequencing
|
||
| .andThen s₁ s₂ => s₁.cfg ⤳ s₂.cfg
|
||
-- An if can execute either one branch or the other; overlap them.
|
||
-- Subsequent sequencing (etc.) will end up creating the forks and joins.
|
||
| .ifElse _ s₁ s₂ => s₁.cfg ∙ s₂.cfg
|
||
-- The `loop` construct was developed specifically for zero-or-more loops like this.
|
||
| .whileLoop _ s => loop s.cfg
|
||
|
||
end Spa
|