import Spa.Language.Traces

/-!

# Properties of the Object Language, CFGs, and Traces

This module encodes some properties of the language, mostly those having to do
with connecting the computational view (the `Spa.Graph`s, on which static
analyses are executed) to the semantic view (such as `EvalStmt`, which
encodes the expected formal behavior of the language). In particular,
to prove that our computationally-implemented static analyses are correct,
we need to show that our computational model of their execution (the CFG)
matches the formal description. Thus, the key result `cfg_sufficient`.

Many lemmas and definitions here aim are used to prove that result,
by allowing inductive proofs on the construction of the CFG:
the bits where we _build up_ the trace corresponding to each
proof tree are exactly those when we have two graphs (through
which traces exist) and we want to combine these graphs, while
showing also that a combined trace exists as well. -/

namespace Spa

open Graph

lemma Fin.castAdd_ne_natAdd {n m : ℕ} (i : Fin n) (j : Fin m) :
    Fin.castAdd m i ≠ Fin.natAdd n j := by
  intro h
  have := congrArg Fin.val h
  simp only [Fin.coe_castAdd, Fin.coe_natAdd] at this
  omega

section Embeddings

variable {g₁ g₂ : Graph} {ρ₁ ρ₂ : Env}

/-- When two graphs are overlaid, for each trace in the left graph,
    a corresponding trace exists in the combined graph. -/
noncomputable def Trace.overlay_left {idx₁ idx₂ : g₁.Index}
    (tr : Trace g₁ idx₁ idx₂ ρ₁ ρ₂) :
    Trace (g₁ ∙ g₂) (idx₁.castAdd g₂.size) (idx₂.castAdd g₂.size) ρ₁ ρ₂ := by
  induction tr with
  | single hbs =>
    exact Trace.single (by rwa [show (g₁ ∙ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl,
      Fin.append_left])
  | edge hbs he _ ih =>
    refine Trace.edge ?_ ?_ ih
    · rwa [show (g₁ ∙ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl, Fin.append_left]
    · exact List.mem_append_left _ (List.mem_map_of_mem _ he)

/-- When two graphs are overlaid, for each trace in the right graph,
    a corresponding trace exists in the combined graph. -/
noncomputable def Trace.overlay_right {idx₁ idx₂ : g₂.Index}
    (tr : Trace g₂ idx₁ idx₂ ρ₁ ρ₂) :
    Trace (g₁ ∙ g₂) (idx₁.natAdd g₁.size) (idx₂.natAdd g₁.size) ρ₁ ρ₂ := by
  induction tr with
  | single hbs =>
    exact Trace.single (by rwa [show (g₁ ∙ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl,
      Fin.append_right])
  | edge hbs he _ ih =>
    refine Trace.edge ?_ ?_ ih
    · rwa [show (g₁ ∙ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl, Fin.append_right]
    · exact List.mem_append_right _ (List.mem_map_of_mem _ he)

/-- When two graphs are sequenced, for each trace in the first graph,
    a corresponding trace exists in the combined graph. -/
noncomputable def Trace.sequence_left {idx₁ idx₂ : g₁.Index}
    (tr : Trace g₁ idx₁ idx₂ ρ₁ ρ₂) :
    Trace (g₁ ⤳ g₂) (idx₁.castAdd g₂.size) (idx₂.castAdd g₂.size) ρ₁ ρ₂ := by
  induction tr with
  | single hbs =>
    exact Trace.single (by rwa [show (g₁ ⤳ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl,
      Fin.append_left])
  | edge hbs he _ ih =>
    refine Trace.edge ?_ ?_ ih
    · rwa [show (g₁ ⤳ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl, Fin.append_left]
    · exact List.mem_append_left _ (List.mem_append_left _ (List.mem_map_of_mem _ he))

/-- When two graphs are sequenced, for each trace in the second graph,
   a corresponding trace exists in the combined graph. -/
noncomputable def Trace.sequence_right {idx₁ idx₂ : g₂.Index}
    (tr : Trace g₂ idx₁ idx₂ ρ₁ ρ₂) :
    Trace (g₁ ⤳ g₂) (idx₁.natAdd g₁.size) (idx₂.natAdd g₁.size) ρ₁ ρ₂ := by
  induction tr with
  | single hbs =>
    exact Trace.single (by rwa [show (g₁ ⤳ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl,
      Fin.append_right])
  | edge hbs he _ ih =>
    refine Trace.edge ?_ ?_ ih
    · rwa [show (g₁ ⤳ g₂).nodes = Fin.append g₁.nodes g₂.nodes from rfl, Fin.append_right]
    · exact List.mem_append_left _
        (List.mem_append_right _ (List.mem_map_of_mem _ he))

/-- Equivalent of `Trace.overlay_left` for end-to-end traces. -/
noncomputable def EndToEndTrace.overlay_left (etr : EndToEndTrace g₁ ρ₁ ρ₂) :
    EndToEndTrace (g₁ ∙ g₂) ρ₁ ρ₂ := by
  obtain ⟨i₁, h₁, i₂, h₂, tr⟩ := etr
  exact ⟨i₁.castAdd g₂.size, List.mem_append_left _ (List.mem_map_of_mem _ h₁),
         i₂.castAdd g₂.size, List.mem_append_left _ (List.mem_map_of_mem _ h₂),
         tr.overlay_left⟩

/-- Equivalent of `Trace.overlay_right` for end-to-end traces. -/
noncomputable def EndToEndTrace.overlay_right (etr : EndToEndTrace g₂ ρ₁ ρ₂) :
    EndToEndTrace (g₁ ∙ g₂) ρ₁ ρ₂ := by
  obtain ⟨i₁, h₁, i₂, h₂, tr⟩ := etr
  exact ⟨i₁.natAdd g₁.size, List.mem_append_right _ (List.mem_map_of_mem _ h₁),
         i₂.natAdd g₁.size, List.mem_append_right _ (List.mem_map_of_mem _ h₂),
         tr.overlay_right⟩

/-- When two graphs are sequenced, two end-to-end traces through the respective
    graphs can be sequenced to create an end-to-end trace in the combined
    graph. This is only possible for end-to-end traces and not for general
    `Trace`s, because sequencing only introduces edges from the output nodes
    of one graph to the input nodes of another graph. A non-end-to-end trace
    need to conclude at the output node, so it cannot necessarily be sequenced
    with a trace in another graph. -/
noncomputable def EndToEndTrace.concat {ρ₃ : Env} (etr₁ : EndToEndTrace g₁ ρ₁ ρ₂)
    (etr₂ : EndToEndTrace g₂ ρ₂ ρ₃) : EndToEndTrace (g₁ ⤳ g₂) ρ₁ ρ₃ := by
  obtain ⟨i₁, h₁, i₂, h₂, tr₁⟩ := etr₁
  obtain ⟨j₁, k₁, j₂, k₂, tr₂⟩ := etr₂
  refine ⟨i₁.castAdd g₂.size, List.mem_map_of_mem _ h₁,
          j₂.natAdd g₁.size, List.mem_map_of_mem _ k₂,
          Trace.concat tr₁.sequence_left ?_ tr₂.sequence_right⟩
  exact List.mem_append_right _
    (List.mem_product.mpr ⟨List.mem_map_of_mem _ h₂, List.mem_map_of_mem _ k₁⟩)

end Embeddings

section Loop

variable {g : Graph} {ρ₁ ρ₂ ρ₃ : Env}

/-- A trace through a body CFG still exists (up to reindexing) in a zero-or-more loop CFG. -/
noncomputable def Trace.loop {idx₁ idx₂ : g.Index} (tr : Trace g idx₁ idx₂ ρ₁ ρ₂) :
    Trace (Graph.loop g) (idx₁.natAdd 2) (idx₂.natAdd 2) ρ₁ ρ₂ := by
  induction tr with
  | single hbs =>
    exact Trace.single (by
      rwa [show (Graph.loop g).nodes = Fin.append (fun _ : Fin 2 => none) g.nodes from rfl,
        Fin.append_right])
  | edge hbs he _ ih =>
    refine Trace.edge ?_ ?_ ih
    · rwa [show (Graph.loop g).nodes = Fin.append (fun _ : Fin 2 => none) g.nodes from rfl,
        Fin.append_right]
    · exact List.mem_append_left _ (List.mem_append_left _
        (List.mem_append_left _ (List.mem_map_of_mem _ he)))

/-- The beginning node of a loop graph is empty. -/
private lemma loop_nodes_at_in :
    (Graph.loop g).nodes g.loopIn = none :=
  Fin.append_left (fun _ : Fin 2 => none) g.nodes 0

/-- The ending node of a loop graph is empty. -/
private lemma loop_nodes_at_out :
    (Graph.loop g).nodes g.loopOut = none :=
  Fin.append_left (fun _ : Fin 2 => none) g.nodes 1

/-- Equivlaent of `Trace.loop` for end-to-end traces. -/
noncomputable def EndToEndTrace.loop (etr : EndToEndTrace g ρ₁ ρ₂) :
    EndToEndTrace (Graph.loop g) ρ₁ ρ₂ := by
  obtain ⟨i₁, h₁, i₂, h₂, tr⟩ := etr
  -- the edge in → (2 ↑ʳ i₁), reached through the second edge group
  have hin : (g.loopIn, i₁.natAdd 2) ∈ (Graph.loop g).edges := by
    refine List.mem_append_left _ (List.mem_append_left _ (List.mem_append_right _ ?_))
    exact List.mem_map_of_mem _ (List.mem_map_of_mem _ h₁)
  -- the edge (2 ↑ʳ i₂) → out, reached through the third edge group
  have hout : (i₂.natAdd 2, g.loopOut) ∈ (Graph.loop g).edges := by
    refine List.mem_append_left _ (List.mem_append_right _ ?_)
    exact List.mem_map_of_mem _ (List.mem_map_of_mem _ h₂)
  refine ⟨g.loopIn, List.mem_singleton_self _, g.loopOut, List.mem_singleton_self _, ?_⟩
  exact Trace.concat (Trace.single (loop_nodes_at_in ▸ EvalBasicStmtOpt.none)) hin
    (Trace.concat tr.loop hout (Trace.single (loop_nodes_at_out ▸ EvalBasicStmtOpt.none)))

/-- The zero-or-more times loop has an edge to return back to the top, to continue after an iteration. -/
private lemma loop_edge_out_in :
    ((g.loopOut, g.loopIn) : (Graph.loop g).Edge) ∈ (Graph.loop g).edges := by
  refine List.mem_append_right _ ?_
  exact List.mem_cons_self _ _

/-- Two traces through a loop can be combined, since a loop can be executed any number of times. -/
noncomputable def EndToEndTrace.loop_concat (etr₁ : EndToEndTrace (Graph.loop g) ρ₁ ρ₂)
    (etr₂ : EndToEndTrace (Graph.loop g) ρ₂ ρ₃) :
    EndToEndTrace (Graph.loop g) ρ₁ ρ₃ := by
  obtain ⟨i₁, h₁, i₂, h₂, tr₁⟩ := etr₁
  obtain ⟨j₁, k₁, j₂, k₂, tr₂⟩ := etr₂
  simp only [Graph.loop_inputs, Graph.loop_outputs, List.mem_singleton] at h₁ h₂ k₁ k₂
  subst h₁; subst h₂; subst k₁; subst k₂
  exact ⟨g.loopIn, List.mem_singleton_self _, g.loopOut, List.mem_singleton_self _,
    Trace.concat tr₁ loop_edge_out_in tr₂⟩

/-- A loop can be executed zero times. -/
noncomputable def EndToEndTrace.loop_empty {ρ : Env} : EndToEndTrace (Graph.loop g) ρ ρ := by
  have hedge : ((g.loopIn, g.loopOut) : (Graph.loop g).Edge) ∈ (Graph.loop g).edges :=
    List.mem_append_right _ (List.mem_cons_of_mem _ (List.mem_cons_self _ _))
  exact ⟨g.loopIn, List.mem_singleton_self _, g.loopOut, List.mem_singleton_self _,
    Trace.concat (Trace.single (loop_nodes_at_in ▸ EvalBasicStmtOpt.none)) hedge
      (Trace.single (loop_nodes_at_out ▸ EvalBasicStmtOpt.none))⟩

end Loop

/-- A CFG consisting of only a single node has a trace through it corresponding to that node. -/
noncomputable def EndToEndTrace.singleton {o : Option BasicStmt} {ρ₁ ρ₂ : Env}
    (h : EvalBasicStmtOpt ρ₁ o ρ₂) : EndToEndTrace (Graph.singleton o) ρ₁ ρ₂ :=
  ⟨(0 : Fin 1), List.mem_singleton_self _, (0 : Fin 1), List.mem_singleton_self _,
   Trace.single h⟩

/-- If a CFG's only node is empty, the no-op trace exists through it. -/
noncomputable def EndToEndTrace.singleton_nil (ρ : Env) :
    EndToEndTrace (Graph.singleton none) ρ ρ :=
  EndToEndTrace.singleton EvalBasicStmtOpt.none

/-- Invoking 'Graph.wrap` (which ensures a single entry and exit node for a CFG)
    does not invalidate traces in the original graph. -/
noncomputable def EndToEndTrace.wrap {g : Graph} {ρ₁ ρ₂ : Env}
    (etr : EndToEndTrace g ρ₁ ρ₂) : EndToEndTrace (Graph.wrap g) ρ₁ ρ₂ :=
  (EndToEndTrace.singleton_nil ρ₁).concat (etr.concat (EndToEndTrace.singleton_nil ρ₂))

/-- Key result: the control flow graph admits every execution that's made
    possible by a language's semantics. Thus, the CFG encodes _at least_ all
    semantically-possible executions. Informally, we can conclude from this
    that if we compute a result that using the graph's edges to determine
    what's possible, this result will not disagree with the semantics.

    Note that a CFG like $K_4$ (where the nodes are basic blocks) is
    technically also a sufficient graph, but is very likely meaningless in that
    it grossly overestimates the possible execution paths in the language, and
    thus is bound to produce less-than-specific results. There is as yet no
    result in this framework that the CFG we produce is _minimal_: loosely,
    posessing only edges for things that are admitted by the semantics.
    This is difficult to state (in its strongest form, this would
    require the CFG to be able to detect something like `while (alwaysFalse)`,
    and so remains a TODO. -/
noncomputable def Stmt.cfg_sufficient {s : Stmt} {ρ₁ ρ₂ : Env}
    (h : EvalStmt ρ₁ s ρ₂) : EndToEndTrace s.cfg ρ₁ ρ₂ := by
  induction h with
  | basic ρ₁ ρ₂ bs hbs =>
    exact EndToEndTrace.singleton (EvalBasicStmtOpt.some hbs)
  | andThen ρ₁ ρ₂ ρ₃ s₁ s₂ _ _ ih₁ ih₂ =>
    exact ih₁.concat ih₂
  | ifTrue ρ₁ ρ₂ e z s₁ s₂ _ _ _ ih =>
    exact ih.overlay_left
  | ifFalse ρ₁ ρ₂ e s₁ s₂ _ _ ih =>
    exact ih.overlay_right
  | whileTrue ρ₁ ρ₂ ρ₃ e z s _ _ _ _ ih₁ ih₂ =>
    exact (ih₁.loop).loop_concat ih₂
  | whileFalse ρ e s _ =>
    exact EndToEndTrace.loop_empty

/-- The input / entry node generated by `Graph.wrap`. -/
def Graph.wrapInput (g : Graph) : (Graph.wrap g).Index :=
  (0 : Fin 1).castAdd ((g ⤳ Graph.singleton none).size)

/-- The output / exit node generated by `Graph.wrap`. -/
def Graph.wrapOutput (g : Graph) : (Graph.wrap g).Index :=
  Fin.natAdd 1 ((Fin.natAdd g.size (0 : Fin 1)))

/-- The `Graph.wrapInput` is, indeed, the graph's only input after `Graph.wrap`. -/
lemma Graph.wrap_inputs (g : Graph) :
    (Graph.wrap g).inputs = [g.wrapInput] := rfl

/-- The `Graph.wrapInput` is, indeed, the graph's only output after `Graph.wrap`. -/
lemma Graph.wrap_outputs (g : Graph) :
    (Graph.wrap g).outputs = [g.wrapOutput] := rfl

/-- When sequencing (proven here with `Graph.singleton` on the left), no edges
    exist from the right-hand graph back to the left. -/
private lemma not_mem_edges_castAdd_sequence {g₂ : Graph} (i : Fin 1)
    (idx : (Graph.singleton none ⤳ g₂).Index) :
    ((idx, i.castAdd g₂.size) : (Graph.singleton none ⤳ g₂).Edge)
      ∉ (Graph.singleton none ⤳ g₂).edges := by
  intro h
  rcases List.mem_append.mp h with h' | h'
  · rcases List.mem_append.mp h' with h'' | h''
    · -- lifted edges of `singleton []`: there are none
      simp [Graph.singleton, List.finCastAddProd] at h''
    · -- lifted edges of g₂: targets are natAdd
      obtain ⟨e, _, heq⟩ := List.mem_map.mp h''
      exact Fin.castAdd_ne_natAdd i e.2 (congrArg Prod.snd heq).symm
  · -- product edges: targets are natAdd'd inputs of g₂
    obtain ⟨-, hb⟩ := List.mem_product.mp h'
    obtain ⟨j, -, heq⟩ := List.mem_map.mp hb
    exact Fin.castAdd_ne_natAdd i j heq.symm

/-- The input node of a graph after `Graph.wrap` has no predecessors. -/
lemma Graph.wrap_predecessors_eq_nil (g : Graph) (idx : (Graph.wrap g).Index)
    (h : idx ∈ (Graph.wrap g).inputs) :
    (Graph.wrap g).predecessors idx = [] := by
  rw [Graph.wrap_inputs, List.mem_singleton] at h
  subst h
  rw [GGraph.predecessors, List.filter_eq_nil_iff]
  intro idx' _
  simpa using not_mem_edges_castAdd_sequence (g₂ := g ⤳ Graph.singleton none) 0 idx'

end Spa