Update post with tactic-based proof.
This commit is contained in:
parent
43dfee56cc
commit
6a6f25547e
@ -1 +1 @@
|
||||
Subproject commit 661bcbb557bfd7c228341b2430efd5aa21de3878
|
||||
Subproject commit 2b69cbd391398ed21ccf3b2b06d62dc46207fe85
|
@ -1,102 +0,0 @@
|
||||
Require Import Coq.Lists.List.
|
||||
Require Import Omega.
|
||||
|
||||
Definition has_pair (t : nat) (is : list nat) : Prop :=
|
||||
exists n1 n2 : nat, n1 <> n2 /\ In n1 is /\ In n2 is /\ n1 + n2 = t.
|
||||
|
||||
Fixpoint find_matching (is : list nat) (total : nat) (x : nat) : option nat :=
|
||||
match is with
|
||||
| nil => None
|
||||
| cons y ys =>
|
||||
if Nat.eqb (x + y) total
|
||||
then Some y
|
||||
else find_matching ys total x
|
||||
end.
|
||||
|
||||
Fixpoint find_sum (is : list nat) (total : nat) : option (nat * nat) :=
|
||||
match is with
|
||||
| nil => None
|
||||
| cons x xs =>
|
||||
match find_matching xs total x with
|
||||
| None => find_sum xs total (* Was buggy! *)
|
||||
| Some y => Some (x, y)
|
||||
end
|
||||
end.
|
||||
|
||||
Lemma find_matching_correct : forall is k x y,
|
||||
find_matching is k x = Some y -> x + y = k.
|
||||
Proof.
|
||||
intros is. induction is;
|
||||
intros k x y Hev.
|
||||
- simpl in Hev. inversion Hev.
|
||||
- simpl in Hev. destruct (Nat.eqb (x+a) k) eqn:Heq.
|
||||
+ injection Hev as H; subst.
|
||||
apply EqNat.beq_nat_eq. auto.
|
||||
+ apply IHis. assumption.
|
||||
Qed.
|
||||
|
||||
Lemma find_matching_skip : forall k x y i is,
|
||||
find_matching is k x = Some y -> find_matching (cons i is) k x = Some y.
|
||||
Proof.
|
||||
intros k x y i is Hsmall.
|
||||
simpl. destruct (Nat.eqb (x+i) k) eqn:Heq.
|
||||
- apply find_matching_correct in Hsmall.
|
||||
symmetry in Heq. apply EqNat.beq_nat_eq in Heq.
|
||||
assert (i = y). { omega. } rewrite H. reflexivity.
|
||||
- assumption.
|
||||
Qed.
|
||||
|
||||
Lemma find_matching_works : forall is k x y, In y is /\ x + y = k ->
|
||||
find_matching is k x = Some y.
|
||||
Proof.
|
||||
intros is. induction is;
|
||||
intros k x y [Hin Heq].
|
||||
- inversion Hin.
|
||||
- inversion Hin.
|
||||
+ subst a. simpl. Search Nat.eqb.
|
||||
destruct (Nat.eqb_spec (x+y) k).
|
||||
* reflexivity.
|
||||
* exfalso. apply n. assumption.
|
||||
+ apply find_matching_skip. apply IHis.
|
||||
split; assumption.
|
||||
Qed.
|
||||
|
||||
Theorem find_sum_works :
|
||||
forall k is, has_pair k is ->
|
||||
exists x y, (find_sum is k = Some (x, y) /\ x + y = k).
|
||||
Proof.
|
||||
intros k is. generalize dependent k.
|
||||
induction is; intros k [x' [y' [Hneq [Hinx [Hiny Hsum]]]]].
|
||||
- (* is is empty. But x is in is! *)
|
||||
inversion Hinx.
|
||||
- (* is is not empty. *)
|
||||
inversion Hinx.
|
||||
+ (* x is the first element. *)
|
||||
subst a. inversion Hiny.
|
||||
* (* y is also the first element; but this is impossible! *)
|
||||
exfalso. apply Hneq. apply H.
|
||||
* (* y is somewhere in the rest of the list.
|
||||
We've proven that we will find it! *)
|
||||
exists x'. simpl.
|
||||
erewrite find_matching_works.
|
||||
{ exists y'. split. reflexivity. assumption. }
|
||||
{ split; assumption. }
|
||||
+ (* x is not the first element. *)
|
||||
inversion Hiny.
|
||||
* (* y is the first element,
|
||||
so x is somewhere in the rest of the list.
|
||||
Again, we've proven that we can find it. *)
|
||||
subst a. exists y'. simpl.
|
||||
erewrite find_matching_works.
|
||||
{ exists x'. split. reflexivity. rewrite plus_comm. assumption. }
|
||||
{ split. assumption. rewrite plus_comm. assumption. }
|
||||
* (* y is not the first element, either.
|
||||
Of course, there could be another matching pair
|
||||
starting with a. Otherwise, the inductive hypothesis applies. *)
|
||||
simpl. destruct (find_matching is k a) eqn:Hf.
|
||||
{ exists a. exists n. split.
|
||||
reflexivity.
|
||||
apply find_matching_correct with is. assumption. }
|
||||
{ apply IHis. unfold has_pair. exists x'. exists y'.
|
||||
repeat split; assumption. }
|
||||
Qed.
|
@ -397,10 +397,7 @@ Among those is `t`, the type an integer in such an arbitrary implementation. We
|
||||
will not make an assumption about how the integers are implemented, and simply
|
||||
use this generic `t` from now on.
|
||||
|
||||
#### Semantics in Coq
|
||||
|
||||
Now that we've seen finite sets and vectors, it's time to use them to
|
||||
encode our semantics in Coq. Let's start with jumps. Suppose we wanted to write a function that _does_ return a valid program
|
||||
Now, suppose we wanted to write a function that _does_ return a valid program
|
||||
counter after adding the offset to it. Since it's possible for this function to fail
|
||||
(for instance, if the offset is very negative), it has to return `option (fin (S n))`.
|
||||
That is, this function may either fail (returning `None`) or succeed, returning
|
||||
@ -409,11 +406,93 @@ the function in Coq (again, don't worry too much about the definition):
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 61 61 >}}
|
||||
|
||||
But earlier, didn't we say:
|
||||
We will make use of this function when we define and verify our semantics.
|
||||
Let's take a look at that next.
|
||||
|
||||
#### Semantics in Coq
|
||||
|
||||
Now that we've seen finite sets and vectors, it's time to use them to
|
||||
encode our semantics in Coq. Before we do anything else, we need
|
||||
to provide Coq definitions for the various components of our
|
||||
language, much like what we did with `tinylang`. We can start with opcodes:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 20 23 >}}
|
||||
|
||||
Now we can define a few other parts of our language and semantics, namely
|
||||
states, instructions and programs (which I called "inputs" since, we'll, they're
|
||||
our puzzle input). A state is simply the 3-tuple of the program counter, the set
|
||||
of valid program counters, and the accumulator. We write it as follows:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 33 33 >}}
|
||||
|
||||
The star `*` is used here to represent a [product type](https://en.wikipedia.org/wiki/Product_type)
|
||||
rather than arithmetic multiplication. Our state type accepts an argument,
|
||||
`n`, much like a finite natural number or a vector. In fact, this `n` is passed on
|
||||
to the state's program counter and set types. Rightly, a state for a program
|
||||
of length \\(n\\) will not be of the same type as a state for a program of length \\(n+1\\).
|
||||
|
||||
An instruction is also a tuple, but this time containing only two elements: the opcode and
|
||||
the number. We write this as follows:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 36 36 >}}
|
||||
|
||||
Finally, we have to define the type of a program. This type will also be
|
||||
indexed by `n`, the program's length. A program of length `n` is simply a
|
||||
vector of instructions `inst` of length `n`. This leads to the following
|
||||
definition:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 38 38 >}}
|
||||
|
||||
So far, so good! Finally, it's time to get started on the semantics themselves.
|
||||
We begin with the inductive definition of \\((\\rightarrow_i)\\).
|
||||
I think this is fairly straightforward. However, we do use
|
||||
`t` instead of \\(n\\) from the rules, and we use `FS`
|
||||
instead of \\(+1\\). Also, we make the formerly implicit
|
||||
assumption that \\(c+n\\) is valid explicit, by
|
||||
providing a proof that `valid_jump_t pc t = Some pc'`.
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 103 110 >}}
|
||||
|
||||
Next, it will help us to combine the premises for a
|
||||
"failed" and "ok" terminations into Coq data types.
|
||||
This will make it easier for us to formulate a lemma later on.
|
||||
Here are the definitions:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 112 117 >}}
|
||||
|
||||
Since all of out "termination" rules start and
|
||||
end in the same state, there's no reason to
|
||||
write that state twice. Thus, both `done`
|
||||
and `stuck` only take the input `inp`,
|
||||
and the state, which includes the accumulator
|
||||
`acc`, set of allowed program counters `v`, and
|
||||
the program counter at which the program came to an end.
|
||||
When the program terminates successfully, this program
|
||||
counter will be equal to the length of the program `n`,
|
||||
so we use `nat_to_fin n`. On the other hand, if the program
|
||||
terminates in as stuck state, it must be that it terminated
|
||||
at a program counter that points to an instruction. Thus, this
|
||||
program counter is actually a \\(\\text{Fin} \\; n\\), and not
|
||||
a \\(\\text{Fin} \\ (n+1)\\), and is not in the set of allowed program counters.
|
||||
We use the same "weakening" trick we saw earlier to represent
|
||||
this.
|
||||
|
||||
Finally, we encode the three inference rules we came up with:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 119 126 >}}
|
||||
|
||||
Notice that we fused two of the premises in the last rule.
|
||||
Instead of naming the instruction at the current program
|
||||
counter and using it in another premise, we simply use
|
||||
`nth inp pc`, which corresponds to \\(p[c]\\) in our
|
||||
"paper" semantics.
|
||||
|
||||
Before we go on writing some actual proofs, we have
|
||||
one more thing we have to address. Earlier, we said:
|
||||
|
||||
> All jumps will lead either to an instruction, or right to the end of a program.
|
||||
|
||||
To make Coq aware of this constraint, we'll have to formalize this notion. To
|
||||
To make Coq aware of this constraint, we'll have to formalize it. To
|
||||
start off, we'll define the notion of a "valid instruction", which is guaranteed
|
||||
to keep the program counter in the correct range.
|
||||
There are a couple of ways to do this, but we'll use yet another definition based
|
||||
@ -423,7 +502,7 @@ is perfectly valid for a program with thousands of instructions, but if it occur
|
||||
in a program with only 3 instructions, it will certainly lead to disaster. Specifically,
|
||||
the validity of an instruction depends on the length of the program in which it resides,
|
||||
and the program counter at which it's encountered.
|
||||
Thus, we refine our idea of validity to "being valid for a program of length n at program counter f".
|
||||
Thus, we refine our idea of validity to "being valid for a program of length \\(n\\) at program counter \\(f\\)".
|
||||
For this, we can use the following two inference rules:
|
||||
|
||||
{{< latex >}}
|
||||
@ -444,11 +523,12 @@ which, as we discussed above, is perfectly valid.
|
||||
|
||||
The second rule works for the other two instructions. It has an extra premise:
|
||||
the result of `jump_valid_t` (written as \\(J_v\\)) has to be \\(\\text{Some} \\; c'\\),
|
||||
that is, `jump_valid_t` must succeed. Now, if an instruction satisfies these validity
|
||||
rules for a given program at a given program counter, evaluating it will always
|
||||
result in a program counter that has a proper value.
|
||||
that is, `jump_valid_t` must succeed. Note that we require this even for no-ops,
|
||||
since it later turns out of the them may be a jump after all.
|
||||
|
||||
We encode this in Coq as follows:
|
||||
We now have our validity rules. If an instruction satisfies them for a given program
|
||||
and at a given program counter, evaluating it will always result in a program counter that has a proper value.
|
||||
We encode the rules in Coq as follows:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 152 157 >}}
|
||||
|
||||
@ -470,44 +550,191 @@ encode this in Coq, too:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 160 161 >}}
|
||||
|
||||
In the above, we use `input n` to mean "a program of length `n`".
|
||||
This is just an alias for `vect inst n`, a vector of instructions
|
||||
of length `n`. In the above, `n` is made implicit where possible.
|
||||
In the above, `n` is made implicit where possible.
|
||||
Since \\(c\\) (called `pc` in the code) is of type \\(\\text{Fin} \\; n\\), there's no
|
||||
need to write \\(n\\) _again_.
|
||||
need to write \\(n\\) _again_. The curly braces tell Coq to infer that
|
||||
argument where possible.
|
||||
|
||||
Finally, it's time to get started on the semantics themselves.
|
||||
We start with the inductive definition of \\((\\rightarrow_i)\\).
|
||||
I think this is fairly straightforward. We use
|
||||
`t` instead of \\(n\\) from the rules, and we use `FS`
|
||||
instead of \\(+1\\). Also, we make the formerly implicit
|
||||
assumption that \\(c+n\\) is valid explicit, by
|
||||
providing a proof that `valid_jump_t pc t = Some pc'`.
|
||||
### Proving Termination
|
||||
Here we go! It's finally time to make some claims about our
|
||||
definitions. Who knows - maybe we wrote down total garbage!
|
||||
We will be creating several related lemmas and theorems.
|
||||
All of them share two common assumptions:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 103 110 >}}
|
||||
* We have some valid program `inp` of length `n`.
|
||||
* This program is a valid input, that is, `valid_input` holds for it.
|
||||
There's no sense in arguing based on an invalid input program.
|
||||
|
||||
Next, it will help us to combine the premises for a
|
||||
"failed" and "ok" terminations into Coq data types.
|
||||
This will help us formulate a lemma later on. Here they are:
|
||||
We represent these grouped assumptions by opening a Coq
|
||||
`Section`, which we call `ValidInput`, and listing our assumptions:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 112 117 >}}
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 163 166 >}}
|
||||
|
||||
Since all of out "termination" rules start and
|
||||
end in the same state, there's no reason to
|
||||
write that state twice. Thus, both `done`
|
||||
and `stuck` only take the input `inp`,
|
||||
and the state, which includes the accumulator
|
||||
`acc`, set of allowed program counters `v`, and
|
||||
the program counter at which the program came to an end.
|
||||
When the program terminates successfully, this program
|
||||
counter will be equal to the length of the program `n`,
|
||||
so we use `nat_to_fin n`. On the other hand, if the program
|
||||
terminates in as stuck state, it must be that it terminated
|
||||
at a program counter that points to an instruction. Thus, this
|
||||
program counter is actually a \\(\\text{Fin} \\; n\\), and not
|
||||
a \\(\\text{Fin} \\ (n+1)\\) (we use the same "weakening" trick
|
||||
we saw earlier), and is not in the set of allowed program counters.
|
||||
We had to also explicitly mention the length `n` of our program.
|
||||
From now on, the variables `n`, `inp`, and `Hv` will be
|
||||
available to all of the proofs we write in this section.
|
||||
The first proof is rather simple. The claim is:
|
||||
|
||||
Finally, we encode the three inference rules we came up with:
|
||||
> For our valid program, at any program counter `pc`
|
||||
and accumulator `acc`, there must exists another program
|
||||
counter `pc'` and accumulator `acc'` such that the
|
||||
instruction evaluation relation \\((\rightarrow_i)\\)
|
||||
connects the two. That is, valid addresses aside,
|
||||
we can always make a step.
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 119 126 >}}
|
||||
Here is this claim encoded in Coq:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 168 169 >}}
|
||||
|
||||
We start our proof by introducing all the relevant variables into
|
||||
the global context. I've mentioned this when I wrote about
|
||||
day 1, but here's the gist: the `intros` keyword takes
|
||||
variables from a `forall`, and makes them concrete.
|
||||
In short, `intros x` is very much like saying "suppose
|
||||
we have an `x`", and going on with the proof.
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 170 171 >}}
|
||||
|
||||
Here, we said "take any program counter `pc` and any
|
||||
accumulator `acc`". Now what? Well, first of all,
|
||||
we want to take a look at the instruction at the current
|
||||
`pc`. We know that this instruction is a combination
|
||||
of an opcode and a number, so we use `destruct` to get
|
||||
access to both of these parts:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 172 172 >}}
|
||||
|
||||
Now, Coq reports the following proof state:
|
||||
|
||||
```
|
||||
1 subgoal
|
||||
|
||||
n : nat
|
||||
inp : input n
|
||||
Hv : valid_input inp
|
||||
pc : Fin.t n
|
||||
acc : t
|
||||
o : opcode
|
||||
t0 : t
|
||||
Hop : nth inp pc = (o, t0)
|
||||
|
||||
========================= (1 / 1)
|
||||
|
||||
exists (pc' : fin (S n)) (acc' : t),
|
||||
step_noswap (o, t0) (pc, acc) (pc', acc')
|
||||
```
|
||||
|
||||
We have some opcode `o`, and some associated number
|
||||
`t0`, and we must show that there exist a `pc'`
|
||||
and `acc'` to which we can move on. To prove
|
||||
that something exists in Coq, we must provide
|
||||
an instance of that "something". If we claim
|
||||
that there exists a dog that's not a good boy,
|
||||
we better have this elusive creature in hand.
|
||||
In other words, proofs in Coq are [constructive](https://en.wikipedia.org/wiki/Constructive_proof).
|
||||
Without knowing the kind of operation we're dealing with, we can't
|
||||
say for sure how the step will proceed. Thus, we proceed by
|
||||
case analysis on `o`.
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 173 173 >}}
|
||||
|
||||
There are three possible cases we have to consider,
|
||||
one for each type of instruction.
|
||||
|
||||
* If the instruction is \\(\\texttt{add}\\), we know
|
||||
that `pc' = pc + 1` and `acc' = acc + t0`. That is,
|
||||
the program counter is simply incremented, and the accumulator
|
||||
is modified with the number part of the instruction.
|
||||
* If the instruction is \\(\\texttt{nop}\\), the program
|
||||
coutner will again be incremented (`pc' = pc + 1`),
|
||||
but the accumulator will stay the same, so `acc' = acc`.
|
||||
* If the instruction is \\(\\texttt{jmp}\\), things are
|
||||
more complicated. We must rely on the assumption
|
||||
that our input is valid, which tells us that adding
|
||||
`t0` to our `pc` will result in `Some f`, and not `None`.
|
||||
Given this, we have `pc' = f`, and `acc' = acc`.
|
||||
|
||||
This is how these three cases are translated to Coq:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 174 177 >}}
|
||||
|
||||
For the first two cases, we simply provide the
|
||||
values we expect for `pc'` and `acc'`, and
|
||||
apply the corresponding inference rule that
|
||||
is satisfied by these values. For the third case, we have
|
||||
to invoke `Hv`, the hypothesis that our input is valid.
|
||||
In particular, we care about the instruction at `pc`,
|
||||
so we use `specialize` to plug `pc` into the more general
|
||||
hypothesis. We then replace `nth inp pc` with its known
|
||||
value, `(jmp, t0)`. This tells us the following, in Coq's words:
|
||||
|
||||
```
|
||||
Hv : valid_inst (jmp, t0) pc
|
||||
```
|
||||
|
||||
That is, `(jmp, t0)` is a valid instruction at `pc`. Then, using
|
||||
Coq's `inversion` tactic, we ask: how is this possible? There is
|
||||
only one inference rule that gives us such a conclusion, and it is named `valid_inst_jmp`
|
||||
in our Coq code. Since we have a proof that our `jmp` is valid,
|
||||
it must mean that this rule was used. Furthermore, sicne this
|
||||
rule requires that `valid_jump_t` evaluates to `Some f'`, we know
|
||||
that this must be the case here! Coq now has adds the following
|
||||
two lines to our proof state:
|
||||
|
||||
```
|
||||
f' : fin (S n)
|
||||
H0 : valid_jump_t pc t0 = Some f'
|
||||
```
|
||||
|
||||
Finally, we specify, as mentioned earlier, that `pc' = f'` and `acc' = acc`.
|
||||
As before, we apply the corresponding step rule for `jmp`. When it asks
|
||||
for a proof that `valid_jump_t` produces a valid program counter,
|
||||
we hand it `H0` using `apply H0`. And with that, Coq is happy!
|
||||
|
||||
Next, we prove a claim that a valid program can always do _something_,
|
||||
and that something is one of three things:
|
||||
|
||||
* It can terminate in the "ok" state if the program counter
|
||||
reaches the programs' end.
|
||||
* It can terminate with an error if it's currently at a program
|
||||
counter that is not included in the valid set.
|
||||
* Otherwise, it can run the current instruction and advance
|
||||
to a "next" state.
|
||||
|
||||
Alternatively, we could say that one of the inference rules
|
||||
for \\((\\Rightarrow_p)\\) must apply. This is not the case if the input
|
||||
is not valid, since, as I said
|
||||
before, an arbitrary input program can lead us to jump
|
||||
to a negative address (or to an address _way_ past the end of the program).
|
||||
Here's the claim, translated to Coq:
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 181 186 >}}
|
||||
|
||||
Informally, we can prove this as follows:
|
||||
|
||||
* If the current program counter is equal to the length
|
||||
of the program, we've reached the end. Thus, the program
|
||||
can terminate in the "ok" state.
|
||||
* Otherwise, the current program counter must be
|
||||
less than the length of the program.
|
||||
* If we've already encountered this program counter (that is,
|
||||
if it's gone from the set of valid program counters),
|
||||
then the program will terminate in the "error" state.
|
||||
* Otherwise, the program counter is in the set of
|
||||
valid instructions. By our earlier theorem, in a valid
|
||||
program, the instruction at any program counter can be correctly
|
||||
executed, taking us to the next state. Now too
|
||||
our program can move to this next state.
|
||||
|
||||
Below is the Coq translation of the above.
|
||||
|
||||
{{< codelines "Coq" "aoc-2020/day8.v" 187 203 >}}
|
||||
|
||||
It doesn't seem like we're that far from being done now.
|
||||
A program can always take a step, and each time it does,
|
||||
the set of valid program counters decreases in size. Eventually,
|
||||
this set will become empty, so if nothing else, our program will
|
||||
eventually terminate in an "error" state. Thus, it will stop
|
||||
running no matter what.
|
||||
|
||||
The problem is: how do we express that?
|
||||
|
Loading…
Reference in New Issue
Block a user