From 6a6f25547e88b72454ced8998f3caf7d7008e3c1 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Sat, 2 Jan 2021 18:30:32 -0800 Subject: [PATCH] Update post with tactic-based proof. --- code/aoc-2020 | 2 +- code/aoc-coq/day1.v | 102 ------------ content/blog/01_aoc_coq.md | 315 +++++++++++++++++++++++++++++++------ 3 files changed, 272 insertions(+), 147 deletions(-) delete mode 100644 code/aoc-coq/day1.v diff --git a/code/aoc-2020 b/code/aoc-2020 index 661bcbb..2b69cbd 160000 --- a/code/aoc-2020 +++ b/code/aoc-2020 @@ -1 +1 @@ -Subproject commit 661bcbb557bfd7c228341b2430efd5aa21de3878 +Subproject commit 2b69cbd391398ed21ccf3b2b06d62dc46207fe85 diff --git a/code/aoc-coq/day1.v b/code/aoc-coq/day1.v deleted file mode 100644 index 5058dee..0000000 --- a/code/aoc-coq/day1.v +++ /dev/null @@ -1,102 +0,0 @@ -Require Import Coq.Lists.List. -Require Import Omega. - -Definition has_pair (t : nat) (is : list nat) : Prop := - exists n1 n2 : nat, n1 <> n2 /\ In n1 is /\ In n2 is /\ n1 + n2 = t. - -Fixpoint find_matching (is : list nat) (total : nat) (x : nat) : option nat := - match is with - | nil => None - | cons y ys => - if Nat.eqb (x + y) total - then Some y - else find_matching ys total x - end. - -Fixpoint find_sum (is : list nat) (total : nat) : option (nat * nat) := - match is with - | nil => None - | cons x xs => - match find_matching xs total x with - | None => find_sum xs total (* Was buggy! *) - | Some y => Some (x, y) - end - end. - -Lemma find_matching_correct : forall is k x y, - find_matching is k x = Some y -> x + y = k. -Proof. - intros is. induction is; - intros k x y Hev. - - simpl in Hev. inversion Hev. - - simpl in Hev. destruct (Nat.eqb (x+a) k) eqn:Heq. - + injection Hev as H; subst. - apply EqNat.beq_nat_eq. auto. - + apply IHis. assumption. -Qed. - -Lemma find_matching_skip : forall k x y i is, - find_matching is k x = Some y -> find_matching (cons i is) k x = Some y. -Proof. - intros k x y i is Hsmall. - simpl. destruct (Nat.eqb (x+i) k) eqn:Heq. - - apply find_matching_correct in Hsmall. - symmetry in Heq. apply EqNat.beq_nat_eq in Heq. - assert (i = y). { omega. } rewrite H. reflexivity. - - assumption. -Qed. - -Lemma find_matching_works : forall is k x y, In y is /\ x + y = k -> - find_matching is k x = Some y. -Proof. - intros is. induction is; - intros k x y [Hin Heq]. - - inversion Hin. - - inversion Hin. - + subst a. simpl. Search Nat.eqb. - destruct (Nat.eqb_spec (x+y) k). - * reflexivity. - * exfalso. apply n. assumption. - + apply find_matching_skip. apply IHis. - split; assumption. -Qed. - -Theorem find_sum_works : - forall k is, has_pair k is -> - exists x y, (find_sum is k = Some (x, y) /\ x + y = k). -Proof. - intros k is. generalize dependent k. - induction is; intros k [x' [y' [Hneq [Hinx [Hiny Hsum]]]]]. - - (* is is empty. But x is in is! *) - inversion Hinx. - - (* is is not empty. *) - inversion Hinx. - + (* x is the first element. *) - subst a. inversion Hiny. - * (* y is also the first element; but this is impossible! *) - exfalso. apply Hneq. apply H. - * (* y is somewhere in the rest of the list. - We've proven that we will find it! *) - exists x'. simpl. - erewrite find_matching_works. - { exists y'. split. reflexivity. assumption. } - { split; assumption. } - + (* x is not the first element. *) - inversion Hiny. - * (* y is the first element, - so x is somewhere in the rest of the list. - Again, we've proven that we can find it. *) - subst a. exists y'. simpl. - erewrite find_matching_works. - { exists x'. split. reflexivity. rewrite plus_comm. assumption. } - { split. assumption. rewrite plus_comm. assumption. } - * (* y is not the first element, either. - Of course, there could be another matching pair - starting with a. Otherwise, the inductive hypothesis applies. *) - simpl. destruct (find_matching is k a) eqn:Hf. - { exists a. exists n. split. - reflexivity. - apply find_matching_correct with is. assumption. } - { apply IHis. unfold has_pair. exists x'. exists y'. - repeat split; assumption. } -Qed. diff --git a/content/blog/01_aoc_coq.md b/content/blog/01_aoc_coq.md index 96a0820..3b8264c 100644 --- a/content/blog/01_aoc_coq.md +++ b/content/blog/01_aoc_coq.md @@ -397,10 +397,7 @@ Among those is `t`, the type an integer in such an arbitrary implementation. We will not make an assumption about how the integers are implemented, and simply use this generic `t` from now on. -#### Semantics in Coq - -Now that we've seen finite sets and vectors, it's time to use them to -encode our semantics in Coq. Let's start with jumps. Suppose we wanted to write a function that _does_ return a valid program +Now, suppose we wanted to write a function that _does_ return a valid program counter after adding the offset to it. Since it's possible for this function to fail (for instance, if the offset is very negative), it has to return `option (fin (S n))`. That is, this function may either fail (returning `None`) or succeed, returning @@ -409,11 +406,93 @@ the function in Coq (again, don't worry too much about the definition): {{< codelines "Coq" "aoc-2020/day8.v" 61 61 >}} -But earlier, didn't we say: +We will make use of this function when we define and verify our semantics. +Let's take a look at that next. + +#### Semantics in Coq + +Now that we've seen finite sets and vectors, it's time to use them to +encode our semantics in Coq. Before we do anything else, we need +to provide Coq definitions for the various components of our +language, much like what we did with `tinylang`. We can start with opcodes: + +{{< codelines "Coq" "aoc-2020/day8.v" 20 23 >}} + +Now we can define a few other parts of our language and semantics, namely +states, instructions and programs (which I called "inputs" since, we'll, they're +our puzzle input). A state is simply the 3-tuple of the program counter, the set +of valid program counters, and the accumulator. We write it as follows: + +{{< codelines "Coq" "aoc-2020/day8.v" 33 33 >}} + +The star `*` is used here to represent a [product type](https://en.wikipedia.org/wiki/Product_type) +rather than arithmetic multiplication. Our state type accepts an argument, +`n`, much like a finite natural number or a vector. In fact, this `n` is passed on +to the state's program counter and set types. Rightly, a state for a program +of length \\(n\\) will not be of the same type as a state for a program of length \\(n+1\\). + +An instruction is also a tuple, but this time containing only two elements: the opcode and +the number. We write this as follows: + +{{< codelines "Coq" "aoc-2020/day8.v" 36 36 >}} + +Finally, we have to define the type of a program. This type will also be +indexed by `n`, the program's length. A program of length `n` is simply a +vector of instructions `inst` of length `n`. This leads to the following +definition: + +{{< codelines "Coq" "aoc-2020/day8.v" 38 38 >}} + +So far, so good! Finally, it's time to get started on the semantics themselves. +We begin with the inductive definition of \\((\\rightarrow_i)\\). +I think this is fairly straightforward. However, we do use +`t` instead of \\(n\\) from the rules, and we use `FS` +instead of \\(+1\\). Also, we make the formerly implicit +assumption that \\(c+n\\) is valid explicit, by +providing a proof that `valid_jump_t pc t = Some pc'`. + +{{< codelines "Coq" "aoc-2020/day8.v" 103 110 >}} + +Next, it will help us to combine the premises for a +"failed" and "ok" terminations into Coq data types. +This will make it easier for us to formulate a lemma later on. +Here are the definitions: + +{{< codelines "Coq" "aoc-2020/day8.v" 112 117 >}} + +Since all of out "termination" rules start and +end in the same state, there's no reason to +write that state twice. Thus, both `done` +and `stuck` only take the input `inp`, +and the state, which includes the accumulator +`acc`, set of allowed program counters `v`, and +the program counter at which the program came to an end. +When the program terminates successfully, this program +counter will be equal to the length of the program `n`, +so we use `nat_to_fin n`. On the other hand, if the program +terminates in as stuck state, it must be that it terminated +at a program counter that points to an instruction. Thus, this +program counter is actually a \\(\\text{Fin} \\; n\\), and not +a \\(\\text{Fin} \\ (n+1)\\), and is not in the set of allowed program counters. +We use the same "weakening" trick we saw earlier to represent +this. + +Finally, we encode the three inference rules we came up with: + +{{< codelines "Coq" "aoc-2020/day8.v" 119 126 >}} + +Notice that we fused two of the premises in the last rule. +Instead of naming the instruction at the current program +counter and using it in another premise, we simply use +`nth inp pc`, which corresponds to \\(p[c]\\) in our +"paper" semantics. + +Before we go on writing some actual proofs, we have +one more thing we have to address. Earlier, we said: > All jumps will lead either to an instruction, or right to the end of a program. -To make Coq aware of this constraint, we'll have to formalize this notion. To +To make Coq aware of this constraint, we'll have to formalize it. To start off, we'll define the notion of a "valid instruction", which is guaranteed to keep the program counter in the correct range. There are a couple of ways to do this, but we'll use yet another definition based @@ -423,7 +502,7 @@ is perfectly valid for a program with thousands of instructions, but if it occur in a program with only 3 instructions, it will certainly lead to disaster. Specifically, the validity of an instruction depends on the length of the program in which it resides, and the program counter at which it's encountered. -Thus, we refine our idea of validity to "being valid for a program of length n at program counter f". +Thus, we refine our idea of validity to "being valid for a program of length \\(n\\) at program counter \\(f\\)". For this, we can use the following two inference rules: {{< latex >}} @@ -444,11 +523,12 @@ which, as we discussed above, is perfectly valid. The second rule works for the other two instructions. It has an extra premise: the result of `jump_valid_t` (written as \\(J_v\\)) has to be \\(\\text{Some} \\; c'\\), -that is, `jump_valid_t` must succeed. Now, if an instruction satisfies these validity -rules for a given program at a given program counter, evaluating it will always -result in a program counter that has a proper value. +that is, `jump_valid_t` must succeed. Note that we require this even for no-ops, +since it later turns out of the them may be a jump after all. -We encode this in Coq as follows: +We now have our validity rules. If an instruction satisfies them for a given program +and at a given program counter, evaluating it will always result in a program counter that has a proper value. +We encode the rules in Coq as follows: {{< codelines "Coq" "aoc-2020/day8.v" 152 157 >}} @@ -470,44 +550,191 @@ encode this in Coq, too: {{< codelines "Coq" "aoc-2020/day8.v" 160 161 >}} -In the above, we use `input n` to mean "a program of length `n`". -This is just an alias for `vect inst n`, a vector of instructions -of length `n`. In the above, `n` is made implicit where possible. +In the above, `n` is made implicit where possible. Since \\(c\\) (called `pc` in the code) is of type \\(\\text{Fin} \\; n\\), there's no -need to write \\(n\\) _again_. +need to write \\(n\\) _again_. The curly braces tell Coq to infer that +argument where possible. -Finally, it's time to get started on the semantics themselves. -We start with the inductive definition of \\((\\rightarrow_i)\\). -I think this is fairly straightforward. We use -`t` instead of \\(n\\) from the rules, and we use `FS` -instead of \\(+1\\). Also, we make the formerly implicit -assumption that \\(c+n\\) is valid explicit, by -providing a proof that `valid_jump_t pc t = Some pc'`. +### Proving Termination +Here we go! It's finally time to make some claims about our +definitions. Who knows - maybe we wrote down total garbage! +We will be creating several related lemmas and theorems. +All of them share two common assumptions: -{{< codelines "Coq" "aoc-2020/day8.v" 103 110 >}} +* We have some valid program `inp` of length `n`. +* This program is a valid input, that is, `valid_input` holds for it. +There's no sense in arguing based on an invalid input program. -Next, it will help us to combine the premises for a -"failed" and "ok" terminations into Coq data types. -This will help us formulate a lemma later on. Here they are: +We represent these grouped assumptions by opening a Coq +`Section`, which we call `ValidInput`, and listing our assumptions: -{{< codelines "Coq" "aoc-2020/day8.v" 112 117 >}} +{{< codelines "Coq" "aoc-2020/day8.v" 163 166 >}} -Since all of out "termination" rules start and -end in the same state, there's no reason to -write that state twice. Thus, both `done` -and `stuck` only take the input `inp`, -and the state, which includes the accumulator -`acc`, set of allowed program counters `v`, and -the program counter at which the program came to an end. -When the program terminates successfully, this program -counter will be equal to the length of the program `n`, -so we use `nat_to_fin n`. On the other hand, if the program -terminates in as stuck state, it must be that it terminated -at a program counter that points to an instruction. Thus, this -program counter is actually a \\(\\text{Fin} \\; n\\), and not -a \\(\\text{Fin} \\ (n+1)\\) (we use the same "weakening" trick -we saw earlier), and is not in the set of allowed program counters. +We had to also explicitly mention the length `n` of our program. +From now on, the variables `n`, `inp`, and `Hv` will be +available to all of the proofs we write in this section. +The first proof is rather simple. The claim is: -Finally, we encode the three inference rules we came up with: +> For our valid program, at any program counter `pc` +and accumulator `acc`, there must exists another program +counter `pc'` and accumulator `acc'` such that the +instruction evaluation relation \\((\rightarrow_i)\\) +connects the two. That is, valid addresses aside, +we can always make a step. -{{< codelines "Coq" "aoc-2020/day8.v" 119 126 >}} +Here is this claim encoded in Coq: + +{{< codelines "Coq" "aoc-2020/day8.v" 168 169 >}} + +We start our proof by introducing all the relevant variables into +the global context. I've mentioned this when I wrote about +day 1, but here's the gist: the `intros` keyword takes +variables from a `forall`, and makes them concrete. +In short, `intros x` is very much like saying "suppose +we have an `x`", and going on with the proof. + +{{< codelines "Coq" "aoc-2020/day8.v" 170 171 >}} + +Here, we said "take any program counter `pc` and any +accumulator `acc`". Now what? Well, first of all, +we want to take a look at the instruction at the current +`pc`. We know that this instruction is a combination +of an opcode and a number, so we use `destruct` to get +access to both of these parts: + +{{< codelines "Coq" "aoc-2020/day8.v" 172 172 >}} + +Now, Coq reports the following proof state: + +``` +1 subgoal + +n : nat +inp : input n +Hv : valid_input inp +pc : Fin.t n +acc : t +o : opcode +t0 : t +Hop : nth inp pc = (o, t0) + +========================= (1 / 1) + +exists (pc' : fin (S n)) (acc' : t), + step_noswap (o, t0) (pc, acc) (pc', acc') +``` + +We have some opcode `o`, and some associated number +`t0`, and we must show that there exist a `pc'` +and `acc'` to which we can move on. To prove +that something exists in Coq, we must provide +an instance of that "something". If we claim +that there exists a dog that's not a good boy, +we better have this elusive creature in hand. +In other words, proofs in Coq are [constructive](https://en.wikipedia.org/wiki/Constructive_proof). +Without knowing the kind of operation we're dealing with, we can't +say for sure how the step will proceed. Thus, we proceed by +case analysis on `o`. + +{{< codelines "Coq" "aoc-2020/day8.v" 173 173 >}} + +There are three possible cases we have to consider, +one for each type of instruction. + +* If the instruction is \\(\\texttt{add}\\), we know +that `pc' = pc + 1` and `acc' = acc + t0`. That is, +the program counter is simply incremented, and the accumulator +is modified with the number part of the instruction. +* If the instruction is \\(\\texttt{nop}\\), the program +coutner will again be incremented (`pc' = pc + 1`), +but the accumulator will stay the same, so `acc' = acc`. +* If the instruction is \\(\\texttt{jmp}\\), things are +more complicated. We must rely on the assumption +that our input is valid, which tells us that adding +`t0` to our `pc` will result in `Some f`, and not `None`. +Given this, we have `pc' = f`, and `acc' = acc`. + +This is how these three cases are translated to Coq: + +{{< codelines "Coq" "aoc-2020/day8.v" 174 177 >}} + +For the first two cases, we simply provide the +values we expect for `pc'` and `acc'`, and +apply the corresponding inference rule that +is satisfied by these values. For the third case, we have +to invoke `Hv`, the hypothesis that our input is valid. +In particular, we care about the instruction at `pc`, +so we use `specialize` to plug `pc` into the more general +hypothesis. We then replace `nth inp pc` with its known +value, `(jmp, t0)`. This tells us the following, in Coq's words: + +``` +Hv : valid_inst (jmp, t0) pc +``` + +That is, `(jmp, t0)` is a valid instruction at `pc`. Then, using +Coq's `inversion` tactic, we ask: how is this possible? There is +only one inference rule that gives us such a conclusion, and it is named `valid_inst_jmp` +in our Coq code. Since we have a proof that our `jmp` is valid, +it must mean that this rule was used. Furthermore, sicne this +rule requires that `valid_jump_t` evaluates to `Some f'`, we know +that this must be the case here! Coq now has adds the following +two lines to our proof state: + +``` +f' : fin (S n) +H0 : valid_jump_t pc t0 = Some f' +``` + +Finally, we specify, as mentioned earlier, that `pc' = f'` and `acc' = acc`. +As before, we apply the corresponding step rule for `jmp`. When it asks +for a proof that `valid_jump_t` produces a valid program counter, +we hand it `H0` using `apply H0`. And with that, Coq is happy! + +Next, we prove a claim that a valid program can always do _something_, +and that something is one of three things: + +* It can terminate in the "ok" state if the program counter +reaches the programs' end. +* It can terminate with an error if it's currently at a program +counter that is not included in the valid set. +* Otherwise, it can run the current instruction and advance +to a "next" state. + +Alternatively, we could say that one of the inference rules +for \\((\\Rightarrow_p)\\) must apply. This is not the case if the input +is not valid, since, as I said +before, an arbitrary input program can lead us to jump +to a negative address (or to an address _way_ past the end of the program). +Here's the claim, translated to Coq: + +{{< codelines "Coq" "aoc-2020/day8.v" 181 186 >}} + +Informally, we can prove this as follows: + +* If the current program counter is equal to the length +of the program, we've reached the end. Thus, the program +can terminate in the "ok" state. +* Otherwise, the current program counter must be +less than the length of the program. + * If we've already encountered this program counter (that is, + if it's gone from the set of valid program counters), + then the program will terminate in the "error" state. + * Otherwise, the program counter is in the set of + valid instructions. By our earlier theorem, in a valid + program, the instruction at any program counter can be correctly + executed, taking us to the next state. Now too + our program can move to this next state. + +Below is the Coq translation of the above. + +{{< codelines "Coq" "aoc-2020/day8.v" 187 203 >}} + +It doesn't seem like we're that far from being done now. +A program can always take a step, and each time it does, +the set of valid program counters decreases in size. Eventually, +this set will become empty, so if nothing else, our program will +eventually terminate in an "error" state. Thus, it will stop +running no matter what. + +The problem is: how do we express that?