Add the initial version of the Dawn article.
This commit is contained in:
		
							parent
							
								
									72259c16a9
								
							
						
					
					
						commit
						c214d9ee37
					
				
							
								
								
									
										179
									
								
								code/dawn/Dawn.v
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										179
									
								
								code/dawn/Dawn.v
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,179 @@ | ||||
| Require Import Coq.Lists.List. | ||||
| From Ltac2 Require Import Ltac2. | ||||
| 
 | ||||
| Inductive intrinsic := | ||||
|   | swap | ||||
|   | clone | ||||
|   | drop | ||||
|   | quote | ||||
|   | compose | ||||
|   | apply. | ||||
| 
 | ||||
| Inductive expr := | ||||
|   | e_int (i : intrinsic) | ||||
|   | e_quote (e : expr) | ||||
|   | e_comp (e1 e2 : expr). | ||||
| 
 | ||||
| Definition e_compose (e : expr) (es : list expr) := fold_left e_comp es e. | ||||
| 
 | ||||
| Inductive IsValue : expr -> Prop := | ||||
|   | Val_quote : forall {e : expr}, IsValue (e_quote e). | ||||
| 
 | ||||
| Definition value := { v : expr & IsValue v }. | ||||
| Definition value_stack := list value. | ||||
| 
 | ||||
| Definition v_quote (e : expr) := existT IsValue (e_quote e) Val_quote. | ||||
| 
 | ||||
| Inductive Sem_int : value_stack -> intrinsic -> value_stack -> Prop := | ||||
|   | Sem_swap : forall (v v' : value) (vs : value_stack), Sem_int (v' :: v :: vs) swap (v :: v' :: vs) | ||||
|   | Sem_clone : forall (v : value) (vs : value_stack), Sem_int (v :: vs) clone (v :: v :: vs) | ||||
|   | Sem_drop : forall (v : value) (vs : value_stack), Sem_int (v :: vs) drop vs | ||||
|   | Sem_quote : forall (v : value) (vs : value_stack), Sem_int (v :: vs) quote ((v_quote (projT1 v)) :: vs) | ||||
|   | Sem_compose : forall (e e' : expr) (vs : value_stack), Sem_int (v_quote e' :: v_quote e :: vs) compose (v_quote (e_comp e e') :: vs) | ||||
|   | Sem_apply : forall (e : expr) (vs vs': value_stack), Sem_expr vs e vs' -> Sem_int (v_quote e :: vs) apply vs' | ||||
| 
 | ||||
| with Sem_expr : value_stack -> expr -> value_stack -> Prop := | ||||
|   | Sem_e_int : forall (i : intrinsic) (vs vs' : value_stack), Sem_int vs i vs' -> Sem_expr vs (e_int i) vs' | ||||
|   | Sem_e_quote : forall (e : expr) (vs : value_stack), Sem_expr vs (e_quote e) (v_quote e :: vs) | ||||
|   | Sem_e_comp : forall (e1 e2 : expr) (vs1 vs2 vs3 : value_stack), | ||||
|       Sem_expr vs1 e1 vs2 -> Sem_expr vs2 e2 vs3 -> Sem_expr vs1 (e_comp e1 e2) vs3. | ||||
| 
 | ||||
| Definition false : expr := e_quote (e_int drop). | ||||
| Definition false_v : value := v_quote (e_int drop). | ||||
| 
 | ||||
| Definition true : expr := e_quote (e_comp (e_int swap) (e_int drop)). | ||||
| Definition true_v : value := v_quote (e_comp (e_int swap) (e_int drop)). | ||||
| 
 | ||||
| Theorem false_correct : forall (v v' : value) (vs : value_stack), Sem_expr (v' :: v :: vs) (e_comp false (e_int apply)) (v :: vs). | ||||
| Proof. | ||||
|   intros v v' vs. | ||||
|   eapply Sem_e_comp. | ||||
|   - apply Sem_e_quote. | ||||
|   - apply Sem_e_int. apply Sem_apply. apply Sem_e_int. apply Sem_drop. | ||||
| Qed. | ||||
| 
 | ||||
| Theorem true_correct : forall (v v' : value) (vs : value_stack), Sem_expr (v' :: v :: vs) (e_comp true (e_int apply)) (v' :: vs). | ||||
| Proof. | ||||
|   intros v v' vs. | ||||
|   eapply Sem_e_comp. | ||||
|   - apply Sem_e_quote. | ||||
|   - apply Sem_e_int. apply Sem_apply. eapply Sem_e_comp. | ||||
|     * apply Sem_e_int. apply Sem_swap. | ||||
|     * apply Sem_e_int. apply Sem_drop. | ||||
| Qed. | ||||
| 
 | ||||
| Definition or : expr := e_comp (e_int clone) (e_int apply). | ||||
| 
 | ||||
| Theorem or_false_v : forall (v : value) (vs : value_stack), Sem_expr (false_v :: v :: vs) or (v :: vs). | ||||
| Proof with apply Sem_e_int. | ||||
|   intros v vs. | ||||
|   eapply Sem_e_comp... | ||||
|   - apply Sem_clone. | ||||
|   - apply Sem_apply... apply Sem_drop. | ||||
| Qed. | ||||
| 
 | ||||
| Theorem or_true : forall (v : value) (vs : value_stack), Sem_expr (true_v :: v :: vs) or (true_v :: vs). | ||||
| Proof with apply Sem_e_int. | ||||
|   intros v vs. | ||||
|   eapply Sem_e_comp... | ||||
|   - apply Sem_clone... | ||||
|   - apply Sem_apply. eapply Sem_e_comp... | ||||
|     * apply Sem_swap. | ||||
|     * apply Sem_drop. | ||||
| Qed. | ||||
| 
 | ||||
| Definition or_false_false := or_false_v false_v. | ||||
| Definition or_false_true := or_false_v true_v. | ||||
| Definition or_true_false := or_true false_v. | ||||
| Definition or_true_true := or_true true_v. | ||||
| 
 | ||||
| Fixpoint quote_n (n : nat) := | ||||
|   match n with | ||||
|   | O => e_int quote | ||||
|   | S n' => e_compose (quote_n n') (e_int swap :: e_int quote :: e_int swap :: e_int compose :: nil) | ||||
|   end. | ||||
| 
 | ||||
| Theorem quote_2_correct : forall (v1 v2 : value) (vs : value_stack), | ||||
|     Sem_expr (v2 :: v1 :: vs) (quote_n 1) (v_quote (e_comp (projT1 v1) (projT1 v2)) :: vs). | ||||
| Proof with apply Sem_e_int. | ||||
|   intros v1 v2 vs. simpl. | ||||
|   repeat (eapply Sem_e_comp)... | ||||
|   - apply Sem_quote. | ||||
|   - apply Sem_swap. | ||||
|   - apply Sem_quote. | ||||
|   - apply Sem_swap. | ||||
|   - apply Sem_compose. | ||||
| Qed. | ||||
| 
 | ||||
| Theorem quote_3_correct : forall (v1 v2 v3 : value) (vs : value_stack), | ||||
|   Sem_expr (v3 :: v2 :: v1 :: vs) (quote_n 2) (v_quote (e_comp (projT1 v1) (e_comp (projT1 v2) (projT1 v3))) :: vs). | ||||
| Proof with apply Sem_e_int. | ||||
|   intros v1 v2 v3 vs. simpl. | ||||
|   repeat (eapply Sem_e_comp)... | ||||
|   - apply Sem_quote. | ||||
|   - apply Sem_swap. | ||||
|   - apply Sem_quote. | ||||
|   - apply Sem_swap. | ||||
|   - apply Sem_compose. | ||||
|   - apply Sem_swap. | ||||
|   - apply Sem_quote. | ||||
|   - apply Sem_swap. | ||||
|   - apply Sem_compose. | ||||
| Qed. | ||||
| 
 | ||||
| Ltac2 rec solve_basic () := Control.enter (fun _ => | ||||
|   match! goal with | ||||
|   | [|- Sem_int ?vs1 swap ?vs2] => apply Sem_swap | ||||
|   | [|- Sem_int ?vs1 clone ?vs2] => apply Sem_clone | ||||
|   | [|- Sem_int ?vs1 drop ?vs2] => apply Sem_drop | ||||
|   | [|- Sem_int ?vs1 quote ?vs2] => apply Sem_quote | ||||
|   | [|- Sem_int ?vs1 compose ?vs2] => apply Sem_compose | ||||
|   | [|- Sem_int ?vs1 apply ?vs2] => apply Sem_apply | ||||
|   | [|- Sem_expr ?vs1 (e_comp ?e1 ?e2) ?vs2] => eapply Sem_e_comp; solve_basic () | ||||
|   | [|- Sem_expr ?vs1 (e_int ?e) ?vs2] => apply Sem_e_int; solve_basic () | ||||
|   | [|- Sem_e_quote ?vs1 (e_quote ?e) ?vs2] => apply Sem_quote | ||||
|   | [_ : _ |- _] => () | ||||
|   end). | ||||
| 
 | ||||
| Theorem quote_2_correct' : forall (v1 v2 : value) (vs : value_stack), | ||||
|     Sem_expr (v2 :: v1 :: vs) (quote_n 1) (v_quote (e_comp (projT1 v1) (projT1 v2)) :: vs). | ||||
| Proof. intros. simpl. solve_basic (). Qed. | ||||
| 
 | ||||
| Theorem quote_3_correct' : forall (v1 v2 v3 : value) (vs : value_stack), | ||||
|   Sem_expr (v3 :: v2 :: v1 :: vs) (quote_n 2) (v_quote (e_comp (projT1 v1) (e_comp (projT1 v2) (projT1 v3))) :: vs). | ||||
| Proof. intros. simpl. solve_basic (). Qed. | ||||
| 
 | ||||
| Definition rotate_n (n : nat) := e_compose (quote_n n) (e_int swap :: e_int quote :: e_int compose :: e_int apply :: nil). | ||||
| 
 | ||||
| Lemma eval_value : forall (v : value) (vs : value_stack), | ||||
|   Sem_expr vs (projT1 v) (v :: vs). | ||||
| Proof. | ||||
|   intros v vs. | ||||
|   destruct v. destruct i. | ||||
|   simpl. apply Sem_e_quote. | ||||
| Qed. | ||||
| 
 | ||||
| Theorem rotate_3_correct : forall (v1 v2 v3 : value) (vs : value_stack), | ||||
|   Sem_expr (v3 :: v2 :: v1 :: vs) (rotate_n 1) (v1 :: v3 :: v2 :: vs). | ||||
| Proof. | ||||
|   intros. unfold rotate_n. simpl. solve_basic (). | ||||
|   repeat (eapply Sem_e_comp); apply eval_value. | ||||
| Qed. | ||||
| 
 | ||||
| Theorem rotate_4_correct : forall (v1 v2 v3 v4 : value) (vs : value_stack), | ||||
|   Sem_expr (v4 :: v3 :: v2 :: v1 :: vs) (rotate_n 2) (v1 :: v4 :: v3 :: v2 :: vs). | ||||
| Proof. | ||||
|   intros. unfold rotate_n. simpl. solve_basic (). | ||||
|   repeat (eapply Sem_e_comp); apply eval_value. | ||||
| Qed. | ||||
| 
 | ||||
| Theorem e_comp_assoc : forall (e1 e2 e3 : expr) (vs vs' : value_stack), | ||||
|   Sem_expr vs (e_comp e1 (e_comp e2 e3)) vs' <-> Sem_expr vs (e_comp (e_comp e1 e2) e3) vs'. | ||||
| Proof. | ||||
|   intros e1 e2 e3 vs vs'. | ||||
|   split; intros Heval. | ||||
|   - inversion Heval; subst. inversion H4; subst. | ||||
|     eapply Sem_e_comp. eapply Sem_e_comp. apply H2. apply H3. apply H6. | ||||
|   - inversion Heval; subst. inversion H2; subst. | ||||
|     eapply Sem_e_comp. apply H3. eapply Sem_e_comp. apply H6. apply H4. | ||||
| Qed. | ||||
							
								
								
									
										375
									
								
								content/blog/coq_dawn.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										375
									
								
								content/blog/coq_dawn.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,375 @@ | ||||
| --- | ||||
| title: "Formalizing Dawn in Coq" | ||||
| date: 2021-11-20T19:04:57-08:00 | ||||
| tags: ["Coq", "Dawn"] | ||||
| draft: true | ||||
| --- | ||||
| 
 | ||||
| The [_Foundations of Dawn_](https://www.dawn-lang.org/posts/foundations-ucc/) article came up | ||||
| on [Lobsters](https://lobste.rs/s/clatuv/foundations_dawn_untyped_concatenative) recently. | ||||
| In this article, the author of Dawn defines a core calculus for the language, and provides its | ||||
| semantics. The definitions seemed so clean and straightforward that I wanted to try my hand at | ||||
| translating them into machine-checked code. I am most familiar with [Coq](https://coq.inria.fr/), | ||||
| and that's what I reached for when making this attempt. | ||||
| 
 | ||||
| ### Defining the Syntax | ||||
| #### Expressions and Intrinsics | ||||
| For the most part, this is the easy part. A Dawn expression is one of three things: | ||||
| 
 | ||||
| * An "intrinsic", written \\(i\\), which is akin to a built-in function or command. | ||||
| * A "quote", written \\([e]\\), which takes a Dawn expression \\(e\\) and moves it onto the stack (Dawn is stack-based). | ||||
| * A composition of several expressions, written \\(e_1\\ e_2\\ \\ldots\\ e_n\\), which effectively evaluates them in order. | ||||
| 
 | ||||
| This is straightforward to define in Coq, but I'm going to make a little simplifying change. | ||||
| Instead of making "composition of \\(n\\) expressions" a core language feature, I'll only | ||||
| allow "composition of \\(e_1\\) and \\(e_2\\)", written \\(e_1\\ e_2\\). This change does not | ||||
| in any way reduce the power of the language; we can still | ||||
| {{< sidenote "right" "assoc-note" "write \(e_1\ e_2\ \ldots\ e_n\) as \((e_1\ e_2)\ \ldots\ e_n\)." >}} | ||||
| The same expression can, of course, be written as \(e_1\ \ldots\ (e_{n-1}\ e_n)\). | ||||
| So, which way should we <em>really</em> use when translating the many-expression composition | ||||
| from the Dawn article into the two-expression composition I am using here? Well, the answer is, | ||||
| it doesn't matter; expression composition is <em>associative</em>, so both ways effectively mean | ||||
| the same thing.<br> | ||||
| <br> | ||||
| This is quite similar to what we do in algebra: the regular old addition operator, \(+\) is formally | ||||
| only defined for pairs of numbers, like \(a+b\). However, no one really bats an eye when we | ||||
| write \(1+2+3\), because we can just insert parentheses any way we like, and get the same result: | ||||
| \((1+2)+3\) is the same as \(1+(2+3)\). | ||||
| {{< /sidenote >}} | ||||
| With that in mind, we can translate each of the three types of expressions in Dawn into cases | ||||
| of an inductive data type in Coq. | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 12 15 >}} | ||||
| 
 | ||||
| Why do we need `e_int`? We do because a token like \\(\\text{swap}\\) can be viewed | ||||
| as belonging to the set of intrinsics \\(i\\), or the set of expressions, \\(e\\). While writing | ||||
| down the rules in mathematical notation, what exactly the token means is inferred from context - clearly | ||||
| \\(\\text{swap}\\ \\text{drop}\\) is an expression built from two other expressions. In statically-typed | ||||
| functional languages (like Coq or Haskell), however, the same expression can't belong to two different, | ||||
| arbitrary types. Thus, to turn an intrinsic into an expression, we need to wrap it up in a constructor, | ||||
| which we called `e_int` here. Other than that, `e_quote` accepts as argument another expression, `e` (the | ||||
| thing being quoted), and `e_comp` accepts two expressions, `e1` and `e2` (the two sub-expressions being composed). | ||||
| 
 | ||||
| The definition for intrinsics themselves is even simpler: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 4 10 >}} | ||||
| 
 | ||||
| We simply define a constructor for each of the six intrinsics. Since none of the intrinsic | ||||
| names are reserved in Coq, we can just call our constructors exactly the same as their names | ||||
| in the written formalization. | ||||
| 
 | ||||
| #### Values and Value Stacks | ||||
| Values are up next. My initial temptation was to define a value much like | ||||
| I defined an intrinsic expression: by wrapping an expression in a constructor for a new data | ||||
| type. Something like: | ||||
| 
 | ||||
| ```Coq | ||||
| Inductive value := | ||||
|     | v_quot (e : expr). | ||||
| ``` | ||||
| 
 | ||||
| Then, `v_quot (e_int swap)` would be the Coq translation of the expression \\([\\text{swap}]\\).  | ||||
| However, I didn't decide on this approach for two reasons: | ||||
| 
 | ||||
| * There are now two ways to write a quoted expression: either `v_quote e` to represent | ||||
|   a quoted expression that is a value, or `e_quote e` to represent a quoted expression | ||||
|   that is just an expression. In the extreme case, the value \\([[e]]\\) would | ||||
|   be represented by `v_quote (e_quote e)` - two different constructors for the same concept, | ||||
|   in the same expression! | ||||
| * When formalizing the lambda calculus, | ||||
|   [Programming Language Foundations](https://softwarefoundations.cis.upenn.edu/plf-current/Stlc.html) | ||||
|   uses an inductively-defined property to indicate values. In the simply typed lambda calculus, | ||||
|   much like in Dawn, values are a subset of expressions. | ||||
| 
 | ||||
| I took instead the approach from Programming Language Foundations: a value is merely an expression | ||||
| for which some predicate, `IsValue`, holds. We will define this such that `IsValue (e_quote e)` is provable, | ||||
| but also such that here is no way to prove `IsValue (e_int swap)`, since _that_ expression is not | ||||
| a value. But what does "provable" mean, here? | ||||
| 
 | ||||
| By the [Curry-Howard correspondence](https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence), | ||||
| a predicate is just a function that takes _something_ and returns a type. Thus, if \\(\\text{Even}\\) | ||||
| is a predicate, then \\(\\text{Even}\\ 3\\) is actually a type. Since \\(\\text{Even}\\) takes | ||||
| numbers in, it is a predicate on numbers. Our \\(\\text{IsValue}\\) predicate will be a predicate | ||||
| on expressions, instead. In Coq, we can write this as: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 19 19 >}} | ||||
| 
 | ||||
| You might be thinking, | ||||
| 
 | ||||
| > Huh, `Prop`? But you just said that predicates return types! | ||||
| 
 | ||||
| This is a good observation; In Coq, `Prop` is a special sort of type that corresponds to logical | ||||
| propositions. It's special for a few reasons, but those reasons are beyond the scope of this post; | ||||
| for our purposes, it's sufficient to think of `IsValue e` as a type. | ||||
| 
 | ||||
| Alright, so what good is this new `IsValue e` type? Well, we will define `IsValue` such that | ||||
| this type is only _inhabited_ if `e` is a value according to the Dawn specification. A type | ||||
| is inhabited if and only if we can find a value of that type. For instance, the type of natural | ||||
| numbers, `nat`, is inhabited, because any number, like `0`, has this type. Uninhabited types | ||||
| are harder to come by, but take as an example the type `3 = 4`, the type of proofs that three is equal | ||||
| to four. Three is _not_ equal to four, so we can never find a proof of equality, and thus, `3 = 4` is | ||||
| uninhabited. As I said, `IsValue e` will only be inhabited if `e` is a value per the formal | ||||
| specification of Dawn; specifically, this means that `e` is a quoted expression, like `e_quote e'`. | ||||
| 
 | ||||
| To this end, we define `IsValue` as follows: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 19 20 >}} | ||||
| 
 | ||||
| Now, `IsValue` is a new data type with only only constructor, `ValQuote`. For any expression `e`, | ||||
| this constructor creates a value of type `IsValue (e_quote e)`. Two things are true here: | ||||
| 
 | ||||
| * Since `Val_quote` accepts any expression `e` to be put inside `e_quote`, we can use | ||||
|   `Val_quote` to create an `IsValue` instance for any quoted expression. | ||||
| * Because `Val_quote` is the _only_ constructor, and because it always returns `IsValue (e_quote e)`, | ||||
|   there's no way to get `IsValue (e_int i)`, or anything else. | ||||
| 
 | ||||
| Thus, `IsValue e` is inhabited if and only if `e` is a Dawn value, as we intended. | ||||
| 
 | ||||
| Just one more thing. A value is just an expression, but Coq only knows about this as long | ||||
| as there's an `IsValue` instance around to vouch for it. To be able to reason about values, then, | ||||
| we will need both the expression and its `IsValue` proof. Thus, we define the type `value` to mean | ||||
| a pair of two things: an expression `v` and a proof that it's a value, `IsValue v`: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 22 22 >}} | ||||
| 
 | ||||
| A value stack is just a list of values: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 23 23 >}} | ||||
| 
 | ||||
| ### Semantics | ||||
| Remember our `IsValue` predicate? Well, it's not just any predicate, it's a _unary_ predicate. | ||||
| _Unary_ means that it's a predicate that only takes one argument, an expression in our case. However, | ||||
| this is far from the only type of predicate. Here are some examples: | ||||
| 
 | ||||
| * Equality, `=`, is a binary predicate in Coq. It takes two arguments, say `x` and `y`, and builds | ||||
|   a type `x = y` that is only inhabited if `x` and `y` are equal. | ||||
| * The mathematical "less than" relation is also a binary predicate, and it's called `le` in Coq. | ||||
|   It takes two numbers `n` and `m` and returns a type `le n m` that is only inhabited if `n` is less | ||||
|   than or equal to `m`. | ||||
| * The evaluation relation in Dawn is a ternary predicate. It takes two stacks, `vs` and `vs'`, | ||||
|   and an expression, `e`, and creates a type that's inhabited if and only if evaluating | ||||
|   `e` starting at a stack `vs` results in the stack `vs'`. | ||||
| 
 | ||||
| Binary predicates are just functions of two inputs that return types. For instance, here's what Coq has | ||||
| to say about the type of `eq`: | ||||
| 
 | ||||
| ``` | ||||
| eq : ?A -> ?A -> Prop | ||||
| ``` | ||||
| 
 | ||||
| By a similar logic, ternary predicates, much like Dawn's evaluation relation, are functions | ||||
| of three inputs. We can thus write the type of our evaluation relation as follows: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 35 35 >}} | ||||
| 
 | ||||
| We define the constructors just like we did in our `IsValue` predicate. For each evaluation | ||||
| rule in Dawn, such as: | ||||
| 
 | ||||
| {{< latex >}} | ||||
| \langle V, v, v'\rangle\ \text{swap}\ \rightarrow\ \langle V, v', v \rangle | ||||
| {{< /latex >}} | ||||
| 
 | ||||
| We introduce a constructor. For the `swap` rule mentioned above, the constructor looks like this: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 28 28 >}} | ||||
| 
 | ||||
| Although the stacks are written in reverse order (which is just a consequence of Coq's list notation), | ||||
| I hope that the correspondence is fairly clear. If it's not, try reading this rule out loud: | ||||
| 
 | ||||
| > The rule `Sem_swap` says that for every two values `v` and `v'`, and for any stack `vs`, | ||||
|   evaluating `swap` in the original stack `v' :: v :: vs`, aka \\(\\langle V, v, v'\\rangle\\), | ||||
|   results in a final stack `v :: v' :: vs`, aka \\(\\langle V, v', v\\rangle\\). | ||||
| 
 | ||||
| With that in mind, here's a definition of a predicate `Sem_int`, the evaluation predicate | ||||
| for intrinsics: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 27 33 >}} | ||||
| 
 | ||||
| Hey, what's all this with `v_quote` and `projT1`? It's just a little bit of bookkeeping. | ||||
| Given a value -- a pair of an expression `e` and a proof `IsValue e` -- the function `projT1` | ||||
| just returns the expression `e`. That is, it's basically a way of converting a value back into | ||||
| an expression. The function `v_quote` takes us in the other direction: given an expression \\(e\\), | ||||
| it constructs a quoted expression \\([e]\\), and combines it with a proof that the newly constructed | ||||
| quote is a value. | ||||
| 
 | ||||
| The above two function in combination help us define the `quote` intrinsic, which | ||||
| wraps a value on the stack in an additional layer of quotes. When we create a new quote, we | ||||
| need to push it onto the value stack, so it needs to be a value; we thus use `v_quote`. However, | ||||
| `v_quote` needs an expression to wrap in a quote, so we use `projT1` to extract the expression from | ||||
| the value on top of the stack. | ||||
| 
 | ||||
| In addition to intrinsics, we also define the evaluation relation for actual expressions. | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 35 39 >}} | ||||
| 
 | ||||
| Here, we may as well go through the three constructors to explain what they mean: | ||||
| 
 | ||||
| * `Sem_e_int` says that if the expression being evaluated is an intrinsic, and if the | ||||
|    intrinsic has an effect on the stack as described by `Sem_int` above, then the effect | ||||
|    of the expression itself is the same. | ||||
| * `Sem_e_quote` says that if the expression is a quote, then a corresponding quoted | ||||
|    value is placed on top of the stack. | ||||
| * `Sem_e_comp` says that if one expression `e1` changes the stack from `vs1` to `vs2`, | ||||
|    and if another expression `e2` takes this new stack `vs2` and changes it into `vs3`, | ||||
|    then running the two expressions one after another (i.e. composing them) means starting | ||||
|    at stack `vs1` and ending in stack `vs3`. | ||||
| 
 | ||||
| ### \\(\\text{true}\\), \\(\\text{false}\\), \\(\\text{or}\\) and Proofs | ||||
| Now it's time for some fun! The Dawn language specification starts by defining two values: | ||||
| true and false. Why don't we do the same thing? | ||||
| 
 | ||||
| |Dawn Spec| Coq encoding | | ||||
| |---|----| | ||||
| |\\(\\text{false}\\)=\\([\\text{drop}]\\)| {{< codelines "Coq" "dawn/Dawn.v" 41 42 >}} | ||||
| |\\(\\text{true}\\)=\\([\\text{swap} \\ \\text{drop}]\\)| {{< codelines "Coq" "dawn/Dawn.v" 44 45 >}} | ||||
| 
 | ||||
| Let's try prove that these two work as intended. | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 47 53 >}} | ||||
| 
 | ||||
| This is the first real proof in this article. Rather than getting into the technical details, | ||||
| I invite you to take a look at the "shape" of the proof: | ||||
| 
 | ||||
| * After the initial use of `intros`, which brings the variables `v`, `v`, and `vs` into | ||||
|   scope, we start by applying `Sem_e_comp`. Intuitively, this makes sense - at the top level, | ||||
|   our expression, \\(\\text{false}\\ \\text{apply}\\), | ||||
|   is a composition of two other expressions, \\(\\text{false}\\) and \\(\\text{apply}\\). | ||||
|   Because of this, we need to use the rule from our semantics that corresponds to composition. | ||||
| * The composition rule requires that we describe the individual effects on the stack of the | ||||
|   two constituent expressions (recall that the first expression takes us from the initial stack `v1` | ||||
|   to some intermediate stack `v2`, and the second expression takes us from that stack `v2` to the | ||||
|   final stack `v3`). Thus, we have two "bullet points": | ||||
|   * The first expression, \\(\\text{false}\\), is just a quoted expression. Thus, the rule | ||||
|     `Sem_e_quote` applies, and the contents of the quote are puhsed onto the stack. | ||||
|   * The second expression, \\(\\text{apply}\\), is an intrinsic, so we need to use the rule | ||||
|     `Sem_e_int`, which handles the intrinsic case. This, in turn, requires that we show | ||||
|     the effect of the intrinsic itself; the `apply` intrinsic evaluates the quoted expression | ||||
|     on the stack. | ||||
|     The quoted expression is contains the body of false, or \\(\\text{drop}\\). This is | ||||
|     once again an intrinsic, so we use `Sem_e_int`; the intrinsic in question is \\(\\text{drop}\\), | ||||
|     so the `Sem_drop` rule takes care of that. | ||||
| 
 | ||||
| Following these steps, we arrive at the fact that evaluating `false` on the stack simply drops the top | ||||
| element, as specified. The proof for \\(\\text{true}\\) is very similar in spirit: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 55 63 >}} | ||||
| 
 | ||||
| We can also formalize the \\(\\text{or}\\) operator: | ||||
| 
 | ||||
| |Dawn Spec| Coq encoding | | ||||
| |---|----| | ||||
| |\\(\\text{or}\\)=\\(\\text{clone}\\ \\text{apply}\\)| {{< codelines "Coq" "dawn/Dawn.v" 65 65 >}} | ||||
| 
 | ||||
| We can write two top-level proofs about how this works: the first says that \\(\\text{or}\\), | ||||
| when the first argument is \\(\\text{false}\\), just returns the second argument (this is in agreement | ||||
| with the truth table, since \\(\\text{false}\\) is the identity element of \\(\\text{or}\\)). | ||||
| The proof proceeds much like before: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 67 73 >}} | ||||
| 
 | ||||
| To shorten the proof a little bit, I used the `Proof with` construct from Coq, which runs | ||||
| an additional _tactic_ (like `apply`) whenever `...` is used. | ||||
| Because of this, in this proof writing `apply Sem_apply...` is the same | ||||
| as `apply Sem_apply. apply Sem_e_int`. Since the `Sem_e_int` rule is used a lot, this makes for a | ||||
| very convenient shorthand. | ||||
| 
 | ||||
| Similarly, we prove that \\(\\text{or}\\) applied to \\(\\text{true}\\) always returns \\(\\text{true}\\). | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 75 83 >}} | ||||
| 
 | ||||
| Finally, the specific facts (like \\(\\text{false}\\ \\text{or} \\text{false}\\) evaluating to \\(\\text{false}\\)) | ||||
| can be expressed using our two new proofs, `or_false_v` and `or_true`. | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 85 88 >}} | ||||
| 
 | ||||
| ### Derived Expressions | ||||
| #### Quotes | ||||
| The Dawn specification defines \\(\\text{quote}_n\\) to make it more convenient to quote | ||||
| multiple terms. For example, \\(\\text{quote}_2\\) composes and quotes the first two values | ||||
| on the stack. This is defined in terms of other Dawn expressions as follows: | ||||
| 
 | ||||
| {{< latex >}} | ||||
| \text{quote}_n = \text{quote}_{n-1}\ \text{swap}\ \text{quote}\ \text{swap}\ \text{compose} | ||||
| {{< /latex >}} | ||||
| 
 | ||||
| We can define this in Coq as follows: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 90 94 >}} | ||||
| 
 | ||||
| This definition diverges slightly from the one given in the Dawn specification; particularly, | ||||
| Dawn's spec mentions that \\(\\text{quote}_n\\) is only defined for \\(n \\geq 1\\).However, | ||||
| this means that in our code, we'd have to somehow handle the error that would arise if the | ||||
| term \\(\\text{quote}\_0\\) is used. Instead, I defined `quote_n n` to simply mean | ||||
| \\(\\text{quote}\_{n+1}\\); thus, in Coq, no matter what `n` we use, we will have a valid | ||||
| expression, since `quote_n 0` will simply correspond to \\(\\text{quote}_1 = \\text{quote}\\). | ||||
| 
 | ||||
| We can now attempt to prove that this definition is correct by ensuring that the examples given | ||||
| in the specification are valid. We may thus write, | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 96 106 >}} | ||||
| 
 | ||||
| We used a new tactic here, `repeat`, but overall, the structure of the proof is pretty straightforward: | ||||
| the definition of `quote_n` consists of many intrinsics, and we apply the corresponding rules | ||||
| one-by-one until we arrive at the final stack. Writing this proof was kind of boring, since | ||||
| I just had to see which intrinsic is being used in each step, and then write a line of `apply` | ||||
| code to handle that intrinsic. This gets worse for \\(\\text{quote}_3\\): | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 108 122 >}} | ||||
| 
 | ||||
| It's so long! Instead, I decided to try out Coq's `Ltac2` mechanism to teach Coq how | ||||
| to write proofs like this itself. Here's what I came up with: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 124 136 >}} | ||||
| 
 | ||||
| You don't have to understand the details, but in brief, this checks what kind of proof | ||||
| we're asking Coq to do (for instance, if we're trying to prove that a \\(\\text{swap}\\) | ||||
| instruction has a particular effect), and tries to apply a corresponding semantic rule. | ||||
| Thus, it will try `Sem_swap` if the expression is \\(\\text{swap}\\),  | ||||
| `Sem_clone` if the expression is \\(\\text{clone}\\), and so on. Then, the two proofs become: | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 138 144 >}} | ||||
| 
 | ||||
| #### Rotations | ||||
| There's a little trick to formalizing rotations. There's an important property of values: | ||||
| when a value is run against a stack, all it does is place itself on a stack. We can state | ||||
| this as follows: | ||||
| 
 | ||||
| {{< latex >}} | ||||
| \langle V \rangle\ v = \langle V\ v \rangle | ||||
| {{< /latex >}} | ||||
| 
 | ||||
| Or, in Coq, | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 148 149 >}} | ||||
| 
 | ||||
| This is the trick to how \\(\\text{rotate}_n\\) works: it creates a quote of \\(n\\) reordered and composed | ||||
| values on the stack, and then evaluates that quote. Since evaluating each value | ||||
| just places it on the stack, these values end up back on the stack, in the same order that they | ||||
| were in the quote. When writing the proof, `solve_basic ()` gets us almost all the way to the | ||||
| end (evaluating a list of values against a stack). Then, we simply apply the composition | ||||
| rule over and over, following it up with `eval_value` to prove that the each value is just being | ||||
| placed back on the stack. | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 156 168 >}} | ||||
| 
 | ||||
| ### `e_comp` is Associative | ||||
| When composing three expressions, which way of pressing parentheses is correct? | ||||
| Is it \\((e_1\\ e_2)\\ e_3\\)? Or is it \\(e_1\\ (e_2\\ e_3)\\)? Well, neither! | ||||
| Expression composition is associative, which means that the order of the parentheses | ||||
| doesn't matter. We state this in the following theorem, which says that the two | ||||
| ways of writing the composition, if they evaluate to anything, evaluate to the same thing. | ||||
| 
 | ||||
| {{< codelines "Coq" "dawn/Dawn.v" 170 171 >}} | ||||
| 
 | ||||
| That's all I've got in me for today. However, we got pretty far! The Dawn specification | ||||
| says: | ||||
| 
 | ||||
| > One of my long term goals for Dawn is to democratize formal software verification in order to make it much more feasible and realistic to write perfect software. | ||||
| 
 | ||||
| I think that Dawn is definitely getting there: formally defining the semantics outlined | ||||
| on the page was quite straightforward! We can now have complete confidence in the behavior | ||||
| of \\(\\text{true}\\), \\(\\text{false}\\), \\(\\text{or}\\), \\(\\text{quote}_n\\) and | ||||
| \\(\\text{rotate}_n\\). The proof of associativity is also enough to possibly argue for simplifying | ||||
| the core calculus' syntax even more. All of this we got from an official source, with only | ||||
| a little bit of tweaking to get from the written description of the language to code! I'm | ||||
| looking forward to reading the next post about the _multistack_ concatenative calculus. | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user