From 37dd9ad6d4fc413fcf663a3e25fa9d0be19a433d Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Sun, 13 Oct 2024 13:31:47 -0700 Subject: [PATCH] Write the semantics section using Bergamot Signed-off-by: Danila Fedorin --- content/blog/05_spa_agda_semantics/index.md | 254 +++++++++++++++++++ content/blog/05_spa_agda_semantics/parser.js | 128 ++++++++++ 2 files changed, 382 insertions(+) create mode 100644 content/blog/05_spa_agda_semantics/parser.js diff --git a/content/blog/05_spa_agda_semantics/index.md b/content/blog/05_spa_agda_semantics/index.md index 0694321..3ce4778 100644 --- a/content/blog/05_spa_agda_semantics/index.md +++ b/content/blog/05_spa_agda_semantics/index.md @@ -5,6 +5,17 @@ description: "In this post, I define the language that well serve as the object date: 2024-08-10T17:37:43-07:00 tags: ["Agda", "Programming Languages"] draft: true +custom_js: ["parser.js"] +bergamot: + render_presets: + default: "bergamot/rendering/imp.bergamot" + input_modes: + - name: "Expression" + fn: "window.parseExpr" + - name: "Basic Statement" + fn: "window.parseBasicStmt" + - name: "Statement" + fn: "window.parseStmt" --- In the previous several posts, I've formalized the notion of lattices, which @@ -197,3 +208,246 @@ The Agda version is: Notice how we used `noop` to express the fact that the `else` branch of the conditional does nothing. + +### The Semantics of Our Language + +We now have all the language constructs that I'll be showing off --- because +those are all the concepts that I've formalized. What's left is to define +how they behave. We will do this using a logical tool called +[_inference rules_](https://en.wikipedia.org/wiki/Rule_of_inference). I've +written about them a number of times; they're ubiquitous, particularly in the +sorts of things I like explore on this site. The [section on inference rules]({{< relref "01_aoc_coq#inference-rules" >}}) +from my Advent of Code series is pretty relevant, and [the notation section from +a post in my compiler series]({{< relref "03_compiler_typechecking#some-notation" >}}) says +much the same thing; I won't be re-describing them here. + +There are three pieces which demand semantics: expressions, simple statements, +and non-simple statements. The semantics of each of the three requires the +semantics of the items that precede it. We will therefore start with expressions. + +#### Expressions + +The trickiest thing about expression is that the value of an expression depends +on the "context": `x+1` can evaluate to `43` if `x` is `42`, or it can evaluate +to `0` if `x` is `-1`. To evaluate an expression, we will therefore need to +assign values to all of the variables in that expression. A mapping that +assigns values to variables is typically called an _environment_. We will write +\(\varnothing\) for "empty environment", and \(\{\texttt{x} \mapsto 42, \texttt{y} \mapsto -1 \}\) for +an environment that maps the variable \(\texttt{x}\) to 42, and the variable \(\texttt{y}\) to -1. + +Now, a bit of notation. We will use the letter \(\rho\) to represent environments +(and if several environments are involved, we will occasionally number them +as \(\rho_1\), \(\rho_2\), etc.) We will use the letter \(e\) to stand for +expressions, and the letter \(v\) to stand for values. Finally, we'll write +\(\rho, e \Downarrow v\) to say that "in an environment \(\rho\), expression \(e\) +evaluates to value \(v\)". Our two previous examples of evaluating `x+1` can +thus be written as follows: + +{{< latex >}} +\{ \texttt{x} \mapsto 42 \}, \texttt{x}+1 \Downarrow 43 \\ +\{ \texttt{x} \mapsto -1 \}, \texttt{x}+1 \Downarrow 0 \\ +{{< /latex >}} + +Now, on to the actual rules for how to evaluate expressions. Most simply, +integer literals `1` just evaluate to themselves. + +{{< latex >}} +\frac{n \in \text{Int}}{\rho, n \Downarrow n} +{{< /latex >}} + +Note that the letter \(\rho\) is completely unused in the above rule. That's +because no matter what values _variables_ have, a number still evaluates to +the same value. As we've already established, the same is not true for a +variable like \(\texttt{x}\). To evaluate such a variable, we need to retrieve +the value it's mapped to in the current environment, which we will write as +\(\rho(\texttt{x})\). This gives the following inference rule: + +{{< latex >}} +\frac{\rho(x) = v}{\rho, x \Downarrow v} +{{< /latex >}} + +All that's left is to define addition and subtraction. For an expression in the +form \(e_1+e_2\), we first need to evaluate the two subexpressions \(e_1\) +and \(e_2\), and then add the two resulting numbers. As a result, the addition +rule includes two additional premises, one for evaluating each summand. + +{{< latex >}} +\frac + {\rho, e_1 \Downarrow v_1 \quad \rho, e_2 \Downarrow v_2 \quad v_1 + v_2 = v} + {\rho, e_1+e_2 \Downarrow v} +{{< /latex >}} + +The subtraction rule is similar. Below, I've configured an instance of +[Bergamot]({{< relref "bergamot" >}}) to interpret these exact rules. Try +typing various expressions like `1`, `1+1`, etc. into the input box below +to see them evaluate. If you click the "Full Proof Tree" button, you can also view +the exact rules that were used in computing a particular value. The variables +`x`, `y`, and `z` are pre-defined for your convenience. + +{{< bergamot_widget id="expr-widget" query="" prompt="eval(extend(extend(extend(empty, x, 17), y, 42), z, 0), TERM, ?v)" modes="Expression:Expression" >}} +section "" { + EvalNum @ eval(?rho, lit(?n), ?n) <- int(?n); + EvalVar @ eval(?rho, var(?x), ?v) <- inenv(?x, ?v, ?rho); +} +section "" { + EvalPlus @ eval(?rho, plus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), add(?v_1, ?v_2, ?v); + EvalMinus @ eval(?rho, minus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), subtract(?v_1, ?v_2, ?v); +} +hidden section "" { + EnvTake @ inenv(?x, ?v, extend(?rho, ?x, ?v)) <-; + EnvSkip @ inenv(?x, ?v_1, extend(?rho, ?y, ?v_2)) <- inenv(?x, ?v_1, ?rho), not(symeq(?x, ?y)); +} +{{< /bergamot_widget >}} + +The Agda equivalent of this looks very similar to the rules themselves. I use +`⇒ᵉ` instead of \(\Downarrow\), and there's a little bit of tedium with +wrapping integers into a new `Value` type. I also used a (partial) relation +`(x, v) ∈ ρ` instead of explicitly defining accessing an environment, since +it is conceivable for a user to attempt accessing a variable that has not +been assigned to. Aside from these notational changes, the structure of each +of the constructors of the evaluation data type matches the inference rules +I showed above. + +{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 27 35 >}} + +#### Simple Statements +The main difference between formalizing (simple and "normal") statements is +that they modify the environment. If `x` has one value, writing `x = x + 1` will +certainly change that value. On the other hands, statements don't produce values. +So, we will be writing claims like \(\rho_1 , \textit{bs} \Rightarrow \rho_2\) +to say that the basic statement \(\textit{bs}\), when starting in environment +\(\rho_1\), will produce environment \(\rho_2\). Here's an example: + +{{< latex >}} +\{ \texttt{x} \mapsto 42, \texttt{y} \mapsto 17 \}, \ \texttt{x = x - \text{1}} \Rightarrow \{ \texttt{x} \mapsto 41, \texttt{y} \mapsto 17 \} +{{< /latex >}} + +Here, we subtracted one from a variable with value `42`, leaving it with a new +value of `41`. + +There are two basic statements, and one of them quite literally does nothing. +The inference rule for `noop` is very simple: + +{{< latex >}} +\rho,\ \texttt{noop} \Rightarrow \rho +{{< /latex >}} + +For the assignment rule, we need to know how to evaluate the expression on the +right side of the equal sign. This is why we needed to define the semantics +of expressions first. Given those, the evaluation rule for assignment is as +follows, with \(\rho[x \mapsto v]\) meaning "the environment \(\rho\) but +mapping the variable \(x\) to value \(v\)". + +{{< latex >}} +\frac + {\rho, e \Downarrow v} + {\rho, x = e \Rightarrow \rho[x \mapsto v]} +{{< /latex >}} + +Those are actually all the rules we need, and below, I am once again configuring +a Bergamot instance, this time with simple statements. Try out `noop` or some +sort of variable assignment, like `x = x + 1`. + +{{< bergamot_widget id="basic-stmt-widget" query="" prompt="stepbasic(extend(extend(extend(empty, x, 17), y, 42), z, 0), TERM, ?env)" modes="Basic Statement:Basic Statement" >}} +hidden section "" { + EvalNum @ eval(?rho, lit(?n), ?n) <- int(?n); + EvalVar @ eval(?rho, var(?x), ?v) <- inenv(?x, ?v, ?rho); +} +hidden section "" { + EvalPlus @ eval(?rho, plus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), add(?v_1, ?v_2, ?v); + EvalMinus @ eval(?rho, minus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), subtract(?v_1, ?v_2, ?v); +} +section "" { + StepNoop @ stepbasic(?rho, noop, ?rho) <-; + StepAssign @ stepbasic(?rho, assign(?x, ?e), extend(?rho, ?x, ?v)) <- eval(?rho, ?e, ?v); +} +hidden section "" { + EnvTake @ inenv(?x, ?v, extend(?rho, ?x, ?v)) <-; + EnvSkip @ inenv(?x, ?v_1, extend(?rho, ?y, ?v_2)) <- inenv(?x, ?v_1, ?rho), not(symeq(?x, ?y)); +} +{{< /bergamot_widget >}} + +The Agda implementation is once again just a data type with constructors-for-rules. +This time they also look quite similar to the rules I've shown up until now, +though I continue to explicitly quantify over variables like `ρ`. + +{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 37 40 >}} + +#### Statements + +Let's work on non-simple statements next. The easiest rule to define is probably +sequencing. When we use `then` (or `;`) to combine two statements, what we +actually want is to execute the first statement, which may change variables, +and then execute the second statement while keeping the changes from the first. +This means there are three environments: \(\rho_1\) for the initial state before +either statement is executed, \(\rho_2\) for the state between executing the +first and second statement, and \(\rho_3\) for the final state after both +are done executing. This leads to the following rule: + +{{< latex >}} +\frac + { \rho_1, s_1 \Rightarrow \rho_2 \quad \rho_2, s_2 \Rightarrow \rho_3 } + { \rho_1, s_1; s_2 \Rightarrow \rho_3 } +{{< /latex >}} + +We will actually need two rules to evaluate the conditional statement: one +for when the condition evaluates to "true", and one for when the condition +evaluates to "false". Only, I never specified booleans as being part of +the language, which means that we will need to come up what "false" and "true" +are. I will take my cue from C++ and use zero as "false", and any other number +as "true". + +If the condition of an `if`-`else` statement is "true" (nonzero), then the +effect of executing the `if`-`else` should be the same as executing the "then" +part of the statement, while completely ignoring the "else" part. + +{{< latex >}} +\frac + { \rho_1 , e \Downarrow v \quad v \neq 0 \quad \rho_1, s_1 \Rightarrow \rho_2} + { \rho_1, \textbf{if}\ e\ \{ s_1 \}\ \textbf{else}\ \{ s_2 \}\ \Rightarrow \rho_2 } +{{< /latex >}} + +Notice that in the above rule, we used the evaluation judgement \(\rho_1, e \Downarrow v\) +to evaluate the _expression_ that serves as the condition. We then had an +additional premise that requires the truthiness of the resulting value \(v\). +The rule for evaluating a conditional with a "false" branch is very similar. + + +{{< latex >}} +\frac + { \rho_1 , e \Downarrow v \quad v = 0 \quad \rho_1, s_2 \Rightarrow \rho_2} + { \rho_1, \textbf{if}\ e\ \{ s_1 \}\ \textbf{else}\ \{ s_2 \}\ \Rightarrow \rho_2 } +{{< /latex >}} + +{{< bergamot_widget id="stmt-widget" query="" prompt="step(extend(extend(extend(empty, x, 17), y, 42), z, 0), TERM, ?env)" modes="Statement:Statement" >}} +hidden section "" { + EvalNum @ eval(?rho, lit(?n), ?n) <- int(?n); + EvalVar @ eval(?rho, var(?x), ?v) <- inenv(?x, ?v, ?rho); +} +hidden section "" { + EvalPlus @ eval(?rho, plus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), add(?v_1, ?v_2, ?v); + EvalMinus @ eval(?rho, minus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), subtract(?v_1, ?v_2, ?v); +} +hidden section "" { + StepNoop @ stepbasic(?rho, noop, ?rho) <-; + StepAssign @ stepbasic(?rho, assign(?x, ?e), extend(?rho, ?x, ?v)) <- eval(?rho, ?e, ?v); +} +hidden section "" { + StepNoop @ stepbasic(?rho, noop, ?rho) <-; + StepAssign @ stepbasic(?rho, assign(?x, ?e), extend(?rho, ?x, ?v)) <- eval(?rho, ?e, ?v); +} +hidden section "" { + StepLiftBasic @ step(?rho_1, ?s, ?rho_2) <- stepbasic(?rho_1, ?s, ?rho_2); +} +section "" { + StepIfTrue @ step(?rho_1, if(?e, ?s_1, ?s_2), ?rho_2) <- eval(?rho_1, ?e, ?v), not(numeq(?v, 0)), step(?rho_1, ?s_1, ?rho_2); + StepIfFalse @ step(?rho_1, if(?e, ?s_1, ?s_2), ?rho_2) <- eval(?rho_1, ?e, ?v), numeq(?v, 0), step(?rho_1, ?s_2, ?rho_2); + StepWhileTrue @ step(?rho_1, while(?e, ?s), ?rho_3) <- eval(?rho_1, ?e, ?v), not(numeq(?v, 0)), step(?rho_1, ?s, ?rho_2), step(?rho_2, while(?e, ?s), ?rho_3); + StepWhileFalse @ step(?rho_1, while(?e, ?s), ?rho_1) <- eval(?rho_1, ?e, ?v), numeq(?v, 0); + StepSeq @ step(?rho_1, seq(?s_1, ?s_2), ?rho_3) <- step(?rho_1, ?s_1, ?rho_2), step(?rho_2, ?s_2, ?rho_3); +} +hidden section "" { + EnvTake @ inenv(?x, ?v, extend(?rho, ?x, ?v)) <-; + EnvSkip @ inenv(?x, ?v_1, extend(?rho, ?y, ?v_2)) <- inenv(?x, ?v_1, ?rho), not(symeq(?x, ?y)); +} +{{< /bergamot_widget >}} diff --git a/content/blog/05_spa_agda_semantics/parser.js b/content/blog/05_spa_agda_semantics/parser.js new file mode 100644 index 0000000..cdbe615 --- /dev/null +++ b/content/blog/05_spa_agda_semantics/parser.js @@ -0,0 +1,128 @@ +const match = str => input => { + if (input.startsWith(str)) { + return [[str, input.slice(str.length)]] + } + return []; +}; + +const map = (f, m) => input => { + return m(input).map(([v, rest]) => [f(v), rest]); +}; + +const apply = (m1, m2) => input => { + return m1(input).flatMap(([f, rest]) => m2(rest).map(([v, rest]) => [f(v), rest])); +}; + +const pure = v => input => [[v, input]]; + +const liftA = (f, ...ms) => input => { + if (ms.length <= 0) return [] + + let results = map(v => [v], ms[0])(input); + for (let i = 1; i < ms.length; i++) { + results = results.flatMap(([vals, rest]) => + ms[i](rest).map(([val, rest]) => [[...vals, val], rest]) + ); + } + return results.map(([vals, rest]) => [f(...vals), rest]); +}; + +const many1 = (m) => liftA((x, xs) => [x].concat(xs), m, oneOf([ + lazy(() => many1(m)), + pure([]) +])); + +const many = (m) => oneOf([ pure([]), many1(m) ]); + +const oneOf = ms => input => { + return ms.flatMap(m => m(input)); +}; + +const takeWhileRegex0 = regex => input => { + let idx = 0; + while (idx < input.length && regex.test(input[idx])) { + idx++; + } + return [[input.slice(0, idx), input.slice(idx)]]; +}; + +const takeWhileRegex = regex => input => { + const result = takeWhileRegex0(regex)(input); + if (result[0][0].length > 0) return result; + return []; +}; + +const spaces = takeWhileRegex0(/\s/); + +const digits = takeWhileRegex(/\d/); + +const alphas = takeWhileRegex(/[a-zA-Z]/); + +const left = (m1, m2) => liftA((a, _) => a, m1, m2); + +const right = (m1, m2) => liftA((_, b) => b, m1, m2); + +const word = s => left(match(s), spaces); + +const end = s => s.length == 0 ? [['', '']] : []; + +const lazy = deferred => input => deferred()(input); + +const ident = left(alphas, spaces); + +const number = oneOf([ + liftA((a, b) => a + b, word("-"), left(digits, spaces)), + left(digits, spaces), +]); + +const basicExpr = oneOf([ + map(n => `lit(${n})`, number), + map(x => `var(${x})`, ident), + liftA((lp, v, rp) => v, word("("), lazy(() => expr), word(")")), +]); + +const opExpr = oneOf([ + liftA((_a, _b, e) => ["plus", e], word("+"), spaces, lazy(() => expr)), + liftA((_a, _b, e) => ["minus", e], word("-"), spaces, lazy(() => expr)), +]); + +const flatten = (e, es) => { + return es.reduce((e1, [op, e2]) => `${op}(${e1}, ${e2})`, e); +} + +const expr = oneOf([ + basicExpr, + liftA(flatten, basicExpr, many(opExpr)), +]); + +const basicStmt = oneOf([ + liftA((x, _, e) => `assign(${x}, ${e})`, ident, word("="), expr), + word("noop"), +]); + +const stmt = oneOf([ + basicStmt, + liftA((_if, _lp_, cond, _rp, _lbr1_, s1, _rbr1, _else, _lbr2, s2, _rbr2) => `if(${cond}, ${s1}, ${s2})`, + word("if"), word("("), expr, word(")"), + word("{"), lazy(() => stmtSeq), word("}"), + word("else"), word("{"), lazy(() => stmtSeq), word("}")), + liftA((_while, _lp_, cond, _rp, _lbr_, s1, _rbr) => `while(${cond}, ${s1})`, + word("while"), word("("), expr, word(")"), + word("{"), lazy(() => stmtSeq), word("}")), +]); + +const stmtSeq = oneOf([ + liftA((s1, _semi, rest) => `seq(${s1}, ${rest})`, stmt, word(";"), lazy(() => stmtSeq)), + stmt, +]); + +const parseWhole = m => string => { + const result = left(m, end)(string); + console.log(result); + if (result.length > 0) return result[0][0]; + return null; +} + +window.parseExpr = parseWhole(expr); +window.parseBasicStmt = parseWhole(basicStmt); +window.parseStmt = parseWhole(stmtSeq);