Compare commits: `7ccbaa7829...master` (94 commits)

| SHA1 |
|---|
| 5e117e3f48 |
| a6f3cd3f9a |
| ccc8d6f0eb |
| 7a088d6739 |
| 626baefd76 |
| 4602d02720 |
| 7fbd4ea9f8 |
| 6fd1e1962b |
| 62c338e382 |
| 40ea9ec637 |
| 787e194d41 |
| 71162e2db9 |
| 43debc65e4 |
| b07ea85b70 |
| 06e8b8e022 |
| 647f47a5f3 |
| 36e4feb668 |
| 11be991946 |
| a8c2b1d05a |
| fb46142e9d |
| 0b33d03b73 |
| 804147caef |
| d847d20666 |
| 07408d01a9 |
| 816a473913 |
| ce8f8fb872 |
| 2f60004241 |
| 7130c6bd11 |
| c5aacc060a |
| 6048dc0b9c |
| 1f01c3caff |
| bca44343eb |
| 3b9c2edcdd |
| fa180ee24e |
| 5846dd5d04 |
| f6b347eb05 |
| c1b27a13ae |
| 147658ee89 |
| 4017c52fee |
| 65d290556f |
| 854dccd4d2 |
| 07859cd1af |
| 19be7eb1f5 |
| 5de2ae1203 |
| 595a1ad99b |
| 32ccfc76ae |
| ae0d54e567 |
| b774f0e088 |
| e294bcb2d0 |
| 1322e0249c |
| e40a05633c |
| e5fb0a2929 |
| 3bd1f0c8a0 |
| 948759b7d4 |
| f2e424944e |
| 8a471c6b45 |
| 9b93582d18 |
| 9fc2d16fb8 |
| f00c69f02c |
| 4fc1191d13 |
| 951aafc90a |
| ee13409b33 |
| 615aeb72da |
| 3be67ca4c8 |
| b216581f2b |
| 1060198b01 |
| 0485917a20 |
| 37dd9ad6d4 |
| 77dade1d1d |
| d140422225 |
| 2defe99c73 |
| 0766211d79 |
| b854eade3c |
| c6f34f8eaa |
| 0ba5799c75 |
| 324fa948e6 |
| 3e46ff8be6 |
| 18eb1da5ba |
| dde7df4604 |
| e4101f1396 |
| d94ceeab2e |
| 697f083237 |
| 3e97fdcfea |
| 40007c427e |
| 5ab0d0d40e |
| 7c65afbc93 |
| fb071e55aa |
| 406c934b7a |
| 292cf009e6 |
| 6bf7659b19 |
| 0a90e8da29 |
| 32fe8e5ee6 |
| 26a29865e7 |
| ba287130d3 |

```diff
@@ -3,11 +3,11 @@ GEM
   specs:
     duktape (2.7.0.0)
     execjs (2.9.1)
-    mini_portile2 (2.8.6)
-    nokogiri (1.15.6)
+    mini_portile2 (2.8.8)
+    nokogiri (1.18.3)
       mini_portile2 (~> 2.8.2)
       racc (~> 1.4)
-    racc (1.8.0)
+    racc (1.8.1)

 PLATFORMS
   ruby
```

agda.rb (24 changed lines)

```diff
@@ -23,7 +23,7 @@ class AgdaContext
     return @file_infos[file] if @file_infos.include? file

     @file_infos[file] = line_infos = {}
-    unless File.exists?(file)
+    unless File.exist?(file)
       return line_infos
     end

@@ -46,7 +46,17 @@ class AgdaContext
       # assumes that links can't span multiple pages, and that links
       # aren't nested, so ensure that the parent of the textual node
       # is the preformatted block itself.
-      raise "unsupported Agda HTML output" if at.parent.name != "pre"
+      if at.parent.name != "pre"
+        # Cosmetic highlight warnings are sometimes applied to newlines.
+        # If they don't have content, treat them as normal newlines at the
+        # top level.
+        #
+        # This is an <a class="CosmeticProblem">\n</a> node.
+        unless at.parent.name == "a" and at.parent['class'] == "CosmeticProblem" and at.content.strip.empty?
+          raise "unsupported Agda HTML output in file #{file} at line #{line} (content #{at.content.inspect})"
+        end
+      end

       # Increase the line and track the final offset. Written as a loop
       # in case we eventually want to add some handling for the pieces

@@ -160,6 +170,14 @@ class FileGroup
       line_range = 1..
     end

+    # Sometimes, code is deeply nested in the source file, but we don't
+    # want to show the leading space. In that case, the generator sets
+    # data-source-offset with how much leading space was stripped off.
+    initial_offset = 0
+    if source_offset_attr = t.attribute("data-source-offset")
+      initial_offset = source_offset_attr.to_s.to_i
+    end
+
     full_path = t.attribute("data-file-path").to_s
     full_path_dirs = Pathname(full_path).each_filename.to_a

@@ -195,7 +213,7 @@ class FileGroup
       line_info = agda_info[line_no]
       next unless line_info

-      offset = 0
+      offset = initial_offset
       line.traverse do |lt|
         if lt.text?
           content = lt.content
```

```diff
@@ -43,7 +43,7 @@ files.each do |file|
   tags = []
   group = 1
   draft = false
-  next unless File.exists?(file)
+  next unless File.exist?(file)
   value = File.size(file)
   url = file.gsub(/^content/, "https://danilafe.com").delete_suffix("/index.md").delete_suffix(".md")
   File.readlines(file).each do |l|
```

assets/bergamot/rendering/imp.bergamot (new file, 65 lines)

```
LatexListNil @ latexlist(nil, nil) <-;
LatexListCons @ latexlist(cons(?x, ?xs), cons(?l_x, ?l_s)) <- latex(?x, ?l_x), latexlist(?xs, ?l_s);

IntercalateNil @ intercalate(?sep, nil, nil) <-;
IntercalateConsCons @ intercalate(?sep, cons(?x_1, cons(?x_2, ?xs)), cons(?x_1, cons(?sep, ?ys))) <- intercalate(?sep, cons(?x_2, ?xs), ?ys);
IntercalateConsNil @ intercalate(?sep, cons(?x, nil), cons(?x, nil)) <-;

NonEmpty @ nonempty(cons(?x, ?xs)) <-;

InListHere @ inlist(?e, cons(?e, ?es)) <-;
InListThere @ inlist(?e_1, cons(?e_2, ?es)) <- inlist(?e_1, ?es);

BasicParenLit @ paren(lit(?v), ?l) <- latex(lit(?v), ?l);
BasicParenVar @ paren(var(?x), ?l) <- latex(var(?x), ?l);
BasicParenVar @ paren(metavariable(?x), ?l) <- latex(metavariable(?x), ?l);
BasicParenOther @ paren(?t, ?l) <- latex(?t, ?l_t), join(["(", ?l_t, ")"], ?l);

LatexInt @ latex(?i, ?l) <- int(?i), tostring(?i, ?l);
LatexFloat @ latex(?f, ?l) <- float(?f), tostring(?f, ?l);
LatexStr @ latex(?s, ?l) <- str(?s), escapestring(?s, ?l_1), latexifystring(?s, ?l_2), join(["\\texttt{\"", ?l_2, "\"}"], ?l);
LatexMeta @ latex(metavariable(?l), ?l) <-;
LatexLit @ latex(lit(?i), ?l) <- latex(?i, ?l);
LatexVar @ latex(var(metavariable(?s)), ?l) <- latex(metavariable(?s), ?l);
LatexVar @ latex(var(?s), ?l) <- latex(?s, ?l_v), join(["\\texttt{", ?l_v, "}"], ?l);
LatexPlus @ latex(plus(?e_1, ?e_2), ?l) <-
    paren(?e_1, ?l_1), paren(?e_2, ?l_2),
    join([?l_1, " + ", ?l_2], ?l);
LatexMinus @ latex(minus(?e_1, ?e_2), ?l) <-
    paren(?e_1, ?l_1), paren(?e_2, ?l_2),
    join([?l_1, " - ", ?l_2], ?l);

EnvLiteralNil @ envlitrec(empty, "\\{", "", ?seen) <-;
EnvLiteralSingle @ envlitsingle(?pre, ?e, ?v, "", ?pre, ?seen) <- inlist(?e, ?seen);
EnvLiteralSingle @ envlitsingle(?pre, ?e, ?v, ?l, ", ", ?seen) <- latex(?e, ?l_e), latex(?v, ?l_v), join([?pre, "\\texttt{", ?l_e, "} \\mapsto", ?l_v], ?l);
EnvLiteralCons @ envlitrec(extend(empty, ?e, ?v), ?l, ?newnext, ?seen) <- envlitrec(?rho, ?l_rho, ?next, cons(?e, ?seen)), envlitsingle(?next, ?e, ?v, ?l_v, ?newnext, ?seen), join([?l_rho, ?l_v], ?l);
EnvLiteralCons @ envlitrec(extend(?rho, ?e, ?v), ?l, ?newnext, ?seen) <- envlitrec(?rho, ?l_rho, ?next, cons(?e, ?seen)), envlitsingle(?next, ?e, ?v, ?l_v, ?newnext, ?seen), join([?l_rho, ?l_v], ?l);
EnvLiteralOuter @ envlit(?rho, ?l) <- envlitrec(?rho, ?l_rho, ?rest, []), join([?l_rho, "\\}"], ?l);

LatexEnvLit @ latex(?rho, ?l) <- envlit(?rho, ?l);
LatexTypeEmpty @ latex(empty, "\\{\\}") <-;
LatexExtend @ latex(extend(?a, ?b, ?c), ?l) <- latex(?a, ?l_a), latex(?b, ?l_b), latex(?c, ?l_c), join([?l_a, "[", ?l_b, " \\mapsto ", ?l_c, "]"], ?l);
LatexInenv @ latex(inenv(?x, ?v, ?G), ?l) <-latex(?x, ?l_x), latex(?v, ?l_v), latex(?G, ?l_G), join([?l_G, "(", ?l_x, ") = ", ?l_v], ?l);
LatexEvalTer @ latex(eval(?G, ?e, ?t), ?l) <- latex(?G, ?l_G), latex(?e, ?l_e), latex(?t, ?l_t), join([?l_G, ",\\ ", ?l_e, " \\Downarrow ", ?l_t], ?l);

LatexAdd @ latex(add(?a, ?b, ?c), ?l) <- latex(?a, ?l_a), latex(?b, ?l_b), latex(?c, ?l_c), join([?l_a, "+", ?l_b, "=", ?l_c], ?l);
LatexSubtract @ latex(subtract(?a, ?b, ?c), ?l) <- latex(?a, ?l_a), latex(?b, ?l_b), latex(?c, ?l_c), join([?l_a, "-", ?l_b, "=", ?l_c], ?l);
LatexEvalTer @ latex(stepbasic(?G, ?e, ?H), ?l) <- latex(?G, ?l_G), latex(?e, ?l_e), latex(?H, ?l_H), join([?l_G, ",\\ ", ?l_e, " \\Rightarrow ", ?l_H], ?l);
LatexEvalTer @ latex(step(?G, ?e, ?H), ?l) <- latex(?G, ?l_G), latex(?e, ?l_e), latex(?H, ?l_H), join([?l_G, ",\\ ", ?l_e, " \\Rightarrow ", ?l_H], ?l);

LatexNoop @ latex(noop, "\\texttt{noop}") <-;
LatexAssign @ latex(assign(?x, ?e), ?l) <- latex(?x, ?l_x), latex(?e, ?l_e), join([?l_x, " = ", ?l_e], ?l);
LatexAssign @ latex(if(?e, ?s_1, ?s_2), ?l) <- latex(?e, ?l_e), latex(?s_1, ?l_1), latex(?s_2, ?l_2), join(["\\textbf{if}\\ ", ?l_e, "\\ \\{\\ ", ?l_1, "\\ \\}\\ \\textbf{else}\\ \\{\\ ", ?l_2, "\\ \\}"], ?l);
LatexAssign @ latex(while(?e, ?s), ?l) <- latex(?e, ?l_e), latex(?s, ?l_s), join(["\\textbf{while}\\ ", ?l_e, "\\ \\{\\ ", ?l_s, "\\ \\}"], ?l);
LatexAssign @ latex(seq(?s_1, ?s_2), ?l) <- latex(?s_1, ?l_1), latex(?s_2, ?l_2), join([?l_1, "; ", ?l_2], ?l);

LatexNumNeq @ latex(not(eq(?e_1, ?e_2)), ?l) <- latex(?e_1, ?l_1), latex(?e_2, ?l_2), join([?l_1, " \\neq ", ?l_2], ?l);
LatexNot @ latex(not(?e), ?l) <- latex(?e, ?l_e), join(["\\neg (", ?l_e, ")"], ?l);
LatexNumEq @ latex(eq(?e_1, ?e_2), ?l) <- latex(?e_1, ?l_1), latex(?e_2, ?l_2), join([?l_1, " = ", ?l_2], ?l);

LatexIsInt @ latex(int(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Int}"], ?l);
LatexIsFloat @ latex(float(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Float}"], ?l);
LatexIsNum @ latex(num(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Num}"], ?l);
LatexIsStr @ latex(str(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Str}"], ?l);
LatexSym @ latex(?s, ?l) <- sym(?s), tostring(?s, ?l_1), join(["\\text{", ?l_1,"}"], ?l);
LatexCall @ latex(?c, ?l) <- call(?c, ?n, ?ts), nonempty(?ts), latexlist(?ts, ?lts_1), intercalate(", ", ?lts_1, ?lts_2), join(?lts_2, ?lts_3), join(["\\text{", ?n, "}", "(", ?lts_3, ")"], ?l);
```

assets/bergamot/rendering/lc.bergamot (new file, 74 lines)

```
PrecApp @ prec(app(?l, ?r), 100, left) <-;
PrecPlus @ prec(plus(?l, ?r), 80, either) <-;
PrecAbs @ prec(abs(?x, ?t, ?e), 0, right) <-;
PrecArr @ prec(tarr(?l, ?r), 0, right) <-;

SelectHead @ select(cons([?t, ?v], ?rest), ?default, ?v) <- ?t;
SelectTail @ select(cons([?t, ?v], ?rest), ?default, ?found) <- not(?t), select(?rest, ?default, ?found);
SelectEmpty @ select(nil, ?default, ?default) <-;

Eq @ eq(?x, ?x) <-;

ParenthAssocLeft @ parenthassoc(?a_i, left, right) <-;
ParenthAssocRight @ parenthassoc(?a_i, right, left) <-;
ParenthAssocNone @ parenthassoc(?a_i, none, ?pos) <-;
ParenthAssocNeq @ parenthassoc(?a_i, ?a_o, ?pos) <- not(eq(?a_i, ?a_o));

Parenth @ parenth(?inner, ?outer, ?pos, ?strin, ?strout) <-
    prec(?inner, ?p_i, ?a_i), prec(?outer, ?p_o, ?a_o),
    join(["(", ?strin, ")"], ?strinparen),
    select([ [less(?p_i, ?p_o), strinparen], [less(?p_o, ?p_i), ?strin], [ parenthassoc(?a_i, ?a_o, ?pos), ?strinparen ] ], ?strin, ?strout);
ParenthFallback @ parenth(?inner, ?outer, ?pos, ?strin, ?strin) <-;

LatexListNil @ latexlist(nil, nil) <-;
LatexListCons @ latexlist(cons(?x, ?xs), cons(?l_x, ?l_s)) <- latex(?x, ?l_x), latexlist(?xs, ?l_s);

IntercalateNil @ intercalate(?sep, nil, nil) <-;
IntercalateConsCons @ intercalate(?sep, cons(?x_1, cons(?x_2, ?xs)), cons(?x_1, cons(?sep, ?ys))) <- intercalate(?sep, cons(?x_2, ?xs), ?ys);
IntercalateConsNil @ intercalate(?sep, cons(?x, nil), cons(?x, nil)) <-;

NonEmpty @ nonempty(cons(?x, ?xs)) <-;

LatexInt @ latex(?i, ?l) <- int(?i), tostring(?i, ?l);
LatexFloat @ latex(?f, ?l) <- float(?f), tostring(?f, ?l);
LatexStr @ latex(?s, ?l) <- str(?s), escapestring(?s, ?l_1), latexifystring(?s, ?l_2), join(["\\texttt{\"", ?l_2, "\"}"], ?l);
LatexMeta @ latex(metavariable(?l), ?l) <-;
LatexLit @ latex(lit(?i), ?l) <- latex(?i, ?l);
LatexVar @ latex(var(?s), ?l) <- latex(?s, ?l);
LatexPlus @ latex(plus(?e_1, ?e_2), ?l) <-
    latex(?e_1, ?l_1), latex(?e_2, ?l_2),
    parenth(?e_1, plus(?e_1, ?e_2), left, ?l_1, ?lp_1),
    parenth(?e_2, plus(?e_1, ?e_2), right, ?l_2, ?lp_2),
    join([?lp_1, " + ", ?lp_2], ?l);
LatexPair @ latex(pair(?e_1, ?e_2), ?l) <- latex(?e_1, ?l_1), latex(?e_2, ?l_2), join(["(", ?l_1, ", ", ?l_2, ")"], ?l);
LatexAbs @ latex(abs(?x, ?t, ?e), ?l) <- latex(?e, ?l_e), latex(?t, ?l_t), latex(?x, ?l_x), join(["\\lambda ", ?l_x, " : ", ?l_t, " . ", ?l_e], ?l);
LatexApp @ latex(app(?e_1, ?e_2), ?l) <-
    latex(?e_1, ?l_1), latex(?e_2, ?l_2),
    parenth(?e_1, app(?e_1, ?e_2), left, ?l_1, ?lp_1),
    parenth(?e_2, app(?e_1, ?e_2), right, ?l_2, ?lp_2),
    join([?lp_1, " \\enspace ", ?lp_2], ?l);

LatexTInt @ latex(tint, "\\text{tint}") <-;
LatexTStr @ latex(tstr, "\\text{tstr}") <-;
LatexTArr @ latex(tarr(?t_1, ?t_2), ?l) <-
    latex(?t_1, ?l_1), latex(?t_2, ?l_2),
    parenth(?t_1, tarr(?t_1, ?t_2), left, ?l_1, ?lp_1),
    parenth(?t_2, tarr(?t_1, ?t_2), right, ?l_2, ?lp_2),
    join([?lp_1, " \\to ", ?lp_2], ?l);
LatexTPair @ latex(tpair(?t_1, ?t_2), ?l) <- latex(?t_1, ?l_1), latex(?t_2, ?l_2), join(["(", ?l_1, ", ", ?l_2, ")"], ?l);

LatexTypeEmpty @ latex(empty, "\\varnothing") <-;
LatexTypeExtend @ latex(extend(?a, ?b, ?c), ?l) <- latex(?a, ?l_a), latex(?b, ?l_b), latex(?c, ?l_c), join([?l_a, " , ", ?l_b, " : ", ?l_c], ?l);
LatexTypeInenv @ latex(inenv(?x, ?t, ?G), ?l) <-latex(?x, ?l_x), latex(?t, ?l_t), latex(?G, ?l_G), join([?l_x, " : ", ?l_t, " \\in ", ?l_G], ?l);

LatexTypeBin @ latex(type(?e, ?t), ?l) <- latex(?e, ?l_e), latex(?t, ?l_t), join([?l_e, " : ", ?l_t], ?l);
LatexTypeTer @ latex(type(?G, ?e, ?t), ?l) <- latex(?G, ?l_G), latex(?e, ?l_e), latex(?t, ?l_t), join([?l_G, " \\vdash ", ?l_e, " : ", ?l_t], ?l);

LatexConverts @ latex(converts(?f, ?t), ?l) <- latex(?f, ?l_f), latex(?t, ?l_t), join([?l_f, " \\preceq ", ?l_t], ?l);

LatexIsInt @ latex(int(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Int}"], ?l);
LatexIsFloat @ latex(float(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Float}"], ?l);
LatexIsNum @ latex(num(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Num}"], ?l);
LatexIsStr @ latex(str(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\in \\texttt{Str}"], ?l);
LatexSym @ latex(?s, ?l) <- sym(?s), tostring(?s, ?l_1), join(["\\text{", ?l_1,"}"], ?l);
LatexCall @ latex(?c, ?l) <- call(?c, ?n, ?ts), nonempty(?ts), latexlist(?ts, ?lts_1), intercalate(", ", ?lts_1, ?lts_2), join(?lts_2, ?lts_3), join(["\\text{", ?n, "}", "(", ?lts_3, ")"], ?l);
```

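The `prec`/`parenth` rules in `lc.bergamot` above decide when a subterm must be parenthesized, based on precedence and associativity. As a rough Python sketch of that decision logic (hypothetical names and an assumed reading of the rules; the bergamot file above remains the authoritative version):

```python
def parenth(inner_prec, inner_assoc, outer_prec, outer_assoc, pos, s):
    """Parenthesize s when the inner node binds looser than its parent,
    or binds equally but associates the wrong way for its position."""
    if inner_prec < outer_prec:
        return f"({s})"   # looser binding: always parenthesize
    if outer_prec < inner_prec:
        return s          # tighter binding: never parenthesize
    # Equal precedence: mirror the ParenthAssoc* rules above.
    if (outer_assoc == "left" and pos == "right") \
            or (outer_assoc == "right" and pos == "left") \
            or outer_assoc == "none" \
            or inner_assoc != outer_assoc:
        return f"({s})"
    return s

# Application is left-associative at precedence 100, so in f (x y) the
# right child keeps its parentheses while a left child drops them.
assert parenth(100, "left", 100, "left", "right", "x y") == "(x y)"
assert parenth(100, "left", 100, "left", "left", "f x") == "f x"
```
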
assets/scss/thevoid.scss (new file, 430 lines)

```scss
@import "variables.scss";

body {
  background-color: #1c1e26;
  --text-color: white;
  font-family: $font-code;
}

h1, h2, h3, h4, h5, h6 {
  text-align: left;
  font-family: $font-code;
}

h1::after {
  content: "(writing)";
  font-size: 1rem;
  margin-left: 0.5em;
  position: relative;
  bottom: -0.5em;
  color: $primary-color;
}

nav .container {
  justify-content: flex-start;

  a {
    padding-left: 0;
    margin-right: 1em;
  }
}

.header-divider {
  visibility: hidden;
}

hr {
  height: auto;
  border: none;

  &::after {
    content: "* * *";
    color: $primary-color;
    font-size: 2rem;
    display: block;
    text-align: center;
  }
}

/* Code for the CSS glitch effect. Originally from: https://codepen.io/mattgrosswork/pen/VwprebG */

.glitch {
  position: relative;

  span {
    animation: paths 5s step-end infinite;
    font-weight: bold;
  }

  &::before, &::after {
    content: attr(data-text);
    position: absolute;
    width: 110%;
    z-index: -1;
  }

  &::before {
    top: 10px;
    left: 15px;
    color: #e0287d;

    animation: paths 5s step-end infinite, opacity 5s step-end infinite,
      font 8s step-end infinite, movement 10s step-end infinite;
  }

  &::after {
    top: 5px;
    left: -10px;
    color: #1bc7fb;

    animation: paths 5s step-end infinite, opacity 5s step-end infinite,
      font 7s step-end infinite, movement 8s step-end infinite;
  }
}

@keyframes paths {
  0% {
    clip-path: polygon(
      0% 43%, 83% 43%, 83% 22%, 23% 22%, 23% 24%, 91% 24%, 91% 26%,
      18% 26%, 18% 83%, 29% 83%, 29% 17%, 41% 17%, 41% 39%, 18% 39%,
      18% 82%, 54% 82%, 54% 88%, 19% 88%, 19% 4%, 39% 4%, 39% 14%,
      76% 14%, 76% 52%, 23% 52%, 23% 35%, 19% 35%, 19% 8%, 36% 8%,
      36% 31%, 73% 31%, 73% 16%, 1% 16%, 1% 56%, 50% 56%, 50% 8%
    );
  }

  5% {
    clip-path: polygon(
      0% 29%, 44% 29%, 44% 83%, 94% 83%, 94% 56%, 11% 56%, 11% 64%,
      94% 64%, 94% 70%, 88% 70%, 88% 32%, 18% 32%, 18% 96%, 10% 96%,
      10% 62%, 9% 62%, 9% 84%, 68% 84%, 68% 50%, 52% 50%, 52% 55%,
      35% 55%, 35% 87%, 25% 87%, 25% 39%, 15% 39%, 15% 88%, 52% 88%
    );
  }

  30% {
    clip-path: polygon(
      0% 53%, 93% 53%, 93% 62%, 68% 62%, 68% 37%, 97% 37%, 97% 89%,
      13% 89%, 13% 45%, 51% 45%, 51% 88%, 17% 88%, 17% 54%, 81% 54%,
      81% 75%, 79% 75%, 79% 76%, 38% 76%, 38% 28%, 61% 28%, 61% 12%,
      55% 12%, 55% 62%, 68% 62%, 68% 51%, 0% 51%, 0% 92%, 63% 92%,
      63% 4%, 65% 4%
    );
  }

  45% {
    clip-path: polygon(
      0% 33%, 2% 33%, 2% 69%, 58% 69%, 58% 94%, 55% 94%, 55% 25%,
      33% 25%, 33% 85%, 16% 85%, 16% 19%, 5% 19%, 5% 20%, 79% 20%,
      79% 96%, 93% 96%, 93% 50%, 5% 50%, 5% 74%, 55% 74%, 55% 57%,
      96% 57%, 96% 59%, 87% 59%, 87% 65%, 82% 65%, 82% 39%, 63% 39%,
      63% 92%, 4% 92%, 4% 36%, 24% 36%, 24% 70%, 1% 70%, 1% 43%,
      15% 43%, 15% 28%, 23% 28%, 23% 71%, 90% 71%, 90% 86%, 97% 86%,
      97% 1%, 60% 1%, 60% 67%, 71% 67%, 71% 91%, 17% 91%, 17% 14%,
      39% 14%, 39% 30%, 58% 30%, 58% 11%, 52% 11%, 52% 83%, 68% 83%
    );
  }

  76% {
    clip-path: polygon(
      0% 26%, 15% 26%, 15% 73%, 72% 73%, 72% 70%, 77% 70%, 77% 75%,
      8% 75%, 8% 42%, 4% 42%, 4% 61%, 17% 61%, 17% 12%, 26% 12%,
      26% 63%, 73% 63%, 73% 43%, 90% 43%, 90% 67%, 50% 67%, 50% 41%,
      42% 41%, 42% 46%, 50% 46%, 50% 84%, 96% 84%, 96% 78%, 49% 78%,
      49% 25%, 63% 25%, 63% 14%
    );
  }

  90% {
    clip-path: polygon(
      0% 41%, 13% 41%, 13% 6%, 87% 6%, 87% 93%, 10% 93%, 10% 13%,
      89% 13%, 89% 6%, 3% 6%, 3% 8%, 16% 8%, 16% 79%, 0% 79%,
      0% 99%, 92% 99%, 92% 90%, 5% 90%, 5% 60%, 0% 60%, 0% 48%,
      89% 48%, 89% 13%, 80% 13%, 80% 43%, 95% 43%, 95% 19%, 80% 19%,
      80% 85%, 38% 85%, 38% 62%
    );
  }

  1%,
  7%,
  33%,
  47%,
  78%,
  93% {
    clip-path: none;
  }
}

@keyframes movement {
  0% {
    top: 0px;
    left: -20px;
  }

  15% {
    top: 10px;
    left: 10px;
  }

  60% {
    top: 5px;
    left: -10px;
  }

  75% {
    top: -5px;
    left: 20px;
  }

  100% {
    top: 10px;
    left: 5px;
  }
}

@keyframes opacity {
  0% {
    opacity: 0.1;
  }

  5% {
    opacity: 0.7;
  }

  30% {
    opacity: 0.4;
  }

  45% {
    opacity: 0.6;
  }

  76% {
    opacity: 0.4;
  }

  90% {
    opacity: 0.8;
  }

  1%,
  7%,
  33%,
  47%,
  78%,
  93% {
    opacity: 0;
  }
}

@keyframes font {
  0% {
    font-weight: 100;
    color: #e0287d;
    filter: blur(3px);
  }

  20% {
    font-weight: 500;
    color: #fff;
    filter: blur(0);
  }

  50% {
    font-weight: 300;
    color: #1bc7fb;
    filter: blur(2px);
  }

  60% {
    font-weight: 700;
    color: #fff;
    filter: blur(0);
  }

  90% {
    font-weight: 500;
    color: #e0287d;
    filter: blur(6px);
  }
}
```

```diff
@@ -26,7 +26,7 @@ files = ARGV
 code_paths = Dir.entries(root_path).select do |f|
   File.directory?(File.join(root_path, f)) and f != '.' and f != '..'
 end.to_set
-code_paths += JSON.parse(File.read(data_file)).keys if File.exists? data_file
+code_paths += JSON.parse(File.read(data_file)).keys if File.exist? data_file
 # Extending code_paths from submodules.json means that nested Agda modules
 # have their root dir correctly set.

@@ -60,7 +60,7 @@ files_for_paths.each do |path, files|
   FileUtils.mkdir_p html_dir

   files.each do |file|
-    command = "#{ARGV[0]} --local-interfaces #{file} --html --html-dir=#{html_dir}"
+    command = "#{ARGV[0]} #{file} --html --html-dir=#{html_dir}"
     puts command
     puts `#{command}`
```

chatgpt-subset-feather-icon.rb (new file, 49 lines)

```ruby
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'nokogiri'
require 'set'

# 1) Process all files passed in from the command line
svgpath = ARGV[0]
files = ARGV[1..]

# 2) Extract used Feather icons
used_icons = Set.new

files.each do |file|
  # Parse each HTML file
  doc = File.open(file, "r:UTF-8") { |f| Nokogiri::HTML(f) }

  # Look for <use xlink:href="/feather-sprite.svg#iconName">
  doc.css("use").each do |use_tag|
    href = use_tag["xlink:href"] || use_tag["href"]
    if href && href.start_with?("/feather-sprite.svg#")
      icon_name = href.split("#").last
      used_icons << icon_name
    end
  end
end

puts "Found #{used_icons.size} unique icons: #{used_icons.to_a.join(', ')}"

# 3) Load the full feather-sprite.svg as XML
sprite_doc = File.open(svgpath, "r:UTF-8") { |f| Nokogiri::XML(f) }

# 4) Create a new SVG with only the required symbols
new_svg = Nokogiri::XML::Document.new
svg_tag = Nokogiri::XML::Node.new("svg", new_svg)
svg_tag["xmlns"] = "http://www.w3.org/2000/svg"
new_svg.add_child(svg_tag)

sprite_doc.css("symbol").each do |symbol_node|
  if used_icons.include?(symbol_node["id"])
    # Duplicate the symbol node (so it can be inserted in the new document)
    svg_tag.add_child(symbol_node.dup)
  end
end

# 5) Save the subset sprite
File.open(svgpath, "w:UTF-8") do |f|
  f.write(new_svg.to_xml)
end
```

chatgpt-subset-one-go.py (new file, 69 lines)

```python
import os
import sys
from bs4 import BeautifulSoup
from fontTools.subset import Subsetter, Options
from fontTools.ttLib import TTFont

FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf")  # Font file types

def extract_text_from_html(file_path):
    """Extract text content from a single HTML file."""
    with open(file_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")
    return soup.get_text()

def get_used_characters(files):
    """Collect unique characters from the given .html files."""
    char_set = set()
    for file in files:
        text = extract_text_from_html(file)
        char_set.update(text)
    return "".join(sorted(char_set))

def find_font_files(directory):
    """Find all font files in the given directory, recursively."""
    font_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(FONT_EXTENSIONS):
                font_files.append(os.path.join(root, file))
    return font_files

def subset_font_in_place(font_path, characters):
    """Subsets the given font file to include only the specified characters."""
    # Convert characters to their integer code points
    unicode_set = {ord(c) for c in characters}

    font = TTFont(font_path)
    options = Options()
    options.drop_tables += ["DSIG"]
    options.drop_tables += ["LTSH", "VDMX", "hdmx", "gasp"]
    options.unicodes = unicode_set
    options.variations = False
    options.drop_variations = True
    options.layout_features = ["*"]  # keep all OT features
    options.hinting = False

    # Preserve original format if it was WOFF/WOFF2
    if font_path.endswith(".woff2"):
        options.flavor = "woff2"
    elif font_path.endswith(".woff"):
        options.flavor = "woff"

    subsetter = Subsetter(options)
    subsetter.populate(unicodes=unicode_set)
    subsetter.subset(font)

    # Overwrite the original font file
    font.save(font_path)
    print(f"Subsetted font in place: {font_path}")

if __name__ == "__main__":
    used_chars = get_used_characters(sys.argv[2:])
    print(f"Extracted {len(used_chars)} unique characters from {len(sys.argv[2:])} HTML files.")

    font_files = find_font_files(sys.argv[1])
    print(f"Found {len(font_files)} font files to subset.")

    for font_file in font_files:
        subset_font_in_place(font_file, used_chars)
```

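A hypothetical driver for the script above, reusing its functions with example paths (the paths and this snippet are illustrative, not from the repository; per the script's `__main__` block, the command-line form is `python chatgpt-subset-one-go.py <font-dir> <html files...>`):

```python
# Reuses the functions defined above; paths are examples only.
import glob

html_files = glob.glob("public/**/*.html", recursive=True)
chars = get_used_characters(html_files)          # characters actually used
for font_file in find_font_files("public/fonts"):
    subset_font_in_place(font_file, chars)       # shrink each font in place
```
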
Submodule code/agda-spa updated: f0da9a9020...9131214880
```diff
@@ -44,8 +44,6 @@ defaultContentLanguage = 'en'
 languageCode = "en-us"

 [params]
-bergamotJsUrl = "https://static.danilafe.com/bergamot/bergamot.js"
-katexJsUrl = "https://static.danilafe.com/katex/katex.min.js"
 plausibleAnalyticsDomain = "danilafe.com"
 githubUsername = "DanilaFe"
 siteSourceUrl = "https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master"
```

```diff
@@ -88,7 +88,12 @@ I'd like to cover the following major topics, spending a couple of posts on each
 which is not at all compiler-centric. We need to connect these two, showing
 that the CFGs we produce "make sense" for our language, and that given
 CFGs that make sense, our analysis produces results that match the language's
-execution.
+execution. To do so, I write about the language and its semantics
+in {{< draftlink "Part 5: Our Programming Language" "05_spa_agda_semantics" >}},
+then about building control flow graphs for the language in
+{{< draftlink "Part 6: Control Flow Graphs" "06_spa_agda_cfg" >}}.
+I then write about combining these two representations in
+{{< draftlink "Part 7: Connecting Semantics and Control Flow Graphs" "07_spa_agda_semantics_and_cfg" >}}.


 ### Navigation
@@ -98,3 +103,8 @@ Here are the posts that I’ve written so far for this series:
 * {{< draftlink "Combining Lattices" "02_spa_agda_combining_lattices" >}}
 * {{< draftlink "Lattices of Finite Height" "03_spa_agda_fixed_height" >}}
 * {{< draftlink "The Fixed-Point Algorithm" "04_spa_agda_fixedpoint" >}}
+* {{< draftlink "Our Programming Language" "05_spa_agda_semantics" >}}
+* {{< draftlink "Control Flow Graphs" "06_spa_agda_cfg" >}}
+* {{< draftlink "Connecting Semantics and Control Flow Graphs" "07_spa_agda_semantics_and_cfg" >}}
+* {{< draftlink "Forward Analysis" "08_spa_agda_forward" >}}
+* {{< draftlink "Verifying the Forward Analysis" "09_spa_agda_verified_forward" >}}
```

```diff
@@ -4,11 +4,16 @@ date: 2022-06-26T18:36:01-07:00
 tags: ["Type Systems", "Programming Languages"]
 series: "Everything I Know About Types"
 draft: true
+bergamot:
+  render_presets:
+    default: "bergamot/rendering/lc.bergamot"
+  presets:
+    intro:
+      prompt: "type(TERM, ?t)"
+      query: ""
+      file: "intro.bergamot"
 ---

 {{< katex_component_js >}}
 {{< bergamot_js_css >}}

 I am in love with types and type systems. They are, quite probably,
 my favorite concept in the world. Most of us mere
 mortals use types as a way to make sure we aren't writing
@@ -171,12 +176,6 @@ to the tool than to type theory itself; I will denote these exercises as such wh
 possible. Also, whenever the context of the exercise can be loaded into
 Bergamot, I will denote this with a play button.

-{{< bergamot_preset name="intro-preset" prompt="PromptConverter @ prompt(type(?term, ?t)) <- input(?term);" >}}
-TNumber @ type(lit(?n), number) <- num(?n);
-TPlusI @ type(plus(?e_1, ?e_2), number) <-
-  type(?e_1, number), type(?e_2, number);
-{{< /bergamot_preset >}}
-
-{{< bergamot_exercise label="bergamot; sample exercise" preset="intro-preset" id="exercise-2" >}}
+{{< bergamot_exercise label="bergamot; sample exercise" preset="intro" id="exercise-2" >}}
 Try typing `1+1` into the input field!
 {{< /bergamot_exercise >}}
```

content/blog/00_types_intro/intro.bergamot (new file, 3 lines)

```
TNumber @ type(lit(?n), number) <- num(?n);
TPlusI @ type(plus(?e_1, ?e_2), number) <-
    type(?e_1, number), type(?e_2, number);
```

```diff
@@ -79,6 +79,7 @@ a less specific output! The more you know going in, the more you should know
 coming out. Similarly, when given less specific / vaguer information, the
 analysis shouldn't produce a more specific answer -- how could it do that?
 This leads us to come up with the following rule:
+{#define-monotonicity}

 {{< latex >}}
 \textbf{if}\ \text{input}_1 \le \text{input}_2,
@@ -138,7 +139,7 @@ Moreover, the value we'll get out of the fixed point algorithm will be
 the _least fixed point_. For us, this means that the result will be
 "the most specific result possible".

-{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 80 80 >}}
+{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 86 86 >}}

 The above explanation omits a lot of details, but it's a start. To get more
 precise, we must drill down into several aspects of what I've said so far.
@@ -203,6 +204,7 @@ element that's greater (less specific) than either `s1` or `s2`". Conventionally
 this function is written as \(a \sqcup b\) (or in our case, \(s_1 \sqcup s_2\)).
 The \((\sqcup)\) symbol is also called the _join_ of \(a\) and \(b\).
 We can define it for our signs so far using the following [Cayley table](https://en.wikipedia.org/wiki/Cayley_table).
+{#least-upper-bound}

 {{< latex >}}
 \begin{array}{c|cccc}
@@ -342,7 +344,7 @@ the _absorption laws_:

 In Agda, we can therefore write a lattice as follows:

-{{< codelines "Agda" "agda-spa/Lattice.agda" 153 163 >}}
+{{< codelines "Agda" "agda-spa/Lattice.agda" 183 193 >}}

 ### Concrete Examples
 #### Natural Numbers
```

content/blog/01_types_basics/conversion.bergamot (new file, 19 lines)

```
section "Conversion rules" {
    ConvertsIS @ converts(integer, string) <-;
    ConvertsIF @ converts(integer, float) <-;
    ConvertsFS @ converts(float, string) <-;
}

section "Rules for literals" {
    TInt @ type(lit(?n), integer) <- int(?n);
    TFloat @ type(lit(?f), float) <- float(?f);
    TString @ type(lit(?s), string) <- str(?s);
}

section "" {
    TPlusInt @ type(plus(?e_1, ?e_2), integer) <- type(?e_1, integer), type(?e_2, integer);
    TPlusFloat @ type(plus(?e_1, ?e_2), float) <- type(?e_1, float), type(?e_2, float);
    TPlusString @ type(plus(?e_1, ?e_2), string) <- type(?e_1, string), type(?e_2, string);
}

TConverts @ type(?e, ?tau_2) <- type(?e, ?tau_1), converts(?tau_1, ?tau_2);
```

```diff
@@ -4,10 +4,24 @@ date: 2022-06-30T19:08:50-07:00
 tags: ["Type Systems", "Programming Languages"]
 series: "Everything I Know About Types"
 draft: true
----
+bergamot:
+  render_presets:
+    default: "bergamot/rendering/lc.bergamot"
+  presets:
+    notation:
+      prompt: "type(TERM, ?t)"
+      query: ""
+      file: "notation.bergamot"
+    string:
+      prompt: "type(TERM, ?t)"
+      query: "\"hello\"+\"world\""
+      file: "string.bergamot"
+    conversion:
+      prompt: "type(TERM, ?t)"
+      query: ""
+      file: "conversion.bergamot"

 {{< katex_component_js >}}
 {{< bergamot_js_css >}}
+---

 It's finally time to start looking at types. As I mentioned, I want
 to take an approach that draws a variety of examples from the real
@@ -114,11 +128,7 @@ Another consequence of this is that not everyone agrees on notation; according
 to [this paper](https://labs.oracle.com/pls/apex/f?p=LABS:0::APPLICATION_PROCESS%3DGETDOC_INLINE:::DOC_ID:959),
 27 different ways of writing down substitutions were observed in the POPL conference alone.

-{{< bergamot_preset name="notation-preset" prompt="PromptConverter @ prompt(type(?term, ?t)) <- input(?term);" >}}
-TNumber @ type(lit(?n), number) <- num(?n);
-{{< /bergamot_preset >}}
-
-{{< bergamot_exercise label="bergamot; tweaking notation" preset="notation-preset" id="exercise-1" >}}
+{{< bergamot_exercise label="bergamot; tweaking notation" preset="notation" id="exercise-1" >}}
 Bergamot, the interactive tool I've developed for doing exercises, supports
 customizing the notation for rules. Try changing the \(:\) symbol to
 the \(\sim\) symbol (denoted in latex as `\sim`).
@@ -317,13 +327,7 @@ This rule is read as follows:

 > If \(e_1\) and \(e_2\) have type \(\text{string}\), then \(e_1+e_2\) has type \(\text{string}\).

-{{< bergamot_preset name="string-preset" prompt="PromptConverter @ prompt(type(?term, ?t)) <- input(?term);" query="\"hello\"+\"world\"">}}
-TNumber @ type(lit(?n), number) <- num(?n);
-TPlusI @ type(plus(?e_1, ?e_2), number) <-
-  type(?e_1, number), type(?e_2, number);
-{{< /bergamot_preset >}}
-
-{{< bergamot_exercise label="bergamot; adding rules for strings" preset="string-preset" id="exercise-2" >}}
+{{< bergamot_exercise label="bergamot; adding rules for strings" preset="string" id="exercise-2" >}}
 Try writing the Bergamot rules that correspond to the inference rule for strings
 above. I've provided the rules for numbers; the rules for strings should be quite
 similar.
@@ -384,29 +388,7 @@ from the conversion rules. Chapter 15 of _Types and Programming Languages_
 by Benjamin Pierce is a nice explanation, but the [Wikipedia page](https://en.wikipedia.org/wiki/Subtyping)
 ain't bad, either.

-{{< bergamot_preset name="conversion-preset" prompt="PromptConverter @ prompt(type(?term, ?t)) <- input(?term);" >}}
-section "Conversion rules" {
-  ConvertsIS @ converts(integer, string) <-;
-  ConvertsIF @ converts(integer, float) <-;
-  ConvertsFS @ converts(float, string) <-;
-}
-
-section "Rules for literals" {
-  TInt @ type(lit(?n), integer) <- int(?n);
-  TFloat @ type(lit(?f), float) <- float(?f);
-  TString @ type(lit(?s), string) <- str(?s);
-}
-
-section "" {
-  TPlusInt @ type(plus(?e_1, ?e_2), integer) <- type(?e_1, integer), type(?e_2, integer);
-  TPlusFloat @ type(plus(?e_1, ?e_2), float) <- type(?e_1, float), type(?e_2, float);
-  TPlusString @ type(plus(?e_1, ?e_2), string) <- type(?e_1, string), type(?e_2, string);
-}
-
-TConverts @ type(?e, ?tau_2) <- type(?e, ?tau_1), converts(?tau_1, ?tau_2);
-{{< /bergamot_preset >}}
-
-{{< bergamot_exercise label="advanced; a taste of conversions" preset="conversion-preset" id="exercise-3" >}}
+{{< bergamot_exercise label="advanced; a taste of conversions" preset="conversion" id="exercise-3" >}}
 This exercise is simply an early taste of formalizing conversions, which
 allow users to (for example) write numbers where the language expects strings, with the
 understanding that the number will be automatically turned into a string.
@@ -564,7 +546,7 @@ and already be up-to-speed on a big chunk of the content.
 | {{< latex >}}\frac{e_1 : \text{number}\quad e_2 : \text{number}}{e_1+e_2 : \text{number}} {{< /latex >}}| Adding numbers gives a number |

 #### Playground
-{{< bergamot_widget id="widget" query="" prompt="PromptConverter @ prompt(type(?term, ?t)) <- input(?term);" >}}
+{{< bergamot_widget id="widget" query="" prompt="type(TERM, ?t)" >}}
 section "" {
   TNumber @ type(lit(?n), number) <- num(?n);
   TString @ type(lit(?s), string) <- str(?s);
```

content/blog/01_types_basics/notation.bergamot (new file, 1 line)

```
TNumber @ type(lit(?n), number) <- num(?n);
```

content/blog/01_types_basics/string.bergamot (new file, 3 lines)

```
TNumber @ type(lit(?n), number) <- num(?n);
TPlusI @ type(plus(?e_1, ?e_2), number) <-
    type(?e_1, number), type(?e_2, number);
```

```diff
@@ -339,9 +339,8 @@ this means the rule applies to (object) variables declared to have type
 our system. A single rule takes care of figuring the types of _all_
 variables.

-{{< todo >}}
-The rest of this, but mostly statements.
-{{< /todo >}}
+> [!TODO]
+> The rest of this, but mostly statements.

 ### This Page at a Glance
 #### Metavariables
```

```diff
@@ -97,7 +97,8 @@ ordering relation `R`/`<` are expected to play together nicely (if `a < b`, and
 {{< codelines "agda" "agda-spa/Chain.agda" 3 7 >}}

 From there, the definition of the `Chain` data type is much like the definition
-of a vector, but indexed by the endpoints, and containing witnesses of `R`/`<`
+of [a vector from `Data.Vec`](https://agda.github.io/agda-stdlib/v2.0/Data.Vec.Base.html#1111),
+but indexed by the endpoints, and containing witnesses of `R`/`<`
 between its elements. The indexing allows for representing
 the type of chains between particular lattice elements, and serves to ensure
 concatenation and other operations don't merge disparate chains.
@@ -125,7 +126,7 @@ we could take the previous natural number, and it would be a bound as well).
 Thus, I define the `Height` predicate to require that a chain of the desired
 height exists, and that this height bounds the length of all other chains.

-{{< codelines "agda" "agda-spa/Chain.agda" 47 48 >}}
+{{< codelines "agda" "agda-spa/Chain.agda" 47 53 >}}

 Finally, for a lattice to have a finite height, the type of chains formed by using
 its less-than operator needs to have that height (satisfy the `Height h` predicate).
@@ -133,7 +134,7 @@ To avoid having to thread through the equivalence relation, congruence proof,
 and more, I define a specialized predicate for lattices specifically.
 I do so as a "method" in my `IsLattice` record.

-{{< codelines "agda" "agda-spa/Lattice.agda" 153 180 "hl_lines = 27 28">}}
+{{< codelines "agda" "agda-spa/Lattice.agda" 183 210 "hl_lines = 27 28">}}

 Thus, bringing the operators and other definitions of `IsLattice` into scope
 will also bring in the `FixedHeight` predicate.
@@ -192,7 +193,7 @@ three or more comparisons.
 Thus, the above-below lattice has a length of two comparisons (or alternatively,
 three elements).

-{{< codelines "agda" "agda-spa/Lattice/AboveBelow.agda" 357 358 >}}
+{{< codelines "agda" "agda-spa/Lattice/AboveBelow.agda" 357 363 >}}

 And that's it.

@@ -262,7 +263,7 @@ be collapsed into \(f(a) \not< f(a)\), changing its length. Finally,
 the function needs to produce equivalent outputs when giving equivalent inputs.
 The result is the following lemma:

-{{< codelines "agda" "agda-spa/Lattice.agda" 196 217 >}}
+{{< codelines "agda" "agda-spa/Lattice.agda" 226 247 >}}

 Given this, and two lattices of finite height, we construct the full product
 chain by lifting the `A` chain into the product via \(a \mapsto (a, \bot_2)\),
@@ -270,7 +271,7 @@ lifting the `B` chain into the product via \(b \mapsto (\top_1, b)\), and
 concatenating the results. This works because the first chain ends with
 \((\top_1, \bot_2)\), and the second starts with it.

-{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 177 179 >}}
+{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 169 171 >}}

 This gets us the longest chain; what remains is to prove that this chain's
 length is the bound of all other chains. To do so, we need to work in
@@ -292,15 +293,15 @@ and will put the implementation in a collapsible detail block. One last
 detail is that the need to decide which chain to grow --- and thus which element
 has increased --- is what introduces the need for decidable equality.

-{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 158 158 >}}
+{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 149 149 >}}

-{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 158 172 "" "**(Click here for the implementation of `unzip`)**" >}}
+{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 149 163 "" "**(Click here for the implementation of `unzip`)**" >}}

 Having decomposed the product chain into constituent chains, we simply combine
 the facts that they have to be bounded by the height of the `A` and `B` lattices,
 as well as the fact that they bound the combined chain.

-{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 174 183 "hl_lines = 8-9" >}}
+{{< codelines "agda" "agda-spa/Lattice/Prod.agda" 165 175 "hl_lines = 8-10" >}}

 This completes the proof!

@@ -326,7 +327,7 @@ lattices as input.

 {{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 7 11 >}}

-{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 21 22 >}}
+{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 23 24 >}}

 At a high level, the proof goes by induction on the number of applications
 of the product. There's just one trick. I'd like to build up an `isLattice`
@@ -347,18 +348,18 @@ To build up with the lattice instance and --- if possible --- the finite height
 instance, I needed to allow for the constituent lattices being either finite
 or infinite. I supported this by defining a helper type:

-{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 34 40 >}}
+{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 40 55 >}}

 Then, I defined the "everything at once" type, in which, instead of
 a field for the proof of finite height, has a field that constructs
 this proof _if the necessary additional information is present_.

-{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 42 55 >}}
+{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 57 76 >}}

 Finally, the proof by induction. It's actually relatively long, so I'll
 include it as a collapsible block.

-{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 57 95 "" "**(Click here to expand the inductive proof)**" >}}
+{{< codelines "agda" "agda-spa/Lattice/IterProd.agda" 78 120 "" "**(Click here to expand the inductive proof)**" >}}

 ### Fixed Height of the Map Lattice

@@ -472,11 +473,11 @@ As you can tell, the chain porting mechanism is doing the heavy lifting here.
 It's relatively easy to implement given the conditions we've set on
 conversion functions, in both directions:

-{{< codelines "agda" "agda-spa/Isomorphism.agda" 51 63 >}}
+{{< codelines "agda" "agda-spa/Isomorphism.agda" 52 64 >}}

 With that, we can prove the second lattice's finite height:

-{{< codelines "agda" "agda-spa/Isomorphism.agda" 65 72 >}}
+{{< codelines "agda" "agda-spa/Isomorphism.agda" 66 80 >}}


 The conversion functions are also not too difficult to define. I give
@@ -485,12 +486,12 @@ properties (such as the fact that `from` and `to` are inverses, preserve
 equivalence, and distribute over join) here. You can view them by clicking
 the link at the top of the code block below.

-{{< codelines "agda" "agda-spa/Lattice/FiniteValueMap.agda" 67 84 >}}
+{{< codelines "agda" "agda-spa/Lattice/FiniteValueMap.agda" 68 85 >}}

 Above, `FiniteValueMap ks` is the type of maps whose keys are fixed to
 `ks`; defined as follows:

-{{< codelines "agda" "agda-spa/Lattice/FiniteMap.agda" 50 52 >}}
+{{< codelines "agda" "agda-spa/Lattice/FiniteMap.agda" 58 60 >}}

 Proving the remaining properties (which as I mentioned, I omit from
 the main body of the post) is sufficient to apply the isomorphism,
@@ -500,7 +501,7 @@ proving that maps with finite keys are of a finite height.
 ### Using the Finite Height Property

 Lattices having a finite height is a crucial property for the sorts of
-static program analyses I've been working to implement. 
+static program analyses I've been working to implement.
 We can create functions that traverse "up" through the lattice,
 creating larger values each time. If these lattices are of a finite height,
 then the static analyses functions can only traverse "so high".
```

content/blog/04_spa_agda_fixedpoint.md (new file, 240 lines)

---
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 4: The Fixed-Point Algorithm"
series: "Static Program Analysis in Agda"
description: "In this post, I show how to find the least fixed point of a finite-height lattice"
date: 2024-11-03T17:50:26-08:00
tags: ["Agda", "Programming Languages"]
---

In the previous post we looked at lattices of finite height, which are a crucial
ingredient to our static analyses. In this post, I will describe the specific
algorithm that makes use of these lattices; this algorithm will be at the core
of this series.

Lattice-based static analyses tend to operate by iteratively combining facts
from the program into new ones. For instance, when analyzing `y = 1 + 2`, we
take the (trivial) facts that the numbers one and two are positive, and combine
them into the knowledge that `y` is positive as well. If another line of code
reads `x = y + 1`, we then apply our new knowledge of `y` to determine the sign
of `x`, too. Combining facts in this manner gives us more information, which we
can then continue to apply to learn more about the program.

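To make the fact-combination above concrete, here is a tiny Python sketch of a toy sign analysis (a hypothetical illustration; the domain, names, and rules are assumptions, not code from the series):

```python
# A toy sign domain: "+" (positive), "-" (negative), "0", and "?" (unknown).
def join(a, b):
    """Combine two facts into the least specific fact implied by both."""
    return a if a == b else "?"

def plus_sign(a, b):
    """Sign of x + y given the signs of x and y (illustrative rules only)."""
    table = {("+", "+"): "+", ("-", "-"): "-", ("0", "0"): "0",
             ("+", "0"): "+", ("0", "+"): "+", ("-", "0"): "-", ("0", "-"): "-"}
    return table.get((a, b), "?")

y = plus_sign("+", "+")   # y = 1 + 2: both literals positive, so y is "+"
x = plus_sign(y, "+")     # x = y + 1: reuse the new fact about y; x is "+"
print(join("+", "-"))     # "?": merging conflicting facts loses precision
```
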
A static program analyzer, however, is a very practical thing. Although in
mathematics we may allow ourselves to delve into infinite algorithms, we have
no such luxury while trying to, say, compile some code. As a result, after
a certain point, we need to stop our iterative (re)combination of facts. In
an ideal world, that point would be when we know we have found out everything we
could about the program. A corollary to that would be that this point must
be guaranteed to eventually occur, lest we keep looking for it indefinitely.

The fixed-point algorithm does this for us. If we describe our analysis as
a monotonic function over a finite-height lattice, this algorithm gives
us a surefire way to find out facts about our program that constitute "complete"
information that can't be re-inspected to find out more. The algorithm is guaranteed
to terminate, which means that we will not get stuck in an infinite loop.

### The Algorithm
Take a lattice \(L\) and a monotonic function \(f\). We've
[talked about monotonicity before]({{< relref "01_spa_agda_lattices#define-monotonicity" >}}),
but it's easy to re-state. Specifically, a function is monotonic if the following
rule holds true:

{{< latex >}}
\textbf{if}\ a \le b\ \textbf{then}\ f(a) \le f(b)
{{< /latex >}}

Recall that the less-than relation on lattices in our case
[encodes specificity]({{< relref "01_spa_agda_lattices#specificity" >}}).
In particular, if elements of our lattice describe our program, then smaller
elements should provide more precise descriptions (where "`x` is positive"
is more precise than "`x` has any sign", for example). Viewed through this
lens, monotonicity means that more specific inputs produce more specific outputs.
That seems reasonable.

Now, let's start with the least element of our lattice, denoted \(\bot\).
A lattice of finite height is guaranteed to have such an element. If it didn't,
we could always extend chains by tacking on a smaller element to their bottom,
and then the lattice wouldn't have a finite height anymore.
{#start-least}

Now, apply \(f\) to \(\bot\) to get \(f(\bot)\). Since \(\bot\) is the least
element, it must be true that \(\bot \le f(\bot)\). Now, if it's "less than or equal",
is it "less than", or is it "equal"? If it's the latter, we have \(\bot = f(\bot)\).
This means we've found a fixed point: given our input \(\bot\) our analysis \(f\)
produced no new information, and we're done. Otherwise, we are not done, but we
know that \(\bot < f(\bot)\), which will be helpful shortly.

Continuing the "less than" case, we can apply \(f\) again, this time to \(f(\bot)\).
This gives us \(f(f(\bot))\). Since \(f\) is monotonic and \(\bot \le f(\bot)\), we know
also that \(f(\bot) \le f(f(\bot))\). Again, ask "which is it?", and as before, if
\(f(\bot) = f(f(\bot))\), we have found a fixed point. Otherwise, we know that
\(f(\bot) < f(f(\bot))\).

We can keep doing this. Notice that with each step, we are either done
(having found a fixed point) or we have a new inequality in our hands. We can
arrange the ones we've seen so far into a chain:

{{< latex >}}
\bot < f(\bot) < f(f(\bot)) < ...
{{< /latex >}}

Each time we fail to find a fixed point, we add one element to our chain, growing
it. But if our lattice \(L\) has a finite height, that means eventually this
process will have to stop; the chain can't grow forever. Eventually, we will
have to find a value such that \(v = f(v)\). Thus, our algorithm is guaranteed
to terminate, and give a fixed point.

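As a minimal Python sketch of this iteration (an illustration under assumed names, not the Agda implementation the post describes), including the "gas" budget discussed next:

```python
def lfp(f, bottom, height):
    """Iterate f from the bottom element until a fixed point appears.

    A strictly growing chain can have at most `height` steps, so a budget
    of height + 1 iterations can never actually be exhausted.
    """
    x = bottom
    for _ in range(height + 1):
        fx = f(x)
        if fx == x:
            return x  # f(x) = x: a fixed point, no new information
        x = fx        # otherwise x < f(x): the chain grew, keep going
    raise AssertionError("unreachable for a monotonic f on a finite-height lattice")

# Toy chain "bot" < "+" < "?": f climbs from bot and stops at "+".
f = lambda s: {"bot": "+", "+": "+", "?": "?"}[s]
assert lfp(f, "bot", 2) == "+"
```
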
I implemented the iterative process of applying \(f\) using a recursive function.
Agda has a termination checker, to which the logic above --- which proves that
iteration will eventually finish --- is not at all obvious. The trick to
getting it to work was to use a notion of "gas": an always-decreasing value
that serves as one of the function's arguments. Since the value is always decreasing
in size, the termination checker is satisfied.

This works by observing that we already have a rough idea of the maximum number
of times our function will recurse; that would be the height of the lattice.
After that, we would be building an impossibly long chain. So, we'll give the
function a "budget" of that many iterations, plus one more. Since the chain
increases once each time the budget shrinks (indicating recursion), running
out of our "gas" will mean that we built an impossibly long chain --- it
will provably never happen.

In all, the recursive function is as follows:

{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 53 64 >}}

The first case handles running out of gas, arguing by bottom-elimination (contradiction).
The second case follows the algorithm I've described pretty closely; it
applies \(f\) to an existing value, checks if the result is equal (equivalent)
to the original, and if it isn't, it grows the existing chain of elements
and invokes the step function recursively with the grown chain and less gas.

The recursive function implements a single "step" of the process (applying `f`,
comparing for equality, returning the fixed point if one was found). All that's
left is to kick off the process using \(\bot\). This is what `fix` does:

{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 66 67 >}}

This function is responsible for providing gas to `doStep`; as I mentioned above,
it provides just a bit more gas than the maximum-length chain, which means that
if the gas is exhausted, we've certainly arrived at a contradiction. It also
provides an initial chain onto which `doStep` will keep tacking on new inequalities
as it finds them. Since we haven't found any yet, this is the single-element
chain of \(\bot\). The last thing it does is set up the recursion invariant
(that the sum of the gas and the chain length is constant), and provides
a proof that \(\bot \le f(\bot)\). This function always returns a fixed point.

### Least Fixed Point
Functions can have many fixed points. Take the identity function that simply
returns its argument unchanged; this function has a fixed point for every
element in its domain, since, for example, \(\text{id}(1) = 1\), \(\text{id}(2) = 2\), etc.
The fixed point found by our algorithm above is somewhat special among the
possible fixed points of \(f\): it is the _least fixed point_ of the function.
Call our fixed point \(a\); if there's another point \(b\) such that \(b=f(b)\),
then the fixed point we found must be less than or equal to \(b\) (that is,
\(a \le b\)). This is important given our interpretation of "less than" as "more specific":
the fixed-point algorithm produces the most specific possible information about
our program given the rules of our analysis.

The proof is simple; suppose that it took \(k\) iterations of calling \(f\)
to arrive at our fixed point. This gives us:

{{< latex >}}
a = \underbrace{f(f(...(f(}_{k\ \text{times}}\bot)))) = f^k(\bot)
{{< /latex >}}

Now, take our other fixed point \(b\). Since \(\bot\) is the least element of
the lattice, we have \(\bot \le b\).

{{< latex >}}
\begin{array}{ccccccccr}
& & \bot & \le & & & b & \quad \implies & \text{(monotonicity of}\ f \text{)}\\
& & f(\bot) & \le & f(b) & = & b & \quad \implies & \text{(} b\ \text{is a fixed point, monotonicity of}\ f \text{)}\\
& & f^2(\bot) & \le & f(b) & = & b & \quad \implies & \text{(} b\ \text{is a fixed point, monotonicity of}\ f \text{)}\\
\\
& & \vdots & & \vdots & & \vdots & & \\
\\
a & = & f^k(\bot) & \le & f(b) & = & b &
\end{array}
{{< /latex >}}

Because of the monotonicity of \(f\), each time we apply it, it preserves the
less-than relationship that started with \(\bot \le b\). Doing that \(k\) times,
we verify that \(a\) is our least fixed point.

|
||||
To convince Agda of this proof, we once again get in an argument with the termination
|
||||
checker, which ends the same way it did last time: with us using the notion of 'gas'
|
||||
to ensure that the repeated application of \(f\) eventually ends. Since we're
|
||||
interested in verifying that `doStep` producdes the least fixed point, we formulate
|
||||
the proof in terms of `doStep` applied to various arguments.
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 76 84 >}}
|
||||
|
||||
As with `doStep`, this function takes as arguments the amount of gas `g` and
|
||||
a partially-built chain `c`, which gets appended to for each failed equality
|
||||
comparison. In addition, however, this function takes another arbitrary fixed
|
||||
point `b`, which is greater than the current input to `doStep` (which is
|
||||
a value \(f^i(\bot\)\) for some \(i\)). It then proves that when `doStep` terminates
|
||||
(which will be with a value in the form \(f^k(\bot)\)), this value will still
|
||||
be smaller than `b`. Since it is a proof about `doStep`, `stepPreservesLess`
|
||||
proceeds by the same case analysis as its subject, and has a very similar (albeit
|
||||
simpler) structure. In short, though, it encodes the relatively informal proof
|
||||
I gave above.
|
||||
|
||||
Just like with `doStep`, I define a helper function for `stepPreservesLess` that
|
||||
kicks off its recursive invocations.
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 86 87 >}}
|
||||
|
||||
Above, `aᶠ` is the output of `fix`:
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Fixedpoint.agda" 69 70 >}}
|
||||
|
||||
### What is a Program?
|
||||
With the fixed point algorithm in hand, we have all the tools we need to define
|
||||
static program analyses:
|
||||
|
||||
1. We've created a collection of "lattice builders", which allow us to combine
|
||||
various lattice building blocks into more complicated structures; these
|
||||
structures are advanced enough to represent the information about our programs.
|
||||
2. We've figured out a way (our fixed point algorithm) to repeatedly apply
|
||||
an inference function to our programs and eventually produce results.
|
||||
This algorithm requires some additional properties from our latttices.
|
||||
3. We've proven that our lattice builders create lattices with these properties,
|
||||
making it possible to use them to construct functions fit for our fixed point
|
||||
algorithm.
|
||||
|
||||
All that's left is to start defining monotonic functions over lattices! Except,
|
||||
what are we analyzing? We've focused a fair bit on the theory of lattices,
|
||||
but we haven't yet defined even a tiny piece of the language that our programs
|
||||
will be analyzing. We will start with programs like this:
|
||||
|
||||
```
|
||||
x = 42
|
||||
y = 1
|
||||
if y {
|
||||
x = -1;
|
||||
} else {
|
||||
x = -2;
|
||||
}
|
||||
```
|
||||
|
||||
We will need to model these programs in Agda by describing them as trees
|
||||
([Abstract Syntax Trees](https://en.wikipedia.org/wiki/Abstract_syntax_tree), to be
|
||||
precise). We will also need to specify how to evaluate these programs (provide
|
||||
the [semantics](https://en.wikipedia.org/wiki/Semantics_(computer_science)) of
|
||||
our language). We will use big-step (also known as "natural") operational semantics
|
||||
to do so; here's an example rule:
|
||||
|
||||
{{< latex >}}
|
||||
\frac{\rho_1, e \Downarrow z \quad \neg (z = 0) \quad \rho_1,s_1 \Downarrow \rho_2}
|
||||
{\rho_1, \textbf{if}\ e\ \textbf{then}\ s_1\ \textbf{else}\ s_2\ \Downarrow\ \rho_2}
|
||||
{{< /latex >}}
|
||||
|
||||
The above reads:
|
||||
|
||||
> If the condition of an `if`-`else` statement evaluates to a nonzero value,
|
||||
> then to evaluate the statement, you evaluate its `then` branch.
|
||||
|
||||
In the next post, we'll talk more about how these rules work, and define
|
||||
the remainder of them to give our language life. See you then!
|
||||

551 content/blog/05_spa_agda_semantics/index.md Normal file
@@ -0,0 +1,551 @@
---
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 5: Our Programming Language"
series: "Static Program Analysis in Agda"
description: "In this post, I define the language that will serve as the object of our various analyses"
date: 2024-11-03T17:50:27-08:00
tags: ["Agda", "Programming Languages"]
custom_js: ["parser.js"]
bergamot:
  render_presets:
    default: "bergamot/rendering/imp.bergamot"
  input_modes:
    - name: "Expression"
      fn: "window.parseExpr"
    - name: "Basic Statement"
      fn: "window.parseBasicStmt"
    - name: "Statement"
      fn: "window.parseStmt"
---

In the previous several posts, I've formalized the notion of lattices, which
are an essential ingredient to formalizing the analyses in Anders Møller's
lecture notes. However, there can be no program analysis without a program
to analyze! In this post, I will define the (very simple) language that we
will be analyzing. An essential aspect of the language is its
[semantics](https://en.wikipedia.org/wiki/Semantics_(computer_science)), which,
simply speaking, explains what each feature of the language does. At the end
of the previous article, I gave the following _inference rule_ which defined
(partially) how the `if`-`else` statement in the language works.

{{< latex >}}
\frac{\rho_1, e \Downarrow z \quad \neg (z = 0) \quad \rho_1,s_1 \Downarrow \rho_2}
     {\rho_1, \textbf{if}\ e\ \textbf{then}\ s_1\ \textbf{else}\ s_2\ \Downarrow\ \rho_2}
{{< /latex >}}

Like I mentioned then, this rule reads as follows:

> If the condition of an `if`-`else` statement evaluates to a nonzero value,
> then to evaluate the statement, you evaluate its `then` branch.

Another similar --- but crucially, not equivalent --- rule is the following:

{{< latex >}}
\frac{\rho_1, e \Downarrow z \quad z = 1 \quad \rho_1,s_1 \Downarrow \rho_2}
     {\rho_1, \textbf{if}\ e\ \textbf{then}\ s_1\ \textbf{else}\ s_2\ \Downarrow\ \rho_2}
{{< /latex >}}

This time, the English interpretation of the rule is as follows:

> If the condition of an `if`-`else` statement evaluates to one,
> then to evaluate the statement, you evaluate its `then` branch.

These rules are certainly not equivalent. For instance, the former allows
the "then" branch to be executed when the condition is `2`; however, in
the latter, the value of the conditional must be `1`. If our analysis were
intelligent (our first few will not be), then this difference would change
its output when determining the signs of the following program:

```
x = 2
if x {
    y = - 1
} else {
    y = 1
}
```

Using the first, more "relaxed" rule, the condition would be considered "true",
and the sign of `y` would be `-`. On the other hand, using the second,
"stricter" rule, the sign of `y` would be `+`. I stress that in this case,
I am showing a flow-sensitive analysis (one that can understand control flow
and make more specific predictions); for our simplest analyses, we will not
be aiming for flow-sensitivity. There is plenty of work to do even then.

The point of showing these two distinct rules is that we need to be very precise
about how the language will behave, because our analyses depend on that behavior.

Let's not get ahead of ourselves, though. I've motivated the need for semantics,
but there is much groundwork to be laid before we delve into the precise rules
of our language. After all, to define the language's semantics, we need to
have a language.

### The Syntax of Our Simple Language

I've shown a couple of examples of our language now, and there won't be that
much more to it. We can start with _expressions_: things that evaluate to
something. Some examples of expressions are `1`, `x`, and `2-(x+y)`. For our
specific language, the precise set of possible expressions can be given
by the following [Context-Free Grammar](https://en.wikipedia.org/wiki/Context-free_grammar):

{{< latex >}}
\begin{array}{rcll}
e & ::= & x & \text{(variables)} \\
  & |   & z & \text{(integer literals)} \\
  & |   & e + e & \text{(addition)} \\
  & |   & e - e & \text{(subtraction)}
\end{array}
{{< /latex >}}

The above can be read as follows:

> An expression \(e\) is one of the following things:
> 1. Some variable \(x\) [importantly \(x\) is a placeholder for _any_ variable,
>    which could be `x` or `y` in our program code; specifically, \(x\) is
>    a [_metavariable_](https://en.wikipedia.org/wiki/Metavariable).]
> 2. Some integer \(z\) [once again, \(z\) can be any integer, like 1, -42, etc.].
> 3. The addition of two other expressions [which could themselves be additions etc.].
> 4. The subtraction of two other expressions [which could also themselves be additions, subtractions, etc.].

Since expressions can be nested within other expressions --- which is necessary
to allow complicated code like `2-(x+y)` above --- they form a tree. Each node
is one of the elements of the grammar above (variable, addition, etc.). If
a node contains sub-expressions (like addition and subtraction do), then
these sub-expressions form sub-trees of the given node. This data structure
is called an [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree).

Notably, though `2-(x+y)` has parentheses, our grammar above does not
include them as a case. The reason for this is that the structure of an
abstract syntax tree is sufficient to encode the order in which the operations
should be evaluated. Since I lack a nice way of drawing ASTs, I will use
an ASCII drawing to show an example.

```
Expression: 2 - (x+y)
    (-)
   /   \
  2    (+)
       /  \
      x    y


Expression: (2-x) + y
     (+)
    /   \
  (-)    y
  /  \
 2    x
```

Above, in the first AST, `(+)` is a child of the `(-)` node, which means
that it's a sub-expression. As a result, that subexpression is evaluated first,
before evaluating `(-)`, and so, the AST represents `2-(x+y)`. In the other
example, `(-)` is a child of `(+)`, and is therefore evaluated first. The resulting
association encoded by that AST is `(2-x)+y`.

To an Agda programmer, the one-of-four-things definition above should read
quite similarly to the definition of an algebraic data type. Indeed, this
is how we can encode the abstract syntax tree of expressions:

{{< codelines "Agda" "agda-spa/Language/Base.agda" 12 16 >}}

The only departure from the grammar above is that I had to invent constructors
for the variable and integer cases, since Agda doesn't support implicit coercions.
This adds a little bit of extra overhead, requiring, for example, that we write
numbers as `# 42` instead of `42`.
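
As a concrete (and partly hypothetical) example of the encoding, the expression
`2-(x+y)` from earlier might be written as the following term. Here I'm
assuming, purely for illustration, that the variable constructor is spelled
`` `_ `` and takes a string; only the `#_` constructor is confirmed above, so
check `Language/Base.agda` for the real names:

```Agda
-- the AST for 2-(x+y), with assumed constructor names for variables
example : Expr
example = (# 2) - ((` "x") + (` "y"))
```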

Having defined expressions, the next thing on the menu is _statements_. Unlike
expressions, which just produce values, statements "do something"; an example
of a statement might be the following Python line:

```Python
print("Hello, world!")
```

The `print` function doesn't produce any value, but it does perform an action;
it prints its argument to the console!

For the formalization, it turns out to be convenient to separate "simple"
statements from "complex" ones. Pragmatically speaking, the difference
between the "simple" and the "complex" is control flow; simple statements
will be guaranteed to always execute without any decisions or jumps.
The reason for this will become clearer in subsequent posts; I will foreshadow
a bit by saying that consecutive simple statements can be placed into a single
[basic block](https://en.wikipedia.org/wiki/Basic_block).
{#introduce-simple-statements}

The following is a group of three simple statements:

```
x = 1
y = x + 2
noop
```

These will always be executed in the same order, exactly once. Here, `noop`
is a convenient type of statement that simply does nothing.

On the other hand, the following statement is not simple:

```
while x {
    x = x - 1
}
```

It's not simple because it makes decisions about how the code should be executed;
if `x` is nonzero, it will try executing the statement in the body of the loop
(`x = x - 1`). Otherwise, it would skip evaluating that statement, and carry on
with subsequent code.

I first define simple statements using the `BasicStmt` type:

{{< codelines "Agda" "agda-spa/Language/Base.agda" 18 20 >}}

Complex statements are just called `Stmt`; they include loops, conditionals and
sequences ---
{{< sidenote "right" "then-note" "\(s_1\ \text{then}\ s_2\)" >}}
The standard notation for sequencing in imperative languages is
\(s_1; s_2\). However, Agda gives special meaning to the semicolon,
and I couldn't find any passable symbolic alternatives.
{{< /sidenote >}} is a sequence where \(s_2\) is evaluated after \(s_1\).
Complex statements subsume simple statements, which I model using the constructor
`⟨_⟩`.

{{< codelines "Agda" "agda-spa/Language/Base.agda" 25 29 >}}

For an example of using this encoding, take the following simple program:

```
var = 1
if var {
    x = 1
}
```

The Agda version is:

{{< codelines "Agda" "agda-spa/Main.agda" 27 34 >}}

Notice how we used `noop` to express the fact that the `else` branch of the
conditional does nothing.

### The Semantics of Our Language

We now have all the language constructs that I'll be showing off --- because
those are all the concepts that I've formalized. What's left is to define
how they behave. We will do this using a logical tool called
[_inference rules_](https://en.wikipedia.org/wiki/Rule_of_inference). I've
written about them a number of times; they're ubiquitous, particularly in the
sorts of things I like to explore on this site. The [section on inference rules]({{< relref "01_aoc_coq#inference-rules" >}})
from my Advent of Code series is pretty relevant, and [the notation section from
a post in my compiler series]({{< relref "03_compiler_typechecking#some-notation" >}}) says
much the same thing; I won't be re-describing them here.

There are three pieces which demand semantics: expressions, simple statements,
and non-simple statements. The semantics of each of the three requires the
semantics of the items that precede it. We will therefore start with expressions.

#### Expressions

The trickiest thing about expressions is that the value of an expression depends
on the "context": `x+1` can evaluate to `43` if `x` is `42`, or it can evaluate
to `0` if `x` is `-1`. To evaluate an expression, we will therefore need to
assign values to all of the variables in that expression. A mapping that
assigns values to variables is typically called an _environment_. We will write
\(\varnothing\) for "empty environment", and \(\{\texttt{x} \mapsto 42, \texttt{y} \mapsto -1 \}\) for
an environment that maps the variable \(\texttt{x}\) to 42, and the variable \(\texttt{y}\) to -1.

Now, a bit of notation. We will use the letter \(\rho\) to represent environments
(and if several environments are involved, we will occasionally number them
as \(\rho_1\), \(\rho_2\), etc.) We will use the letter \(e\) to stand for
expressions, and the letter \(v\) to stand for values. Finally, we'll write
\(\rho, e \Downarrow v\) to say that "in an environment \(\rho\), expression \(e\)
evaluates to value \(v\)". Our two previous examples of evaluating `x+1` can
thus be written as follows:
{#notation-for-environments}

{{< latex >}}
\{ \texttt{x} \mapsto 42 \}, \texttt{x}+1 \Downarrow 43 \\
\{ \texttt{x} \mapsto -1 \}, \texttt{x}+1 \Downarrow 0 \\
{{< /latex >}}

Now, on to the actual rules for how to evaluate expressions. Most simply,
integer literals like `1` just evaluate to themselves.

{{< latex >}}
\frac{n \in \text{Int}}{\rho, n \Downarrow n}
{{< /latex >}}

Note that the letter \(\rho\) is completely unused in the above rule. That's
because no matter what values _variables_ have, a number still evaluates to
the same value. As we've already established, the same is not true for a
variable like \(\texttt{x}\). To evaluate such a variable, we need to retrieve
the value it's mapped to in the current environment, which we will write as
\(\rho(\texttt{x})\). This gives the following inference rule:

{{< latex >}}
\frac{\rho(x) = v}{\rho, x \Downarrow v}
{{< /latex >}}

All that's left is to define addition and subtraction. For an expression in the
form \(e_1+e_2\), we first need to evaluate the two subexpressions \(e_1\)
and \(e_2\), and then add the two resulting numbers. As a result, the addition
rule includes two additional premises, one for evaluating each summand.

{{< latex >}}
\frac
    {\rho, e_1 \Downarrow v_1 \quad \rho, e_2 \Downarrow v_2 \quad v_1 + v_2 = v}
    {\rho, e_1+e_2 \Downarrow v}
{{< /latex >}}
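
To make the pieces concrete, here is the first of the two `x+1` claims from
above, written as a full derivation: the variable rule and the literal rule
discharge the first two premises of the addition rule, and the arithmetic
side condition discharges the third.

{{< latex >}}
\frac
    {\dfrac{\{ \texttt{x} \mapsto 42 \}(\texttt{x}) = 42}{\{ \texttt{x} \mapsto 42 \}, \texttt{x} \Downarrow 42}
     \quad
     \dfrac{1 \in \text{Int}}{\{ \texttt{x} \mapsto 42 \}, 1 \Downarrow 1}
     \quad
     42 + 1 = 43}
    {\{ \texttt{x} \mapsto 42 \}, \texttt{x}+1 \Downarrow 43}
{{< /latex >}}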

The subtraction rule is similar. Below, I've configured an instance of
[Bergamot]({{< relref "bergamot" >}}) to interpret these exact rules. Try
typing various expressions like `1`, `1+1`, etc. into the input box below
to see them evaluate. If you click the "Full Proof Tree" button, you can also view
the exact rules that were used in computing a particular value. The variables
`x`, `y`, and `z` are pre-defined for your convenience.

{{< bergamot_widget id="expr-widget" query="" prompt="eval(extend(extend(extend(empty, x, 17), y, 42), z, 0), TERM, ?v)" modes="Expression:Expression" >}}
hidden section "" {
  Eq @ eq(?x, ?x) <-;
}
section "" {
  EvalNum @ eval(?rho, lit(?n), ?n) <- int(?n);
  EvalVar @ eval(?rho, var(?x), ?v) <- inenv(?x, ?v, ?rho);
}
section "" {
  EvalPlus @ eval(?rho, plus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), add(?v_1, ?v_2, ?v);
  EvalMinus @ eval(?rho, minus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), subtract(?v_1, ?v_2, ?v);
}
hidden section "" {
  EnvTake @ inenv(?x, ?v, extend(?rho, ?x, ?v)) <-;
  EnvSkip @ inenv(?x, ?v_1, extend(?rho, ?y, ?v_2)) <- inenv(?x, ?v_1, ?rho), not(eq(?x, ?y));
}
{{< /bergamot_widget >}}

The Agda equivalent of this looks very similar to the rules themselves. I use
`⇒ᵉ` instead of \(\Downarrow\), and there's a little bit of tedium with
wrapping integers into a new `Value` type. I also used a (partial) relation
`(x, v) ∈ ρ` instead of explicitly defining accessing an environment, since
it is conceivable for a user to attempt accessing a variable that has not
been assigned to. Aside from these notational changes, the structure of each
of the constructors of the evaluation data type matches the inference rules
I showed above.

{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 27 35 >}}

#### Simple Statements
The main difference when formalizing (simple and "normal") statements is
that they modify the environment. If `x` has one value, writing `x = x + 1` will
certainly change that value. On the other hand, statements don't produce values.
So, we will be writing claims like \(\rho_1 , \textit{bs} \Rightarrow \rho_2\)
to say that the basic statement \(\textit{bs}\), when starting in environment
\(\rho_1\), will produce environment \(\rho_2\). Here's an example:

{{< latex >}}
\{ \texttt{x} \mapsto 42, \texttt{y} \mapsto 17 \}, \ \texttt{x = x - \text{1}} \Rightarrow \{ \texttt{x} \mapsto 41, \texttt{y} \mapsto 17 \}
{{< /latex >}}

Here, we subtracted one from a variable with value `42`, leaving it with a new
value of `41`.

There are two basic statements, and one of them quite literally does nothing.
The inference rule for `noop` is very simple:

{{< latex >}}
\rho,\ \texttt{noop} \Rightarrow \rho
{{< /latex >}}

For the assignment rule, we need to know how to evaluate the expression on the
right side of the equal sign. This is why we needed to define the semantics
of expressions first. Given those, the evaluation rule for assignment is as
follows, with \(\rho[x \mapsto v]\) meaning "the environment \(\rho\) but
mapping the variable \(x\) to value \(v\)".

{{< latex >}}
\frac
    {\rho, e \Downarrow v}
    {\rho, x = e \Rightarrow \rho[x \mapsto v]}
{{< /latex >}}
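
For instance, instantiating the assignment rule with the environment
\(\{\texttt{y} \mapsto 1\}\) and the statement `x = y + 2` gives:

{{< latex >}}
\frac
    {\{\texttt{y} \mapsto 1\}, \texttt{y}+2 \Downarrow 3}
    {\{\texttt{y} \mapsto 1\}, \texttt{x = y + \text{2}} \Rightarrow \{\texttt{y} \mapsto 1, \texttt{x} \mapsto 3\}}
{{< /latex >}}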

Those are actually all the rules we need, and below, I am once again configuring
a Bergamot instance, this time with simple statements. Try out `noop` or some
sort of variable assignment, like `x = x + 1`.

{{< bergamot_widget id="basic-stmt-widget" query="" prompt="stepbasic(extend(extend(extend(empty, x, 17), y, 42), z, 0), TERM, ?env)" modes="Basic Statement:Basic Statement" >}}
hidden section "" {
  Eq @ eq(?x, ?x) <-;
}
hidden section "" {
  EvalNum @ eval(?rho, lit(?n), ?n) <- int(?n);
  EvalVar @ eval(?rho, var(?x), ?v) <- inenv(?x, ?v, ?rho);
}
hidden section "" {
  EvalPlus @ eval(?rho, plus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), add(?v_1, ?v_2, ?v);
  EvalMinus @ eval(?rho, minus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), subtract(?v_1, ?v_2, ?v);
}
section "" {
  StepNoop @ stepbasic(?rho, noop, ?rho) <-;
  StepAssign @ stepbasic(?rho, assign(?x, ?e), extend(?rho, ?x, ?v)) <- eval(?rho, ?e, ?v);
}
hidden section "" {
  EnvTake @ inenv(?x, ?v, extend(?rho, ?x, ?v)) <-;
  EnvSkip @ inenv(?x, ?v_1, extend(?rho, ?y, ?v_2)) <- inenv(?x, ?v_1, ?rho), not(eq(?x, ?y));
}
{{< /bergamot_widget >}}

The Agda implementation is once again just a data type with constructors-for-rules.
This time they also look quite similar to the rules I've shown up until now,
though I continue to explicitly quantify over variables like `ρ`.

{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 37 40 >}}

#### Statements

Let's work on non-simple statements next. The easiest rule to define is probably
sequencing. When we use `then` (or `;`) to combine two statements, what we
actually want is to execute the first statement, which may change variables,
and then execute the second statement while keeping the changes from the first.
This means there are three environments: \(\rho_1\) for the initial state before
either statement is executed, \(\rho_2\) for the state between executing the
first and second statement, and \(\rho_3\) for the final state after both
are done executing. This leads to the following rule:

{{< latex >}}
\frac
    { \rho_1, s_1 \Rightarrow \rho_2 \quad \rho_2, s_2 \Rightarrow \rho_3 }
    { \rho_1, s_1; s_2 \Rightarrow \rho_3 }
{{< /latex >}}

We will actually need two rules to evaluate the conditional statement: one
for when the condition evaluates to "true", and one for when the condition
evaluates to "false". Only, I never specified booleans as being part of
the language, which means that we will need to come up with what "false" and
"true" are. I will take my cue from C++ and use zero as "false", and any other
number as "true".

If the condition of an `if`-`else` statement is "true" (nonzero), then the
effect of executing the `if`-`else` should be the same as executing the "then"
part of the statement, while completely ignoring the "else" part.

{{< latex >}}
\frac
    { \rho_1 , e \Downarrow v \quad v \neq 0 \quad \rho_1, s_1 \Rightarrow \rho_2}
    { \rho_1, \textbf{if}\ e\ \{ s_1 \}\ \textbf{else}\ \{ s_2 \}\ \Rightarrow \rho_2 }
{{< /latex >}}

Notice that in the above rule, we used the evaluation judgement \(\rho_1, e \Downarrow v\)
to evaluate the _expression_ that serves as the condition. We then had an
additional premise that requires the truthiness of the resulting value \(v\).
The rule for evaluating a conditional whose condition is "false" is very similar.

{{< latex >}}
\frac
    { \rho_1 , e \Downarrow v \quad v = 0 \quad \rho_1, s_2 \Rightarrow \rho_2}
    { \rho_1, \textbf{if}\ e\ \{ s_1 \}\ \textbf{else}\ \{ s_2 \}\ \Rightarrow \rho_2 }
{{< /latex >}}

Now that we have rules for conditional statements, it will be surprisingly easy
to define the rules for `while` loops. A `while` loop will also have two rules,
one for when its condition is truthy and one for when it's falsey. However,
unlike an `if`-`else` statement, a `while` loop whose condition is "false" will
do nothing, leaving the environment unchanged:

{{< latex >}}
\frac
    { \rho_1 , e \Downarrow v \quad v = 0 }
    { \rho_1 , \textbf{while}\ e\ \{ s \}\ \Rightarrow \rho_1 }
{{< /latex >}}

The trickiest rule is for when the condition of a `while` loop is true.
We evaluate the body once, starting in environment \(\rho_1\) and finishing
in \(\rho_2\), but then we're not done. In fact, we have to go back to the top,
and check the condition again, starting over. As a result, we include another
premise, that tells us that evaluating the loop starting at \(\rho_2\), we
eventually end in state \(\rho_3\). This encodes the "rest of the iterations"
in addition to the one we just performed. The environment \(\rho_3\) is our
final state, so that's what we use in the rule's conclusion.

{{< latex >}}
\frac
    { \rho_1 , e \Downarrow v \quad v \neq 0 \quad \rho_1 , s \Rightarrow \rho_2 \quad \rho_2 , \textbf{while}\ e\ \{ s \}\ \Rightarrow \rho_3 }
    { \rho_1 , \textbf{while}\ e\ \{ s \}\ \Rightarrow \rho_3 }
{{< /latex >}}
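
As a small sanity check, we can trace these rules on the loop
`while x { x = x - 1 }` starting in \(\{\texttt{x} \mapsto 1\}\): the condition
evaluates to \(1 \neq 0\), so the "true" rule applies; the body takes us to
\(\{\texttt{x} \mapsto 0\}\); and the recursive premise is discharged by the
"false" rule, since the condition now evaluates to \(0\). Altogether:

{{< latex >}}
\{\texttt{x} \mapsto 1\}, \textbf{while}\ \texttt{x}\ \{ \texttt{x = x - \text{1}} \}\ \Rightarrow \{\texttt{x} \mapsto 0\}
{{< /latex >}}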

And that's it! We have now seen every rule that defines the little object language
I've been using for my Agda work. Below is a Bergamot widget that implements
these rules. Try the following program, which computes the `x`th power of two,
and stores it in `y`:

```
x = 5; y = 1; while (x) { y = y + y; x = x - 1 }
```

{{< bergamot_widget id="stmt-widget" query="" prompt="step(extend(extend(extend(empty, x, 17), y, 42), z, 0), TERM, ?env)" modes="Statement:Statement" >}}
hidden section "" {
  Eq @ eq(?x, ?x) <-;
}
hidden section "" {
  EvalNum @ eval(?rho, lit(?n), ?n) <- int(?n);
  EvalVar @ eval(?rho, var(?x), ?v) <- inenv(?x, ?v, ?rho);
}
hidden section "" {
  EvalPlus @ eval(?rho, plus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), add(?v_1, ?v_2, ?v);
  EvalMinus @ eval(?rho, minus(?e_1, ?e_2), ?v) <- eval(?rho, ?e_1, ?v_1), eval(?rho, ?e_2, ?v_2), subtract(?v_1, ?v_2, ?v);
}
hidden section "" {
  StepNoop @ stepbasic(?rho, noop, ?rho) <-;
  StepAssign @ stepbasic(?rho, assign(?x, ?e), extend(?rho, ?x, ?v)) <- eval(?rho, ?e, ?v);
}
hidden section "" {
  StepLiftBasic @ step(?rho_1, ?s, ?rho_2) <- stepbasic(?rho_1, ?s, ?rho_2);
}
section "" {
  StepIfTrue @ step(?rho_1, if(?e, ?s_1, ?s_2), ?rho_2) <- eval(?rho_1, ?e, ?v), not(eq(?v, 0)), step(?rho_1, ?s_1, ?rho_2);
  StepIfFalse @ step(?rho_1, if(?e, ?s_1, ?s_2), ?rho_2) <- eval(?rho_1, ?e, ?v), eq(?v, 0), step(?rho_1, ?s_2, ?rho_2);
  StepWhileTrue @ step(?rho_1, while(?e, ?s), ?rho_3) <- eval(?rho_1, ?e, ?v), not(eq(?v, 0)), step(?rho_1, ?s, ?rho_2), step(?rho_2, while(?e, ?s), ?rho_3);
  StepWhileFalse @ step(?rho_1, while(?e, ?s), ?rho_1) <- eval(?rho_1, ?e, ?v), eq(?v, 0);
  StepSeq @ step(?rho_1, seq(?s_1, ?s_2), ?rho_3) <- step(?rho_1, ?s_1, ?rho_2), step(?rho_2, ?s_2, ?rho_3);
}
hidden section "" {
  EnvTake @ inenv(?x, ?v, extend(?rho, ?x, ?v)) <-;
  EnvSkip @ inenv(?x, ?v_1, extend(?rho, ?y, ?v_2)) <- inenv(?x, ?v_1, ?rho), not(eq(?x, ?y));
}
{{< /bergamot_widget >}}

As with all the other rules we've seen, the mathematical notation above can
be directly translated into Agda:

{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 47 64 >}}

### Semantics as Ground Truth

Prior to this post, we had been talking about using lattices and monotone
functions for program analysis. The key problem with using this framework to
define analyses is that there are many monotone functions that produce complete
nonsense; their output is, at best, unrelated to the program they're supposed
to analyze. We don't want to write such functions, since having incorrect
information about the programs in question is unhelpful.

What does it mean for a function to produce correct information, though?
In the context of sign analysis, it would mean that if we say a variable `x` is `+`,
then evaluating the program will leave us in a state in which `x` is positive.
The semantics we defined in this post give us the "evaluating the program" piece.
They establish what the programs _actually_ do, and we can use this ground
truth when checking that our analyses are correct. In subsequent posts, I will
prove the exact property I informally stated above: __for the program analyses
we define, things they "claim" about our program will match what actually happens
when executing the program using our semantics__.

A piece of the puzzle still remains: how are we going to use the monotone
functions we've been talking so much about? We need to figure out what to feed
to our analyses before we can prove their correctness.

I have an answer to that question: we will be using _control flow graphs_ (CFGs).
These are another program representation, one that's more commonly found in
compilers. I will show what they look like in the next post. I hope to see you
there!

128 content/blog/05_spa_agda_semantics/parser.js Normal file
@@ -0,0 +1,128 @@
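// Note: the descriptive comments in this file are editorial additions, not
// part of the original commit. The file implements nondeterministic parser
// combinators: a parser is a function from an input string to a list of
// [parsedValue, remainingInput] pairs (an empty list means a failed parse).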
const match = str => input => {
    if (input.startsWith(str)) {
        return [[str, input.slice(str.length)]]
    }
    return [];
};

const map = (f, m) => input => {
    return m(input).map(([v, rest]) => [f(v), rest]);
};

const apply = (m1, m2) => input => {
    return m1(input).flatMap(([f, rest]) => m2(rest).map(([v, rest]) => [f(v), rest]));
};

const pure = v => input => [[v, input]];
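
// liftA(f, m1, ..., mk) runs the given parsers in sequence and combines
// their k results with f (editorial comment).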
const liftA = (f, ...ms) => input => {
    if (ms.length <= 0) return []

    let results = map(v => [v], ms[0])(input);
    for (let i = 1; i < ms.length; i++) {
        results = results.flatMap(([vals, rest]) =>
            ms[i](rest).map(([val, rest]) => [[...vals, val], rest])
        );
    }
    return results.map(([vals, rest]) => [f(...vals), rest]);
};

const many1 = (m) => liftA((x, xs) => [x].concat(xs), m, oneOf([
    lazy(() => many1(m)),
    pure([])
]));

const many = (m) => oneOf([ pure([]), many1(m) ]);

const oneOf = ms => input => {
    return ms.flatMap(m => m(input));
};

const takeWhileRegex0 = regex => input => {
    let idx = 0;
    while (idx < input.length && regex.test(input[idx])) {
        idx++;
    }
    return [[input.slice(0, idx), input.slice(idx)]];
};

const takeWhileRegex = regex => input => {
    const result = takeWhileRegex0(regex)(input);
    if (result[0][0].length > 0) return result;
    return [];
};

const spaces = takeWhileRegex0(/\s/);

const digits = takeWhileRegex(/\d/);

const alphas = takeWhileRegex(/[a-zA-Z]/);

const left = (m1, m2) => liftA((a, _) => a, m1, m2);

const right = (m1, m2) => liftA((_, b) => b, m1, m2);

const word = s => left(match(s), spaces);

const end = s => s.length == 0 ? [['', '']] : [];

const lazy = deferred => input => deferred()(input);

const ident = left(alphas, spaces);

const number = oneOf([
    liftA((a, b) => a + b, word("-"), left(digits, spaces)),
    left(digits, spaces),
]);
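
// The grammar proper: the parsers below produce Bergamot terms as strings,
// e.g. "lit(1)" or "plus(var(x), lit(2))", matching the rules in the
// widgets above (editorial comment).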
const basicExpr = oneOf([
    map(n => `lit(${n})`, number),
    map(x => `var(${x})`, ident),
    liftA((lp, v, rp) => v, word("("), lazy(() => expr), word(")")),
]);

const opExpr = oneOf([
    liftA((_a, _b, e) => ["plus", e], word("+"), spaces, lazy(() => expr)),
    liftA((_a, _b, e) => ["minus", e], word("-"), spaces, lazy(() => expr)),
]);

const flatten = (e, es) => {
    return es.reduce((e1, [op, e2]) => `${op}(${e1}, ${e2})`, e);
}

const expr = oneOf([
    basicExpr,
    liftA(flatten, basicExpr, many(opExpr)),
]);

const basicStmt = oneOf([
    liftA((x, _, e) => `assign(${x}, ${e})`, ident, word("="), expr),
    word("noop"),
]);

const stmt = oneOf([
    basicStmt,
    liftA((_if, _lp_, cond, _rp, _lbr1_, s1, _rbr1, _else, _lbr2, s2, _rbr2) => `if(${cond}, ${s1}, ${s2})`,
        word("if"), word("("), expr, word(")"),
        word("{"), lazy(() => stmtSeq), word("}"),
        word("else"), word("{"), lazy(() => stmtSeq), word("}")),
    liftA((_while, _lp_, cond, _rp, _lbr_, s1, _rbr) => `while(${cond}, ${s1})`,
        word("while"), word("("), expr, word(")"),
        word("{"), lazy(() => stmtSeq), word("}")),
]);

const stmtSeq = oneOf([
    liftA((s1, _semi, rest) => `seq(${s1}, ${rest})`, stmt, word(";"), lazy(() => stmtSeq)),
    stmt,
]);

const parseWhole = m => string => {
    const result = left(m, end)(string);
    console.log(result);
    if (result.length > 0) return result[0][0];
    return null;
}

window.parseExpr = parseWhole(expr);
window.parseBasicStmt = parseWhole(basicStmt);
window.parseStmt = parseWhole(stmtSeq);

15 content/blog/06_spa_agda_cfg/if-cfg.dot Normal file
@@ -0,0 +1,15 @@
digraph G {
    graph[dpi=300 fontsize=14 fontname="Courier New"];
    node[shape=rectangle style="filled" fillcolor="#fafafa" penwidth=0.5 color="#aaaaaa"];
    edge[arrowsize=0.3 color="#444444"]

    node_begin [label="x = ...;\lx\l"]
    node_then [label="x = 1\l"]
    node_else [label="x = 0\l"]
    node_end [label="y = x\l"]

    node_begin -> node_then
    node_begin -> node_else
    node_then -> node_end
    node_else -> node_end
}

BIN content/blog/06_spa_agda_cfg/if-cfg.png Normal file
Binary file not shown. (After: Width | Height | Size: 21 KiB)

377 content/blog/06_spa_agda_cfg/index.md Normal file
@@ -0,0 +1,377 @@
---
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 6: Control Flow Graphs"
series: "Static Program Analysis in Agda"
description: "In this post, I show an Agda definition of Control Flow Graphs and their construction"
date: 2024-11-27T16:26:42-07:00
tags: ["Agda", "Programming Languages"]
---

In the previous post, I've given a formal definition of the programming
language that I've been trying to analyze. This formal definition serves
as the "ground truth" for how our little imperative programs are executed;
however, program analyses (especially in practice) seldom take the formal
semantics as input. Instead, they focus on more pragmatic program
representations from the world of compilers. One such representation is
_Control Flow Graphs (CFGs)_. That's what I want to discuss in this post.

Let's start by building some informal intuition. CFGs are pretty much what
their name suggests: they are a type of [graph](https://en.wikipedia.org/wiki/Graph_(discrete_mathematics));
their edges show how execution might jump from one piece of code to
another (how control might flow).

For example, take the below program.

```
x = ...;
if x {
    x = 1;
} else {
    x = 0;
}
y = x;
```

The CFG might look like this:

{{< figure src="if-cfg.png" label="CFG for simple `if`-`else` code." class="small" >}}

Here, the initialization of `x` with `...`, as well as the `if` condition (just `x`),
are guaranteed to execute one after another, so they occupy a single node. From there,
depending on the condition, the control flow can jump to one of the
branches of the `if` statement: the "then" branch if the condition is truthy,
and the "else" branch if the condition is falsy. As a result, there are two
arrows coming out of the initial node. Once either branch is executed, control
always jumps to the code right after the `if` statement (the `y = x`). Thus,
both the `x = 1` and `x = 0` nodes have a single arrow to the `y = x` node.

As another example, if you had a loop:

```
x = ...;
while x {
    x = x - 1;
}
y = x;
```

The CFG would look like this:

{{< figure src="while-cfg.png" label="CFG for simple `while` code." class="small" >}}

Here, the condition of the loop (`x`) is not always guaranteed to execute together
with the code that initializes `x`. That's because the condition of the loop
is checked after every iteration, whereas the code before the loop is executed
only once. As a result, `x = ...` and `x` occupy distinct CFG nodes. From there,
the control flow can proceed in two different ways, depending on the value
of `x`. If `x` is truthy, the program will proceed to the loop body (decrementing `x`).
If `x` is falsy, the program will skip the loop body altogether, and go to the
code right after the loop (`y = x`). This is indicated by the two arrows
going out of the `x` node. After executing the body, we return to the condition
of the loop to see if we need to run another iteration. Because of this, the
decrementing node has an arrow back to the loop condition.

Now, let's be a bit more precise. Control Flow Graphs are defined as follows:

* __The nodes__ are [_basic blocks_](https://en.wikipedia.org/wiki/Basic_block).
  Paraphrasing Wikipedia's definition, a basic block is a piece of code that
  has only one entry point and one exit point.

  The one-entry-point rule means that it's not possible to jump into the middle
  of the basic block, executing only half of its instructions. The execution of
  a basic block always begins at the top. Symmetrically, the one-exit-point
  rule means that you can't jump away to other code, skipping some instructions.
  The execution of a basic block always ends at the bottom.

  As a result of these constraints, when running a basic block, you are
  guaranteed to execute every instruction in exactly the order they occur in,
  and execute each instruction exactly once.
* __The edges__ are jumps between basic blocks. We've already seen how
  `if` and `while` statements introduce these jumps.

  Basic blocks can only be made of code that doesn't jump (otherwise,
  we violate the single-exit-point policy). In the previous post,
  we defined exactly this kind of code as [simple statements]({{< relref "05_spa_agda_semantics#introduce-simple-statements" >}}).
  So, in our control flow graph, nodes will be sequences of simple statements.
  {#list-basic-stmts}

### Control Flow Graphs in Agda

#### Basic Definition
At an abstract level, it's easy to say "it's just a graph where X is Y" about
anything. It's much harder to give a precise definition of such a graph,
particularly if you want to rule out invalid graphs (e.g., ones with edges
pointing nowhere). In Agda, I chose to represent a CFG with two lists: one of nodes,
and one of edges. Each node is simply a list of `BasicStmt`s, as
I described in a preceding paragraph. An edge is simply a pair of numbers,
each number encoding the index of the node connected by the edge.

Here's where it gets a little complicated. I don't want to use plain natural
numbers for indices, because that means you can easily introduce "broken" edges.
For example, what if you have 4 nodes, and you have an edge `(5, 5)`? To avoid
this, I picked the finite natural numbers represented by
[`Fin`](https://agda.github.io/agda-stdlib/v2.0/Data.Fin.Base.html#1154)
as endpoints for edges.

```Agda
data Fin : ℕ → Set where
  zero : Fin (suc n)
  suc : (i : Fin n) → Fin (suc n)
```

Specifically, `Fin n` is the type of natural numbers less than `n`. Following
this definition, `Fin 3` represents the numbers `0`, `1` and `2`. These are
represented using the same constructors as `Nat`: `zero` and `suc`. The type
of `zero` is `Fin (suc n)` for any `n`; this makes sense because zero is less
than any number plus one. For `suc`, the bound `n` of the input `i` is incremented
by one, leading to another `suc n` in the final type. This makes sense because if
`i < n`, then `i + 1 < n + 1`. I've previously explained this data type
[in another post on this site]({{< relref "01_aoc_coq#aside-vectors-and-finite-mathbbn" >}}).
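
Spelled out, the three inhabitants of `Fin 3` look like this:

```Agda
open import Data.Fin using (Fin; zero; suc)

zero′ one′ two′ : Fin 3
zero′ = zero              -- represents 0, since 0 < 3
one′  = suc zero          -- represents 1, since 1 < 3
two′  = suc (suc zero)    -- represents 2, since 2 < 3
```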

Here's my definition of `Graph`s written using `Fin`:

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 24 39 >}}

I explicitly used a `size` field, which determines how many nodes are in the
graph, and serves as the upper bound for the edge indices. From there, an
index `Index` into the node list is
{{< sidenote "right" "size-note" "just a natural number less than `size`," >}}
There are <code>size</code> natural numbers less than <code>size</code>:<br>
<code>0, 1, ..., size - 1</code>.
{{< /sidenote >}}
and an edge is just a pair of indices. The graph then contains a vector
(exact-length list) `nodes` of all the basic blocks, and then a list of
edges `edges`.

There are two fields here that I have not yet said anything about: `inputs`
and `outputs`. When we have a complete CFG for our programs, these fields are
totally unnecessary. However, as we are _building_ the CFG, these will come
in handy, by telling us how to stitch together smaller sub-graphs that we've
already built. Let's talk about that next.

#### Combining Graphs
Suppose you're building a CFG for a program in the following form:

```
code1;
code2;
```

Where `code1` and `code2` are arbitrary pieces of code, which could include
statements, loops, and pretty much anything else. Besides the fact that they
occur one after another, these pieces of code are unrelated, and we can
build CFGs for each one of them independently. However, the fact that `code1` and
`code2` are in sequence means that the full control flow graph for the above
program should have edges going from the nodes in `code1` to the nodes in `code2`.
Of course, not _every_ node in `code1` should have such edges: that would
mean that after executing any "basic" sequence of instructions, you could suddenly
decide to skip the rest of `code1` and move on to executing `code2`.

Thus, we need to be more precise about what edges we need to insert; we want
to insert edges between the "final" nodes in `code1` (where control ends up
after `code1` is finished executing) and the "initial" nodes in `code2` (where
control would begin once we started executing `code2`). Those are the `outputs`
and `inputs`, respectively. When stitching together sequenced control graphs,
we will connect each of the outputs of one to each of the inputs of the other.
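
Schematically, the new cross edges are just a Cartesian product of the two
index lists. Here is a small standalone sketch of that product, with
illustrative names rather than the real definitions from `Language/Graphs.agda`:

```Agda
open import Data.Nat using (ℕ)
open import Data.Fin using (Fin)
open import Data.List using (List; map; concatMap)
open import Data.Product using (_×_; _,_)

-- one edge from every output of the first graph to every input of the second
connect : ∀ {n m : ℕ} → List (Fin n) → List (Fin m) → List (Fin n × Fin m)
connect outs ins = concatMap (λ o → map (o ,_) ins) outs
```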

This is defined by the operation `g₁ ↦ g₂`, which sequences two graphs `g₁` and `g₂`:

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 72 83 >}}

The definition starts out pretty innocuous, but gets a bit complicated by the
end. The sum of the numbers of nodes in the two operands becomes the new graph
size, and the nodes from the two graphs are all included in the result. Then,
the definitions start making use of various operators like `↑ˡᵉ` and `↑ʳᵉ`;
these deserve an explanation.

The tricky thing is that when we're concatenating lists of nodes, we are changing
some of the indices of the elements within. For instance, in the lists
`[x]` and `[y]`, the indices of both `x` and `y` are `0`; however, in the
concatenated list `[x, y]`, the index of `x` is still `0`, but the index of `y`
is `1`. More generally, when we concatenate two lists `l1` and `l2`, the indices
into `l1` remain unchanged, whereas the indices into `l2` are shifted by `length l1`.
{#fin-reindexing}

Actually, that's not all there is to it. The _values_ of the indices into
the left list don't change, but their types do! They start as `Fin (length l1)`,
but for the whole list, these same indices will have type `Fin (length l1 + length l2)`.

To help deal with this, Agda provides the operators
[`↑ˡ`](https://agda.github.io/agda-stdlib/v2.0/Data.Fin.Base.html#2355)
and [`↑ʳ`](https://agda.github.io/agda-stdlib/v2.0/Data.Fin.Base.html#2522)
that implement this re-indexing and re-typing. The former implements "re-indexing
on the left" --- given an index into the left list `l1`, it changes its type
by adding the other list's length to it, but keeps the index value itself
unchanged. The latter implements "re-indexing on the right" --- given an index
into the right list `l2`, it adds the length of the first list to it (shifting it),
and does the same to its type.

The definition leads to the following equations:

```Agda
l1 : Vec A n
l2 : Vec A m

idx1 : Fin n -- index into l1
idx2 : Fin m -- index into l2

l1 [ idx1 ] ≡ (l1 ++ l2) [ idx1 ↑ˡ m ]
l2 [ idx2 ] ≡ (l1 ++ l2) [ n ↑ʳ idx2 ]
```

The operators used in the definition above are just versions of the same
re-indexing operators. The `↑ˡᵉ` operator applies `↑ˡ` to all the (__e__)dges
in a graph, and the `↑ˡⁱ` applies it to all the (__i__)ndices in a list
(like `inputs` and `outputs`).
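
For a minimal concrete instance of these operators (standard library names,
toy values):

```Agda
open import Data.Fin using (Fin; zero; suc; _↑ˡ_; _↑ʳ_)

-- the only index into a one-element list
idx : Fin 1
idx = zero

-- the same index viewed inside a two-element concatenation: on the left,
-- the value stays 0; only the type grows from Fin 1 to Fin 2
leftIdx : Fin 2
leftIdx = idx ↑ˡ 1

-- on the right, the index is shifted past the one-element left list,
-- so the value becomes 1
rightIdx : Fin 2
rightIdx = 1 ↑ʳ idx
```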

Given these definitions, hopefully the intent with the rest of the definition
is not too hard to see. The edges in the new graph come from three places:
the graphs `g₁` and `g₂`, and from creating a new edge from each of the outputs
of `g₁` to each of the inputs of `g₂`. We keep the inputs of `g₁` as the
inputs of the whole graph (since `g₁` comes first), and symmetrically we keep
the outputs of `g₂`. Of course, we do have to re-index them to keep them
pointing at the right nodes.

Another operation we will need is "overlaying" two graphs: this will be like
placing them in parallel, without adding jumps between the two. We use this
operation when combining the sub-CFGs of the "if" and "else" branches of an
`if`/`else`, which both follow the condition, and both proceed to the code after
the conditional.

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 59 70 >}}

Everything here is just concatenation; we pool together the nodes, edges,
inputs, and outputs, and the main source of complexity is the re-indexing.

The last operation, which we will use for `while` loops, is looping. This
operation simply connects the outputs of a graph back to its inputs (allowing
looping), and also allows the body to be skipped. This is slightly different
from the graph for `while` loops I showed above; the reason for that is that
I currently don't include the conditional expressions in my CFG. This is a
limitation that I will address in future work.

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 85 95 >}}

Given these three operations, I construct Control Flow Graphs as follows, where
`singleton` creates a new CFG node with the given list of simple statements:

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 122 126 >}}

Throughout this, I've been liberal in including empty CFG nodes where convenient.
This is a departure from the formal definition I gave above, but it makes
things much simpler.

### Additional Functions

To integrate Control Flow Graphs into our lattice-based program analyses, we'll
need to do a couple of things. First, upon reading the
[reference _Static Program Analysis_ text](https://cs.au.dk/~amoeller/spa/),
one sees a lot of quantification over the predecessors or successors of a
given CFG node. For example, the following equation is from Chapter 5:

{{< latex >}}
\textit{JOIN}(v) = \bigsqcup_{w \in \textit{pred}(v)} \llbracket w \rrbracket
{{< /latex >}}

To compute the \(\textit{JOIN}\) function (which we have not covered yet) for
a given CFG node, we need to iterate over all of its predecessors, and
combine their static information using \(\sqcup\), which I first
[explained several posts ago]({{< relref "01_spa_agda_lattices#least-upper-bound" >}}).
To be able to iterate over them, we need to be able to retrieve the predecessors
of a node from a graph!
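
The shape of that computation is a fold over the predecessor list. Here is a
sketch, parameterized over placeholders (`L`, `_⊔_`, `⊥`, `⟦_⟧`,
`predecessors`) for things defined elsewhere in the series:

```Agda
open import Data.List using (List; map; foldr)

module _ {L : Set} (_⊔_ : L → L → L) (⊥ : L)
         {Index : Set} (⟦_⟧ : Index → L)
         (predecessors : Index → List Index) where

  -- JOIN v combines the information of all predecessors of v with ⊔,
  -- starting from the least element ⊥
  JOIN : Index → L
  JOIN v = foldr _⊔_ ⊥ (map ⟦_⟧ (predecessors v))
```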
|
||||
Our encoding does not make computing the predecessors particularly easy; to
|
||||
check if two nodes are connected, we need to check if an `Index`-`Index` pair
|
||||
corresponding to the nodes is present in the `edges` list. To this end, we need
|
||||
to be able to compare edges for equality. Fortunately, it's relatively
|
||||
straightforward to show that our edges can be compared in such a way;
|
||||
after all, they are just pairs of `Fin`s, and `Fin`s and products support
|
||||
these comparisons.
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 149 152 >}}
|
||||
|
||||
Next, if we can compare edges for equality, we can check if an edge is in
|
||||
a list. Agda provides a built-in function for this:
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 154 154 >}}
|
||||
|
||||
To find the predecessors of a particular node, we go through all other nodes
|
||||
in the graph and see if there's an edge there between those nodes and the
|
||||
current one. This is preferable to simply iterating over the edges because
|
||||
we may have duplicates in that list (why not?).
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 165 166 >}}
|
||||
|
||||
Above, `indices` is a list of all the node identifiers in the graph. Since the
|
||||
graph has `size` nodes, the indices of all these nodes are simply the values
|
||||
`0`, `1`, ..., `size - 1`. I defined a special function `finValues` to compute
|
||||
this list, together with a proof that this list is unique.
|
||||
|
||||
{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 127 143 >}}

Another important property of `finValues` is that each node identifier is
present in the list, so that computations written by traversing the node
list do not "miss" nodes.

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 145 147 >}}

We can specialize these definitions for a particular graph `g`:

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 156 163 >}}

To recap, we now have:
* A way to build control flow graphs from programs
* A list (unique'd and complete) of all nodes in the control flow graph so that
  we can iterate over them when the algorithm demands.
* A 'predecessors' function, which will be used by our static program analyses,
  implemented as an iteration over the list of nodes.

All that's left is to connect our `predecessors` function to edges in the graph.
The following definitions say that when an edge is in the graph, the starting
node is listed as a predecessor of the ending node, and vice versa.

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 168 177 >}}

### Connecting Two Distinct Representations

I've described Control Flow Graphs as a compiler-centric representation of the
program. Unlike the formal semantics from the previous post, CFGs do not reason
about the dynamic behavior of the code. Instead, they capture the possible
paths that execution can take through the instructions. In that
sense, they are more of an approximation of what the program will do. This is
good: because of [Rice's theorem](https://en.wikipedia.org/wiki/Rice%27s_theorem),
we can't do anything other than approximating without running the program.

However, an incorrect approximation is of no use at all. Since the CFGs we build
will be the core data type used by our program analyses, it's important that they
are an accurate, if incomplete, representation. Specifically, because most
of our analyses reason about possible outcomes --- we report what sign each
variable __could__ have, for instance --- it's important that we don't accidentally
omit cases that can happen in practice from our CFGs. Formally, this means
that for each possible execution of a program according to its semantics,
{{< sidenote "right" "converse-note" "there exists a corresponding path through the graph." >}}
The converse is desirable too: that the graph has only paths that correspond
to possible executions of the program. One graph that violates this property is
the strongly-connected graph of all basic blocks in a program. Analyzing
such a graph would give us an overly-conservative estimation; since anything
can happen, most of our answers will likely be too general to be of any use. If,
on the other hand, only the necessary graph connections exist, we can be more
precise.<br>
<br>
However, proving this converse property (or even stating it precisely) is much
harder, because our graphs are somewhat conservative already. There exist
programs in which the condition of an <code>if</code>-statement is always
evaluated to <code>false</code>, but our graphs always have edges for both
the "then" and "else" cases. Determining whether a condition is always false
(e.g.) is undecidable thanks to Rice's theorem (again), so we can't rule it out.
Instead, we could broaden "all possible executions"
to "all possible executions where branching conditions can produce arbitrary
results", but this is something else entirely.<br>
<br>
For the time being, I will leave this converse property aside. As a result,
our approximations might be "too careful". However, they will at the very least
be sound.
{{< /sidenote >}}

In the next post, I will prove that this property holds for the graphs shown
here and the formal semantics I defined earlier. I hope to see you there!
15
content/blog/06_spa_agda_cfg/while-cfg.dot
Normal file
15
content/blog/06_spa_agda_cfg/while-cfg.dot
Normal file
@@ -0,0 +1,15 @@
digraph G {
graph[dpi=300 fontsize=14 fontname="Courier New"];
node[shape=rectangle style="filled" fillcolor="#fafafa" penwidth=0.5 color="#aaaaaa"];
edge[arrowsize=0.3 color="#444444"]

node_begin [label="x = ...;\l"]
node_cond [label="x\l"]
node_body [label="x = x - 1\l"]
node_end [label="y = x\l"]

node_begin -> node_cond
node_cond -> node_body
node_cond -> node_end
node_body -> node_cond
}
BIN
content/blog/06_spa_agda_cfg/while-cfg.png
Normal file
BIN
content/blog/06_spa_agda_cfg/while-cfg.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
376
content/blog/07_spa_agda_semantics_and_cfg/index.md
Normal file
376
content/blog/07_spa_agda_semantics_and_cfg/index.md
Normal file
@@ -0,0 +1,376 @@
---
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 7: Connecting Semantics and Control Flow Graphs"
series: "Static Program Analysis in Agda"
description: "In this post, I prove that the Control Flow Graphs from the previous sections are sound according to our language's semantics"
date: 2024-11-28T20:32:00-07:00
tags: ["Agda", "Programming Languages"]
---

In the previous two posts, I covered two ways of looking at programs in
my little toy language:

* [In part 5]({{< relref "05_spa_agda_semantics" >}}), I covered the
  __formal semantics__ of the programming language. These are precise rules
  that describe how programs are executed. These serve as the source of truth
  for what each statement and expression does.

  Because they are the source of truth, they capture all information about
  how programs are executed. To determine that a program starts in one
  environment and ends in another (getting a judgement \(\rho_1, s \Rightarrow \rho_2\)),
  we need to actually run the program. In fact, our Agda definitions
  encoding the semantics actually produce proof trees, which contain every
  single step of the program's execution.
* [In part 6]({{< relref "06_spa_agda_cfg" >}}), I covered
  __Control Flow Graphs__ (CFGs), which, in short, arrange code into a structure
  that represents how execution moves from one statement or expression to the
  next.

  Unlike the semantics, CFGs do not capture a program's entire execution;
  they merely contain the possible orders in which statements can be evaluated.
  Instead of capturing the exact number of iterations performed by a `while`
  loop, they encode repetition as cycles in the graph. Because they are
  missing some information, they're more of an approximation of a program's
  behavior.

Our analyses operate on CFGs, but it is our semantics that actually determine
how a program behaves. In order for our analyses to be able to produce correct
results, we need to make sure that there isn't a disconnect between the
approximation and the truth. In the previous post, I stated the property I will
use to establish the connection between the two perspectives:

> For each possible execution of a program according to its semantics,
> there exists a corresponding path through the graph.

By ensuring this property, we will guarantee that our Control Flow Graphs
account for anything that might happen. Thus, a correct analysis built on top
of the graphs will produce results that match reality.

### Traces: Paths Through a Graph
A CFG contains each "basic" statement in our program, by definition; when we're
executing the program, we are therefore running code in one of the CFG's nodes.
When we switch from one node to another, there ought to be an edge between the
two, since edges in the CFG encode possible control flow. We keep doing this
until the program terminates (if ever).

Now, I said that there "ought to be edges" in the graph that correspond to
our program's execution. Moreover, the endpoints of these edges have to line
up, since we can only switch which basic block / node we're executing by following
an edge. As a result, if our CFG is correct, then for every program execution,
there is a path between the CFG's nodes that matches the statements that we
were executing.

Take the following program and CFG from the previous post as an example.

{{< sidebyside >}}
{{% sidebysideitem weight="0.55" %}}
```
x = 2;
while x {
    x = x - 1;
}
y = x;
```
{{% /sidebysideitem %}}
{{< sidebysideitem weight="0.5" >}}
{{< figure src="while-cfg.png" label="CFG for simple `while` code." class="small" >}}
{{< /sidebysideitem >}}
{{< /sidebyside >}}

We start by executing `x = 2`, which is the top node in the CFG. Then, we execute
the condition of the loop, `x`. This condition is in the second node from
the top; fortunately, there exists an edge between `x = 2` and `x` that
allows for this possibility. Once we've computed `x`, we know that it's nonzero,
and therefore we proceed to the loop body. This is the statement `x = x - 1`,
contained in the bottom left node in the CFG. There is once again an edge
between `x` and that node; so far, so good. Once we're done executing the
statement, we go back to the top of the loop again, following the edge back to
the middle node. We then execute the condition, loop body, and condition again.
At that point we have reduced `x` to zero, so the condition produces a falsey
value. We exit the loop and execute `y = x`, which is allowed by the edge from
the middle node to the bottom right node.

We will want to show that every possible execution of the program (e.g.,
with different variable assignments) corresponds to a path in the CFG. If one
doesn't, then our program can do something that our CFG doesn't account for,
which means that our analyses will not be correct.

I will define a `Trace` datatype, which will be an embellished
path through the graph. At its core, a path is simply a list of indices
together with edges that connect them. Viewed another way, it's a list of edges,
where each edge's endpoint is the next edge's starting point. We want to make
illegal states unrepresentable, and therefore use the type system to assert
that the edges are compatible. The easiest way to do this is by making
our `Trace` indexed by its start and end points. An empty trace, containing
no edges, will start and end in the same node; the `::` equivalent for the trace
will allow prepending one edge, starting at node `i1` and ending in `i2`, to
another trace which starts in `i2` and ends in some arbitrary `i3`. Here's
an initial stab at that:

```Agda
module _ {g : Graph} where
    open Graph g using (Index; edges; inputs; outputs)

    data Trace : Index → Index → Set where
        Trace-single : ∀ {idx : Index} → Trace idx idx
        Trace-edge : ∀ {idx₁ idx₂ idx₃ : Index} →
            (idx₁ , idx₂) ∈ edges →
            Trace idx₂ idx₃ → Trace idx₁ idx₃
```
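
As a quick sanity check of this definition, here's a hypothetical value of the
type (my own sketch, written as if inside the same module as `Trace`, and
assuming the graph has at least two nodes): given a proof that the edge
`(zero , suc zero)` is in `edges`, we can build a trace from node `0` to
node `1` consisting of that single edge.

```Agda
    -- `e` is the assumed membership proof for the edge (0 , 1).
    exampleTrace : (zero , suc zero) ∈ edges → Trace zero (suc zero)
    exampleTrace e = Trace-edge e Trace-single
```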

This isn't enough, though. Suppose you had a function that takes an evaluation
judgement and produces a trace, resulting in a signature like this:

```Agda
buildCfg-sufficient : ∀ {s : Stmt} {ρ₁ ρ₂ : Env} → ρ₁ , s ⇒ˢ ρ₂ →
    let g = buildCfg s
    in Σ (Index g × Index g) (λ (idx₁ , idx₂) → Trace {g} idx₁ idx₂)
```

What's stopping this function from returning _any_ trace through the graph,
including one that doesn't even include the statements in our program `s`?
We need to narrow the type somewhat to require that the nodes it visits have
some relation to the program execution in question.

We could do this by indexing the `Trace` data type by a list of statements
that we expect it to match, and requiring that for each constructor, the
statements of the starting node be at the front of that list. We could compute
the list of executed statements in order using
{{< sidenote "right" "full-execution-note" "a recursive function on the `_,_⇒ˢ_` data type." >}}
I mentioned earlier that our encoding of the semantics is actually defining
a proof tree, which includes every step of the computation. That's why we can
write a function that takes the proof tree and extracts the executed statements.
{{< /sidenote >}}

That would work, but it loses a bit of information. The execution judgement
contains not only each statement that was evaluated, but also the environments
before and after evaluating it. Keeping those around will be useful: eventually,
we'd like to state the invariant that at every CFG node, the results of our
analysis match the current program environment. Thus, instead of indexing simply
by the statements of code, I chose to index my `Trace` by the
starting and ending environment, and to require it to contain evaluation judgements
for each node's code. The judgements include the statements that were evaluated,
which we can match against the code in the CFG node. However, they also assert
that the environments before and after are connected by that code in the
language's formal semantics. The resulting definition is as follows:

{{< codelines "Agda" "agda-spa/Language/Traces.agda" 10 18 >}}

The `g [ idx ]` and `g [ idx₁ ]` represent accessing the basic block code at
indices `idx` and `idx₁` in graph `g`.

### Trace Preservation by Graph Operations

Our proofs of trace existence will have the same "shape" as the functions that
build the graph. To prove the trace property, we'll assume that evaluations of
sub-statements correspond to traces in the sub-graphs, and use that to prove
that the full statements have corresponding traces in the full graph. We built
up graphs by combining sub-graphs for sub-statements, using `_∙_` (overlaying
two graphs), `_↦_` (sequencing two graphs) and `loop` (creating a zero-or-more
loop in the graph). Thus, to make the jump from sub-graphs to full graphs,
we'll need to prove that traces persist through overlaying, sequencing,
and looping.

Take `_∙_`, for instance; we want to show that if a trace exists in the left
operand of overlaying, it also exists in the final graph. This leads to
the following statement and proof:

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 88 97 >}}

There are some details there to discuss.
* First, we have to change the
  indices of the returned `Trace`. That's because they start out as indices
  into the graph `g₁`, but become indices into the graph `g₁ ∙ g₂`. To take
  care of this re-indexing, we have to make use of the `↑ˡ` operators,
  which I described in [this section of the previous post]({{< relref "06_spa_agda_cfg#fin-reindexing" >}}).
* Next, in either case, we need to show that the new index acquired via `↑ˡ`
  returns the same basic block in the new graph as the old index returned in
  the original graph. Fortunately, the Agda standard library provides a proof
  of this, `lookup-++ˡ`. The resulting equality is the following:

  ```Agda
  g₁ [ idx₁ ] ≡ (g₁ ∙ g₂) [ idx₁ ↑ˡ Graph.size g₂ ]
  ```

  This allows us to use the evaluation judgement in each constructor for
  traces in the output of the function.
* Lastly, in the `Trace-edge` case, we have to additionally return a proof that the
  edge used by the trace still exists in the output graph. This follows
  from the fact that we include the edges from `g₁` after re-indexing them.

  ```Agda
  ; edges = (Graph.edges g₁ ↑ˡᵉ Graph.size g₂) List.++
            (Graph.size g₁ ↑ʳᵉ Graph.edges g₂)
  ```

  The `↑ˡᵉ` function is just a list `map` with `↑ˡ` (see the sketch after
  this list). Thus, if a pair of edges
  is in the original list (`Graph.edges g₁`), as is evidenced by `idx₁→idx`,
  then its re-indexing is in the mapped list. To show this, I use the utility
  lemma `x∈xs⇒fx∈fxs`. The mapped list is the left-hand-side of a `List.++`
  operator, so I additionally use the lemma `∈-++⁺ˡ` that shows membership is
  preserved by list concatenation.
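
Since the post doesn't show `↑ˡᵉ` itself, here is my own sketch of what "a
list `map` with `↑ˡ`" could look like; the name, signature, and imports here
are assumptions rather than the repository's actual definition:

```Agda
open import Data.Fin using (Fin; _↑ˡ_)
open import Data.List using (List; map)
open import Data.Nat using (ℕ; _+_)
open import Data.Product using (_×_; _,_)

-- Weaken both endpoints of every edge from `Fin n` to `Fin (n + m)`,
-- leaving the connectivity itself unchanged.
_↑ˡᵉ_ : {n : ℕ} → List (Fin n × Fin n) → (m : ℕ) → List (Fin (n + m) × Fin (n + m))
es ↑ˡᵉ m = map (λ (i , j) → (i ↑ˡ m , j ↑ˡ m)) es
```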

The proof of `Trace-∙ʳ`, the same property but for the right-hand operand `g₂`,
is very similar, as are the proofs for sequencing. I give their statements,
but not their proofs, below.

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 99 101 >}}
{{< codelines "Agda" "agda-spa/Language/Properties.agda" 139 141 >}}
{{< codelines "Agda" "agda-spa/Language/Properties.agda" 150 152 >}}
{{< codelines "Agda" "agda-spa/Language/Properties.agda" 175 176 >}}

Preserving traces is unfortunately not quite enough. The thing that we're missing
is looping: the same sub-graph can be re-traversed several times as part of
execution, which suggests that we ought to be able to combine multiple traces
through a loop graph into one. Using our earlier concrete example, we might
have traces for evaluating `x` then `x = x - 1` with the variable `x` being
mapped first to `2` and then to `1`. These traces occur back-to-back, so we
will put them together into a single trace. To prove some properties about this,
I'll define a more precise type of trace.

### End-To-End Traces
The key way that traces through a loop graph are combined is through the
back-edges. Specifically, our `loop` graphs have edges from each of the `output`
nodes to each of the `input` nodes. Thus, if we have two paths, both
starting at the beginning of the graph and ending at the end, we know that
the first path's end has an edge to the second path's beginning. This is
enough to combine them.

This logic doesn't work if one of the paths ends in the middle of the graph,
and not on one of the `output`s. That's because there is no guarantee that there
is a connecting edge.

To make things easier, I defined a new data type of "end-to-end" traces, whose
first nodes are one of the graph's `input`s, and whose last nodes are one
of the graph's `output`s.

{{< codelines "Agda" "agda-spa/Language/Traces.agda" 27 36 >}}

We can trivially lift the proofs from the previous section to end-to-end traces.
For example, here's the lifted version of the first property we proved:

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 110 121 >}}

The other lifted properties are similar.

For looping, the proofs get far more tedious, because of just how many
sources of edges there are in the output graph --- they span four lines:

{{< codelines "Agda" "agda-spa/Language/Graphs.agda" 84 94 "hl_lines=5-8" >}}

I therefore made use of two helper lemmas. The first is about list membership
under concatenation. Simply put, if you concatenate a bunch of lists, and
one of them (`l`) contains some element `x`, then the concatenation contains
`x` too.

{{< codelines "Agda" "agda-spa/Utils.agda" 82 85 >}}

I then specialized this lemma for concatenated groups of edges.

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 162 172 "hl_lines=9-11" >}}

Now we can finally prove end-to-end properties of loop graphs. The simplest one is
that they allow the code within them to be entirely bypassed
(as when the loop body is evaluated zero times). I called this
`EndToEndTrace-loop⁰`. The "input" node of the loop graph is index `zero`,
while the "output" node of the loop graph is index `suc zero`. Thus, the key
step is to show that an edge between these two indices exists:

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 227 240 "hl_lines=5-6" >}}

The only remaining novelty is the `trace` field of the returned `EndToEndTrace`.
It uses the trace concatenation operation `++⟨_⟩`. This operator allows concatenating
two traces, which start and end at distinct nodes, as long as there's an edge
that connects them:

{{< codelines "Agda" "agda-spa/Language/Traces.agda" 21 25 >}}

The expression on line 239 of `Properties.agda` is simply the single-edge trace
constructed from the edge `0 -> 1` that connects the start and end nodes of the
loop graph. Both of those nodes are empty, so no code is evaluated in that case.

The proof for combining several traces through a loop follows a very similar
pattern. However, instead of constructing a single-edge trace as we did above,
it concatenates two traces from its arguments. Also, instead of using
the edge from the first node to the last, it instead uses an edge from the
last to the first, as I described at the very beginning of this section.

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 209 225 "hl_lines=8-9" >}}

### Proof of Sufficiency

We now have all the pieces to show each execution of our program has a corresponding
trace through a graph. Here is the whole proof:

{{< codelines "Agda" "agda-spa/Language/Properties.agda" 281 296 >}}

We proceed by
{{< sidenote "right" "derivation-note" "checking what inference rule was used to execute a particular statement," >}}
Precisely, we proceed by induction on the derivation of \(\rho_1, s \Rightarrow \rho_2\).
{{< /sidenote >}}
because that's what tells us what the program did in that particular moment.

* When executing a basic statement, we know that we constructed a singleton
  graph that contains one node with that statement. Thus, we can trivially
  construct a single-step trace without any edges.
* When executing a sequence of statements, we have two induction hypotheses.
  These state that the sub-graphs we construct for the first and second statement
  have the trace property. We also have two evaluation judgements (one for each
  statement), which means that we can apply that property to get traces. The
  `buildCfg` function sequences the two graphs, and we can sequence
  the two traces through them, resulting in a trace through the final output.
* For both the `then` and `else` cases of evaluating an `if` statement,
  we observe that `buildCfg` overlays the sub-graphs of the two branches using
  `_∙_`. We also know that the two sub-graphs have the trace property.
  * In the `then` case, since we have an evaluation judgement for
    `s₁` (in variable `ρ₁,s₁⇒ρ₂`), we conclude that there's a correct trace
    through the `then` sub-graph. Since that graph is the left operand of
    `_∙_`, we use `EndToEndTrace-∙ˡ` to show that the trace is preserved in the full graph.
  * In the `else` case things are symmetric. We are evaluating `s₂`, with
    a judgement given by `ρ₁,s₂⇒ρ₂`. We use that to conclude that there's a
    trace through the graph built from `s₂`. Since this sub-graph is the right
    operand of `_∙_`, we use `EndToEndTrace-∙ʳ` to show that it's preserved in
    the full graph.
* For the `true` case of `while`, we have two evaluation judgements: one
  for the body and one for the loop again, this time
  in a new environment. They are stored in `ρ₁,s⇒ρ₂` and `ρ₂,ws⇒ρ₃`, respectively.
  The statement being evaluated by `ρ₂,ws⇒ρ₃` is actually the exact same statement
  that's being evaluated at the top level of the proof. Thus, we can use
  `EndToEndTrace-loop²`, which sequences two traces through the same graph.

  We also use `EndToEndTrace-loop` to lift the trace through `buildCfg s` into
  a trace through `buildCfg (while e s)`.
* For the `false` case of the `while`, we don't execute any instructions,
  and finish evaluating right away. This corresponds to the do-nothing trace,
  which we have established exists using `EndToEndTrace-loop⁰`.

That's it! We have now validated that the Control Flow Graphs we construct
match the semantics of the programming language, which makes them a good
input to our static program analyses. We can finally start writing those!

### Defining and Verifying Static Program Analyses

We have all the pieces we need to define a formally-verified forward analysis:

* We have used the framework of lattices to encode the precision of program
  analysis outputs. Smaller elements in a lattice are more specific,
  meaning more useful information.
* We have [implemented the fixed-point algorithm]({{< relref "04_spa_agda_fixedpoint" >}}),
  which finds the smallest solutions to equations in the form \(f(x) = x\)
  for monotonic functions over lattices. By defining our analysis as such a function,
  we can apply the algorithm to find the most precise steady-state description
  of our program.
* We have defined how our programs are executed, which is crucial for defining
  "correctness".

Here's how these pieces will fit together. We will construct a
finite-height lattice. Every single element of this lattice will contain
information about each variable at each node in the Control Flow Graph. We will
then define a monotonic function that updates this information using the
structure encoded in the CFG's edges and nodes. Then, using the fixed-point algorithm,
we will find the least element of the lattice, which will give us a precise
description of all program variables at all points in the program. Because
we have just validated our CFGs to be faithful to the language's semantics,
we'll be able to prove that our algorithm produces accurate results.

The next post or two will be the last stretch; I hope to see you there!
15
content/blog/07_spa_agda_semantics_and_cfg/while-cfg.dot
Normal file
15
content/blog/07_spa_agda_semantics_and_cfg/while-cfg.dot
Normal file
@@ -0,0 +1,15 @@
digraph G {
graph[dpi=300 fontsize=14 fontname="Courier New"];
node[shape=rectangle style="filled" fillcolor="#fafafa" penwidth=0.5 color="#aaaaaa"];
edge[arrowsize=0.3 color="#444444"]

node_begin [label="x = 2;\l"]
node_cond [label="x\l"]
node_body [label="x = x - 1\l"]
node_end [label="y = x\l"]

node_begin -> node_cond
node_cond -> node_body
node_cond -> node_end
node_body -> node_cond
}
BIN
content/blog/07_spa_agda_semantics_and_cfg/while-cfg.png
Normal file
BIN
content/blog/07_spa_agda_semantics_and_cfg/while-cfg.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
445
content/blog/08_spa_agda_forward/index.md
Normal file
445
content/blog/08_spa_agda_forward/index.md
Normal file
@@ -0,0 +1,445 @@
---
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 8: Forward Analysis"
series: "Static Program Analysis in Agda"
description: "In this post, I use the monotone lattice framework and verified CFGs to define a sign analysis"
date: 2024-12-01T15:09:07-08:00
tags: ["Agda", "Programming Languages"]
---

In the previous post, I showed that the Control Flow Graphs we built of our
programs match how they are really executed. This means that we can rely
on these graphs to compute program information. In this post, we finally
get to compute that information. Here's a quick bit of paraphrasing from last time
that provides a summary of our approach:

1. We will construct a finite-height lattice. Every single element of this
   lattice will contain information about each variable at each node in the
   Control Flow Graph.
2. We will then define a monotonic function that updates this information using
   the structure encoded in the CFG’s edges and nodes.
3. Then, using the fixed-point algorithm, we will find the least element of the
   lattice, which will give us a precise description of all program variables at
   all points in the program.
4. Because we have just validated our CFGs to be faithful to the language’s
   semantics, we’ll be able to prove that our algorithm produces accurate results.

Let's jump right into it!

### Choosing a Lattice
A lot of this time, we have been [talking about lattices]({{< relref "01_spa_agda_lattices" >}}),
particularly [lattices of finite height]({{< relref "03_spa_agda_fixed_height" >}}).
These structures represent things we know about the program, and provide operators
like \((\sqcup)\) and \((\sqcap)\) that help us combine such knowledge.

The forward analysis code I present here will work with any finite-height
lattice, with the additional constraint that equivalence of lattice elements
is decidable, which comes from [the implementation of the fixed-point algorithm]({{< relref "04_spa_agda_fixedpoint" >}}),
in which we routinely check if a function's output is the same as its input.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 4 8 >}}

The finite-height lattice `L` is intended to describe the state of a single
variable.
One example of a lattice that can be used as `L` is our
sign lattice. We've been using the sign lattice in our examples [from the very beginning]({{< relref "01_spa_agda_lattices#lattices" >}}),
and we will stick with it for the purposes of this explanation. However, this
lattice alone does not describe our program, since it only talks about a single
sign; programs have lots of variables, all of which can have different signs!
So, we might go one step further and define a map lattice from variables to
their signs:

{{< latex >}}
\text{Variable} \to \text{Sign}
{{< /latex >}}

We [have seen]({{< relref "02_spa_agda_combining_lattices#the-map-lattice" >}})
that we can turn any lattice \(L\) into a map lattice \(A \to L\), for any
type of keys \(A\). In this case, we will define \(A \triangleq \text{Variable}\),
and \(L \triangleq \text{Sign}\). The
[sign lattice has a finite height]({{< relref "02_spa_agda_combining_lattices#the-map-lattice" >}}),
and I've proven that, as long as we pick a finite set of keys, [map lattices
\(A \to L\) have a finite height if \(L\) has a finite height]({{< relref "03_spa_agda_fixed_height#fixed-height-of-the-map-lattice" >}}).
Since a program's text is finite, \(\text{Variable}\) is a finite set, and
we have ourselves a finite-height lattice \(\text{Variable} \to \text{Sign}\).

We're on the right track, but even the lattice we have so far is not sufficient.
That's because variables have different signs at different points in the program!
You might initialize a variable with `x = 1`, making it positive, and then
go on to compute some arbitrary function using loops and conditionals. For
each variable, we need to keep track of its sign at various points in the code.
When we [defined Control Flow Graphs]({{< relref "06_spa_agda_cfg" >}}), we
split our programs into sequences of statements that are guaranteed to execute
together --- basic blocks. For our analysis, we'll keep per-variable information for
each basic block in the program. Since basic blocks are nodes in the Control Flow
Graph of our program, our whole lattice will be as follows:
{#whole-lattice}

{{< latex >}}
\text{Info} \triangleq \text{NodeId} \to (\text{Variable} \to \text{Sign})
{{< /latex >}}

We follow the same logic we just did for the variable-sign lattice; since
\(\text{Variable} \to \text{Sign}\) is a lattice of finite height, and since
\(\text{NodeId}\) is a finite set, the whole \(\text{Info}\) map will be
a lattice with a finite height.
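
For concreteness, an element of this \(\text{Info}\) lattice might look like
the following made-up value, which says that `x` is positive at node \(1\) but
could have any sign at node \(2\):

{{< latex >}}
[\ 1 \mapsto [\texttt{x} \mapsto +],\ 2 \mapsto [\texttt{x} \mapsto \top]\ ]
{{< /latex >}}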

Notice that both the sets of \(\text{Variable}\) and \(\text{NodeId}\) depend
on the program in question. The lattice we use is slightly different for
each input program! We can use Agda's parameterized modules to automatically
parameterize all our functions over programs:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 36 37 >}}

Now, let's make the informal descriptions above into code, by instantiating
our map lattice modules. First, I invoked the code for the smaller variable-sign
lattice. This ended up being quite long, so that I could rename variables I
brought into scope. I will collapse the relevant code block; suffice to say
that I used the suffix `v` (e.g., renaming `_⊔_` to `_⊔ᵛ_`) for properties
and operators to do with variable-sign maps (in Agda: `VariableValuesFiniteMap`).

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 41 82 "" "**(Click here to expand the module uses for variable-sign maps)**" >}}

I then used this lattice as an argument to the map module again, to
construct the top-level \(\text{Info}\) lattice (in Agda: `StateVariablesFiniteMap`).
This also required a fair bit of code, most of it to do with renaming.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 85 112 "" "**(Click here to expand the module uses for the top-level lattice)**" >}}

### Constructing a Monotone Function

We now have a lattice in hand; the next step is to define a function over
this lattice. For us to be able to use the fixed-point algorithm on this
function, it will need to be [monotonic]({{< relref "01_spa_agda_lattices#define-monotonicity" >}}).

Our goal with static analysis is to compute information about our program; that's
what we want the function to do. When the lattice we're using is the sign lattice,
we're trying to determine the signs of each of the variables in various parts
of the program. How do we go about this?

Each piece of code in the program might change a variable's sign. For instance,
if `x` has sign \(0\), and we run the statement `x = x - 1`, the sign of
`x` will be \(-\). If we have an expression `y + z`, we can use the signs of
`y` and `z` to compute the sign of the whole thing. This is a form
of [abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation),
in which we almost-run the program, but forget some details (e.g., the
exact values of `x`, `y`, and `z`, leaving only their signs). The exact details
of how this partial evaluation is done are analysis-specific; in general, we
simply require an analysis to provide an evaluator. We will define
[an evaluator for the sign lattice below](#instantiating-with-the-sign-lattice).
{#general-evaluator}

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 166 167 >}}
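
To make that concrete, here's roughly what an evaluator computes in the sign
case (my own example, writing \(\hat{-}\) for the abstract version of
subtraction, and using the fact that the literal `1` has sign \(+\)):

{{< latex >}}
\text{eval}(\texttt{x - 1},\ [\texttt{x} \mapsto 0]) = 0\ \hat{-}\ + = -
{{< /latex >}}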

From this, we know how each statement and basic block will change variables
in the program. But we have described the process as "if a variable has
sign X, it becomes sign Y" --- how do we know what sign a variable has _before_
the code runs? Fortunately, the Control Flow Graph tells us exactly
what code could be executed before any given basic block. Recall that edges
in the graph describe all possible jumps that could occur; thus, for any
node, the incoming edges describe all possible blocks that can precede it.
This is why we spent all that time [defining the `predecessors` function]({{< relref "06_spa_agda_cfg#additional-functions" >}}).

We proceed as follows: for any given node, find its predecessors. By accessing
our \(\text{Info}\) map for each predecessor, we can determine our current
best guess of variable signs at that point, in the form of a \(\text{Variable} \to \text{Sign}\)
map (more generally, a \(\text{Variable} \to L\) map in an arbitrary analysis).
We know that any of these predecessors could've been the previous point of
execution; if a variable `x` has sign \(+\) in one predecessor and \(-\)
in another, it can be either one or the other when we start executing the
current block. Early on, we saw that [the \((\sqcup)\) operator models disjunction
("A or B")]({{< relref "01_spa_agda_lattices#lub-glub-or-and" >}}). So, we apply
\((\sqcup)\) to the variable-sign maps of all predecessors. The
[reference _Static Program Analysis_ text](https://cs.au.dk/~amoeller/spa/)
calls this operation \(\text{JOIN}\):
{#join-preds}

{{< latex >}}
\textit{JOIN}(v) = \bigsqcup_{w \in \textit{pred}(v)} \llbracket w \rrbracket
{{< /latex >}}

The Agda implementation uses a `foldr`:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 139 140 >}}

Computing the "combined incoming states" for any node is a monotonic function.
This follows from the monotonicity of \((\sqcup)\) --- in both arguments ---
and the definition of `foldr`.
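
Unfolded, and assuming the fold starts from the lattice's bottom element, the
computation looks something like this, where \(w_1, \ldots, w_n\) are the
node's predecessors; since \((\sqcup)\) is monotonic in each argument, so is
the whole nested expression:

{{< latex >}}
\llbracket w_1 \rrbracket \sqcup (\llbracket w_2 \rrbracket \sqcup (\cdots \sqcup (\llbracket w_n \rrbracket \sqcup \bot)))
{{< /latex >}}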

{{< codelines "agda" "agda-spa/Lattice.agda" 143 151 "" "**(Click here to expand the general proof)**" >}}

From this, we can formally state that \(\text{JOIN}\) is monotonic. Note that
the input and output lattices are different: the input lattice is the lattice
of variable states at each block, and the output lattice is a single variable-sign
map, representing the combined preceding state at a given node.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 145 149 >}}

Above, the `m₁≼m₂⇒m₁[ks]≼m₂[ks]` lemma states that for two maps with the same
keys, where one map is less than another, all the values for any subset of keys
`ks` are pairwise less than each other (i.e. `m₁[k]≼m₂[k]`, and `m₁[l]≼m₂[l]`, etc.).
This follows from the definition of "less than" for maps.
{#less-than-lemma}

So those are the two pieces: first, join all the preceding states, then use
the abstract interpretation function. I opted to do both of these in bulk:

1. Take an initial \(\text{Info}\) map, and update every basic block's entry
   to be the join of its predecessors.
2. In the new joined map, each key now contains the variable state at
   the beginning of the block; so, apply the abstract interpretation function
   via `eval` to each key, computing the state at the end of the block.

I chose to do these in bulk because this way, after each application of
the function, we have updated each block with exactly one round of information.
The alternative --- which is specified in the reference text --- is to update
one key at a time. The difference there is that updates to later keys might be
"tainted" by updates to keys that came before them. This is probably fine
(and perhaps more efficient, in that it "moves faster"), but it's harder to
reason about.

#### Generalized Update

To implement bulk assignment, I needed to implement the source text's
Exercise 4.26:

> __Exercise 4.26__: Recall that \(f[a \leftarrow x]\) denotes the function that is identical to
> \(f\) except that it maps \(a\) to \(x\). Assume \(f : L_1 \to (A \to L_2)\)
> and \(g : L_1 \to L_2\) are monotone functions where \(L_1\) and \(L_2\) are
> lattices and \(A\) is a set, and let \(a \in A\). (Note that the codomain of
> \(f\) is a map lattice.)
>
> Show that the function \(h : L_1 \to (A \to L_2)\)
> defined by \(h(x) = f(x)[a \leftarrow g(x)]\) is monotone.

In fact, I generalized this statement to update several keys at once, as follows:

{{< latex >}}
h(x) = f(x)[a_1 \leftarrow g(a_1, x),\ ...,\ a_n \leftarrow g(a_n, x)]
{{< /latex >}}

I called this operation "generalized update".

At first, the exercise may not obviously correspond to the bulk operation
I've described. Particularly confusing is the fact that it has two lattices,
\(L_1\) and \(L_2\). In fact, the exercise results in a very general theorem;
we can exploit a more concrete version of the theorem by setting
\(L_1 \triangleq A \to L_2\), resulting in an overall signature for \(f\) and \(h\):

{{< latex >}}
f : (A \to L_2) \to (A \to L_2)
{{< /latex >}}

In other words, if we give the entire operation in Exercise 4.26 a type,
it would look like this:

{{< latex >}}
\text{ex}_{4.26} : \underbrace{K}_{\text{value of}\ a} \to \underbrace{(\text{Map} \to V)}_{\text{updater}} \to \underbrace{\text{Map} \to \text{Map}}_{f} \to \underbrace{\text{Map} \to \text{Map}}_{h}
{{< /latex >}}

That's still more general than we need. This allows us to modify
any map-to-map function by updating a certain key in that function. If we
_just_ want to update keys (as we do for the purposes of static analysis),
we can recover a simpler version by setting \(f \triangleq id\), which
results in an updater \(h(x) = x[a \leftarrow g(x)]\), and a signature for
the exercise:

{{< latex >}}
\text{ex}_{4.26} : \underbrace{K}_{\text{value of}\ a} \to \underbrace{(\text{Map} \to V)}_{\text{updater}\ g} \to \underbrace{\text{Map}}_{\text{old map}} \to \underbrace{\text{Map}}_{\text{updated map}}
{{< /latex >}}

This looks just like Haskell's [`Data.Map.adjust` function](https://hackage.haskell.org/package/containers-0.4.0.0/docs/src/Data-Map.html#adjust), except that it
can take the entire map into consideration when updating a key.

My generalized version takes in a list of keys to update, and makes the updater
accept a key so that its behavior can be specialized for each entry it changes.
The sketch of the implementation is in the `_updating_via_` function from
the `Map` module, and its helper `transform`. Here, I collapse its definition,
since it's not particularly important.

{{< codelines "agda" "agda-spa/Lattice/Map.agda" 926 931 "" "**(Click here to see the definition of `transform`)**" >}}

The proof of monotonicity --- which is the solution to the exercise --- is
actually quite complicated. I will omit its description, and show it here
in another collapsed block.

{{< codelines "agda" "agda-spa/Lattice/Map.agda" 1042 1105 "" "**(Click here to see the proof of monotonicity of \(h\))**" >}}

Given a proof of the exercise, all that's left is to instantiate the theorem
with the argument I described. Specifically:

* \(L_1 \triangleq \text{Info} \triangleq \text{NodeId} \to (\text{Variable} \to \text{Sign})\)
* \(L_2 \triangleq \text{Variable} \to \text{Sign} \)
* \(A \triangleq \text{NodeId}\)
* \(f \triangleq \text{id} \triangleq x \mapsto x\)
* \(g(k, m) = \text{JOIN}(k, m)\)

In the equation for \(g\), I explicitly insert the map \(m\) instead of leaving
it implicit as the textbook does. In Agda, this instantiation for joining
all predecessors looks like this (using `states` as the list of keys to update,
indicating that we should update _every_ key):

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 152 157 >}}

And the one for evaluating all blocks looks like this:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 215 220 >}}

Actually, we haven't yet seen `updateVariablesFromStmt`. This is
a function that we can define using the user-provided abstract interpretation
`eval`. Specifically, it handles the job of updating the sign of a variable
once it has been assigned to (or doing nothing if the statement is a no-op).
{#define-updateVariablesFromStmt}

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 191 193 >}}

The `updateVariablesFromExpression` function is new here, and it is yet another map update,
which changes the sign of a variable `k` to be the one we get from running
`eval` on it. Map updates are instances of the generalized update; this
time, the updater \(g\) is `eval`. The exercise requires the updater to be
monotonic, which constrains the user-provided evaluation function to be
monotonic too.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 173 181 >}}

We finally write the `analyze` function as the composition of the two bulk updates:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 226 232 >}}
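
Written as an equation (using the names \(\text{join}\) and \(\text{update}\)
that the next post will also use for these two bulk steps), one application of
`analyze` computes:

{{< latex >}}
\text{analyze}(x) = \text{update}(\text{join}(x))
{{< /latex >}}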

### Instantiating with the Sign Lattice
Thus far, I've been talking about the sign lattice throughout, but implementing
the Agda code in terms of a general lattice `L` and evaluation function `eval`.
In order to actually run the Agda code, we do need to provide an `eval` function,
which implements the logic we used above, in which a zero-sign variable \(x\)
minus one was determined to be negative. For binary operators specifically,
I've used the tables provided in the textbook; here they are:

{{< figure src="plusminus.png" caption="Cayley tables for abstract interpretation of plus and minus" >}}

These are pretty much common sense:
* A positive plus a positive is still positive, so \(+\ \hat{+}\ + = +\)
* A positive plus any sign could be any sign still, so \(+\ \hat{+}\ \top = \top\)
* Any sign plus "impossible" is impossible, so \(\top\ \hat{+}\ \bot = \bot\).
* etc.

The Agda encoding for the plus function is as follows, and the one for minus
is similar.

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 76 94 >}}

As the comment in the block says, it would be incredibly tedious to verify
the monotonicity of these tables, since you would have to consider roughly
125 cases _per argument_: for each (fixed) sign \(s\) and two other signs
\(s_1 \le s_2\), we'd need to show that \(s\ \hat{+}\ s_1 \le s\ \hat{+}\ s_2\).
I therefore commit the _faux pas_ of using `postulate`. Fortunately, the proof
of monotonicity is not used for the execution of the program, so we will
get away with this, barring any meddling kids.

From this, all that's left is to show that for any expression `e`, the
evaluation function:

{{< latex >}}
\text{eval} : \text{Expr} \to (\text{Variable} \to \text{Sign}) \to \text{Sign}
{{< /latex >}}

is monotonic. It's defined straightforwardly and very much like an evaluator /
interpreter, suggesting that "abstract interpretation" is the correct term here.

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 176 184 >}}

Though it won't happen, it was easier to just handle the case where there's
an undefined variable; I give it "any sign". Otherwise, the function simply
consults the sign tables for `+` or `-`, as well as the known signs of the
variables. For natural number literals, it assigns `0` the "zero" sign, and
any other natural number the "\(+\)".
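
For instance, on the expression `pos + neg` from the test program we'll run
shortly, with `pos` known to be positive and `neg` known to be negative, the
evaluator consults the plus table and finds that the sum could have any sign:

{{< latex >}}
\text{eval}(\texttt{pos + neg},\ [\texttt{pos} \mapsto +,\ \texttt{neg} \mapsto -]) = +\ \hat{+}\ - = \top
{{< /latex >}}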

To prove monotonicity, we need to consider two variable maps (one less than
the other), and show that the abstract interpretation respects that ordering.
This boils down to the fact that the `plus` and `minus` tables are monotonic
in both arguments (thus, if their sub-expressions are evaluated monotonically
given an environment, then so is the whole addition or subtraction), and
to the fact that for two maps `m₁ ≼ m₂`, the values at corresponding keys
are similarly ordered: `m₁[k] ≼ m₂[k]`. We [saw that above](#less-than-lemma).

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 186 223 "" "**(Click to expand the proof that the evaluation function for signs is monotonic)**" >}}

That's all we need. With this, I just instantiate the `Forward` module we have
been working with, and make use of the `result`. I also used a `show`
function (which I defined) to stringify that output.

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 225 229 >}}

But wait, `result`? We haven't seen a result just yet. That's the last piece,
and it involves finally making use of the fixed-point algorithm.

### Invoking the Fixed Point Algorithm
Our \(\text{Info}\) lattice is of finite height, and the function we have defined
is monotonic (by virtue of being constructed only from map updates, which
are monotonic by Exercise 4.26, and from function composition, which preserves
monotonicity). We can therefore apply the fixed-point algorithm, and compute
the least fixed point:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 235 238 >}}

With this, `analyze` is the result of our forward analysis!

In a `Main.agda` file, I invoked this analysis on a sample program:

```Agda
testCode : Stmt
testCode =
    ⟨ "zero" ← (# 0) ⟩ then
    ⟨ "pos" ← ((` "zero") Expr.+ (# 1)) ⟩ then
    ⟨ "neg" ← ((` "zero") Expr.- (# 1)) ⟩ then
    ⟨ "unknown" ← ((` "pos") Expr.+ (` "neg")) ⟩

testProgram : Program
testProgram = record
    { rootStmt = testCode
    }

open WithProg testProgram using (output; analyze-correct)

main = run {0ℓ} (putStrLn output)
```

The result is verbose, since it shows variable signs for each statement
in the program. However, the key is the last basic block, which shows
the variables at the end of the program. It reads:

```
{"neg" ↦ -, "pos" ↦ +, "unknown" ↦ ⊤, "zero" ↦ 0, }
```

### Verifying the Analysis
We now have a general framework for running forward analyses: you provide
an abstract interpretation function for expressions, as well as a proof
that this function is monotonic, and you get an Agda function that takes
a program and tells you the variable states at every point. If your abstract
interpretation function is for determining the signs of expressions, the
final result is an analysis that determines all possible signs for all variables,
anywhere in the code. It's pretty easy to instantiate this framework with
another type of forward analysis --- in fact, by switching the
`plus` function to one that uses `AboveBelow ℤ`, rather than `AboveBelow Sign`:

```Agda
plus : ConstLattice → ConstLattice → ConstLattice
plus ⊥ᶜ _ = ⊥ᶜ
plus _ ⊥ᶜ = ⊥ᶜ
plus ⊤ᶜ _ = ⊤ᶜ
plus _ ⊤ᶜ = ⊤ᶜ
plus [ z₁ ]ᶜ [ z₂ ]ᶜ = [ z₁ Int.+ z₂ ]ᶜ
```

we can define a constant-propagation analysis. Its output for the same test
program is:

```
{"neg" ↦ -1, "pos" ↦ 1, "unknown" ↦ 0, "zero" ↦ 0, }
```

However, we haven't proved our analysis correct, and we haven't yet made use of
the CFG-semantics equivalence that we
[proved in the previous section]({{< relref "07_spa_agda_semantics_and_cfg" >}}).
I was hoping to get to it in this post, but there was just too much to
cover. So, I will get to that in the next post, where we will make use
of the remaining machinery to demonstrate that the output of our analyzer
matches reality.
BIN
content/blog/08_spa_agda_forward/plusminus.png
Normal file
BIN
content/blog/08_spa_agda_forward/plusminus.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 42 KiB |
547
content/blog/09_spa_agda_verified_forward/index.md
Normal file
547
content/blog/09_spa_agda_verified_forward/index.md
Normal file
@@ -0,0 +1,547 @@
|
||||
---
|
||||
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 9: Verifying the Forward Analysis"
|
||||
series: "Static Program Analysis in Agda"
|
||||
description: "In this post, I prove that the sign analysis from the previous is correct"
|
||||
date: 2024-12-25T19:00:00-08:00
|
||||
tags: ["Agda", "Programming Languages"]
|
||||
left_align_code: true
|
||||
---
|
||||
|
||||
In the previous post, we put together a number of powerful pieces of machinery
|
||||
to construct a sign analysis. However, we still haven't verified that this
|
||||
analysis produces correct results. For the most part, we already have the
|
||||
tools required to demonstrate correctness; the most important one
|
||||
is the [validity of our CFGs]({{< relref "07_spa_agda_semantics_and_cfg" >}})
|
||||
relative to [the semantics of the little language]({{< relref "05_spa_agda_semantics" >}}).
|
||||
|
||||
### High-Level Algorithm
We'll keep working with the sign lattice as an example, keeping in mind
how what we do generalizes to any lattice \(L\) describing a variable's
state. The general shape of our argument will be as follows, where I've underlined and
numbered assumptions or aspects that we have yet to provide.

1. Our fixed-point analysis from the previous section gave us a result \(r\)
   that satisfies the following equation:

   {{< latex >}}
   r = \text{update}(\text{join}(r))
   {{< /latex >}}

   Above, \(\text{join}\) applies the [predecessor-combining function]({{< relref "08_spa_agda_forward#join-preds" >}})
   from the previous post to each state (corresponding to `joinAll` in Agda)
   and \(\text{update}\) performs one round of abstract interpretation.

2. Because of the [correspondence of our semantics and CFGs]({{< relref "07_spa_agda_semantics_and_cfg" >}}),
   each program evaluation in the form \(\rho, s \Rightarrow \rho'\)
   corresponds to a path through the Control Flow Graph. Along the path,
   each node contains simple statements, which correspond to intermediate steps
   in evaluating the program. These will also be in the form
   \(\rho_1, b \Rightarrow \rho_2\).

3. We will proceed iteratively, stepping through the trace one basic block at
   a time. At each node in the graph:
   * We will assume that the beginning state (the variables in \(\rho_1\)) is
     {{< internal "correctly-described" >}}
     correctly described
     {{< /internal >}}
     by one of the predecessors of the current node. Since
     {{< internal "disjunction" >}}
     joining represents "or"
     {{< /internal >}},
     that is the same
     as saying that \(\text{join}(r)\)
     contains an accurate description of \(\rho_1\).

   * Because
     {{< internal "abstract-interpretation" >}}
     the abstract interpretation function preserves accurate descriptions
     {{< /internal >}},
     if \(\text{join}(r)\) contains an accurate description of \(\rho_1\), then applying our
     abstract interpretation function via \(\text{update}\) should result in
     a map that contains an accurately-described \(\rho_2\). In other words, \(\text{update}(\text{join}(r))\)
     describes \(\rho_2\) at the current block.
     {{< internal "equivalence" >}}
     By the equation above
     {{< /internal >}}, that's the same as saying
     \(r\) describes \(\rho_2\) at the current block.

   * Since the trace is a path through a graph, there must be an edge from
     the current basic block to the next. This means that the current basic
     block is a predecessor of the next one. From the previous point, we know
     that \(\rho_2\) is accurately described by this predecessor, fulfilling
     our earlier assumption and allowing us to continue iteration.

So, what are the missing pieces?

1. We need to define what it means for a lattice (like our sign lattice)
   to "correctly describe" what happens when evaluating a program for real.
   For example, the \(+\) in sign analysis describes values that are bigger than zero,
   and a map like `{x:+}` states that `x` can only take on positive values.
2. We've seen before that [the \((\sqcup)\) operator models disjunction
   ("A or B")]({{< relref "01_spa_agda_lattices#lub-glub-or-and" >}}), but
   that was only an informal observation; we'll need to specify it precisely.
3. Each analysis [provides an abstract interpretation `eval` function]({{< relref "08_spa_agda_forward#general-evaluator" >}}).
   However, until now, nothing has formally constrained this function; we could
   return \(+\) in every case, even though that would not be accurate. We will
   need, for each analysis, a proof that its `eval` preserves accurate descriptions.
4. The equalities between our lattice elements [are actually equivalences]({{< relref "01_spa_agda_lattices#definitional-equality" >}}),
   which helps us use simpler representations of data structures. Thus, even
   in statements of the fixed point algorithm, our final result is a value \(a\)
   such that \(a \approx f(a)\). We need to prove that our notion of equivalent
   lattice elements plays nicely with correctness.

Let's start with the first bullet point.

### A Formal Definition of Correctness

When a variable is mapped to a particular sign (like `{ "x": + }`),
what that really says is that the value of `x` is greater than zero. Recalling
from [the post about our language's semantics]({{< relref "05_spa_agda_semantics#notation-for-environments" >}})
that we use the symbol \(\rho\) to represent mappings of variables to
their values, we might write this claim as:

{{< latex >}}
\rho(\texttt{x}) > 0
{{< /latex >}}

This is a good start, but it's a little awkward defining the meaning of "plus"
by referring to the context in which it's used (the `{ "x": ... }` portion
of the expression above). Instead, let's associate with each sign (like \(+\)) a
predicate: a function that takes a value, and makes a claim about that value
("this is positive"):

{{< latex >}}
\llbracket + \rrbracket\ v = v > 0
{{< /latex >}}

The notation above is a little weird unless you, like me, have a background in
Programming Language Theory (❤️). This comes from [denotational semantics](https://en.wikipedia.org/wiki/Denotational_semantics);
generally, one writes:

{{< latex >}}
\llbracket \text{thing} \rrbracket = \text{the meaning of the thing}
{{< /latex >}}

Where \(\llbracket \cdot \rrbracket\) is really a function (we call it
the _semantic function_) that maps things to
their meaning. Then, the above equation is similar to the more familiar
\(f(x) = x+1\): function and arguments on the left, definition on the right. When
the "meaning of the thing" is itself a function, we could write it explicitly
using lambda-notation:

{{< latex >}}
\llbracket \text{thing} \rrbracket = \lambda x.\ \text{body of the function}
{{< /latex >}}

Or, we could use the Haskell style and write the new variable on the left of
the equality:

{{< latex >}}
\llbracket \text{thing} \rrbracket\ x = \text{body of the function}
{{< /latex >}}

That is precisely what I'm doing above with \(\llbracket + \rrbracket\).
With this in mind, we could define the entire semantic function for the
sign lattice as follows:

{{< latex >}}
\llbracket + \rrbracket\ v = v\ \texttt{>}\ 0 \\
\llbracket 0 \rrbracket\ v = v\ \texttt{=}\ 0 \\
\llbracket - \rrbracket\ v = v\ \texttt{<}\ 0 \\
\llbracket \top \rrbracket\ v = \text{true} \\
\llbracket \bot \rrbracket\ v = \text{false}
{{< /latex >}}

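To get a feel for these definitions, here they are applied to a couple of
concrete values (a worked example of my own, not part of the Agda development):

{{< latex >}}
\llbracket + \rrbracket\ 3 = 3\ \texttt{>}\ 0\ \text{(true)} \qquad
\llbracket - \rrbracket\ 3 = 3\ \texttt{<}\ 0\ \text{(false)}
{{< /latex >}}
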
In Agda, the integer type already distinguishes between the "negative natural"
and "positive natural" cases, which made it possible to define the semantic function
{{< sidenote "right" "without-note" "without using inequalities." >}}
Reasoning about inequalities is painful, sometimes requiring a number of
lemmas to arrive at a result that is intuitively obvious. Coq has a powerful
tactic called <a href="https://coq.inria.fr/doc/v8.11/refman/addendum/micromega.html#coq:tacn.lia"><code>lia</code></a>
that automatically solves systems of inequalities, and I use it liberally.
However, lacking such a tactic in Agda, I would like to avoid inequalities
if they are not needed.
{{< /sidenote >}}

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 114 119 >}}

Notably, \(\llbracket \top \rrbracket\ v\) always holds, and
\(\llbracket \bot \rrbracket\ v\) never does. __In general__, we will always
need to define a semantic function for whatever lattice we are choosing for
our analysis.

It's important to remember from the previous post that the sign lattice
(or, more generally, our lattice \(L\)) is only a component of the
[lattice we use to instantiate the analysis]({{< relref "08_spa_agda_forward#whole-lattice" >}}).
We at least need to define what it means for the \(\text{Variable} \to \text{Sign}\)
portion of that lattice to be correct. This way, we'll have correctness
criteria for each key (CFG node) in the top-level \(\text{Info}\) lattice.
Since a map from variables to their sign characterizes not a single value \(v\)
but a whole environment \(\rho\), something like this is a good start:

{{< latex >}}
\llbracket \texttt{\{} x_1: s_1, ..., x_n: s_n \texttt{\}} \rrbracket\ \rho = \llbracket s_1 \rrbracket\ \rho(x_1)\ \text{and}\ ...\ \text{and}\ \llbracket s_n \rrbracket\ \rho(x_n)
{{< /latex >}}

As a concrete example, we might get:

{{< latex >}}
\llbracket \texttt{\{} \texttt{x}: +, \texttt{y}: - \texttt{\}} \rrbracket\ \rho = \rho(\texttt{x})\ \texttt{>}\ 0\ \text{and}\ \rho(\texttt{y})\ \texttt{<}\ 0
{{< /latex >}}

This is pretty good, but not quite right. For instance, the initial state of
the program --- before running the analysis --- assigns \(\bot\) to each
element. This is true because our fixed-point algorithm [starts with the least
element of the lattice]({{< relref "04_spa_agda_fixedpoint#start-least" >}}).
But even for a single-variable map `{x: ⊥}`, the semantic function above would
give:

{{< latex >}}
\llbracket \texttt{\{} \texttt{x}: \bot \texttt{\}} \rrbracket\ \rho = \text{false}
{{< /latex >}}

That's clearly not right: our initial state should be possible, lest
the entire proof be just a convoluted [_ex falso_](https://en.wikipedia.org/wiki/Principle_of_explosion)!

There is another tricky aspect of our analysis, which is primarily defined
[using the join (\(\sqcup\)) operator]({{< relref "08_spa_agda_forward#join-preds" >}}).
Observe the following example:

```C
// initial state: { x: ⊥ }
if b {
  x = 1; // state: { x: + }
} else {
  // state unchanged: { x: ⊥ }
}
// state: { x: + } ⊔ { x: ⊥ } = { x: + }
```

Notice that in the final state, the sign of `x` is `+`, even though when
`b` is `false`, the variable is never set. In a simple language like ours,
without variable declaration points, this is probably the best we could hope
for. The crucial observation, though, is that the oddness only comes into
play with variables that are not set. In the "initial state" case, none
of the variables have been modified; in the `else` case of the conditional,
`x` was never assigned to. We can thus relax our condition to an if-then:
if a variable is in our environment \(\rho\), then the variable-sign lattice's
interpretation accurately describes it.

{{< latex >}}
\begin{array}{ccc}
\llbracket \texttt{\{} x_1: s_1, ..., x_n: s_n \texttt{\}} \rrbracket\ \rho & = & & \textbf{if}\ x_1 \in \rho\ \textbf{then}\ \llbracket s_1 \rrbracket\ \rho(x_1)\ \\ & & \text{and} & ... \\ & & \text{and} & \textbf{if}\ x_n \in \rho\ \textbf{then}\ \llbracket s_n \rrbracket\ \rho(x_n)
\end{array}
{{< /latex >}}

The first "weird" case now results in the following:

{{< latex >}}
\llbracket \texttt{\{} \texttt{x}: \bot \texttt{\}} \rrbracket\ \rho = \textbf{if}\ \texttt{x} \in \rho\ \textbf{then}\ \text{false}
{{< /latex >}}

Which is just another way of saying:

{{< latex >}}
\llbracket \texttt{\{} \texttt{x}: \bot \texttt{\}} \rrbracket\ \rho = \texttt{x} \notin \rho
{{< /latex >}}

In the second case, the interpretation also results in a true statement:

{{< latex >}}
\llbracket \texttt{\{} \texttt{x}: + \texttt{\}} \rrbracket\ \rho = \textbf{if}\ \texttt{x} \in \rho\ \textbf{then}\ \rho(\texttt{x}) > 0
{{< /latex >}}

In Agda, I encode the fact that a verified analysis needs a semantic function
\(\llbracket\cdot\rrbracket\) for its element lattice \(L\) by taking such
a function as an argument called `⟦_⟧ˡ`:

{{< codelines "Agda" "agda-spa/Analysis/Forward.agda" 246 253 "hl_lines=5" >}}

I then define the semantic function for the variable-sign lattice in the following
way, which eschews the "..." notation in favor of a more Agda-compatible (and
equivalent) form:

{{< codelines "Agda" "agda-spa/Analysis/Forward.agda" 255 256 >}}

The above reads roughly as follows:

> For every variable `k` and sign [or, more generally, lattice element] `l` in
> the variable map lattice, if `k` is in the environment `ρ`, then it satisfies
> the predicate given by the semantic function applied to `l`.

Let's recap: we have defined a semantic function for our sign lattice, and
noted that to define a verified analysis, we always need such a semantic function.
We then showed how to construct a semantic function for a whole variable map
(of type \(\text{Variable} \to \text{Sign}\), or \(\text{Variable}\to L\)
in general). We also wrote some Agda code doing all this. As a result, we
have filled in the missing piece for {{< internalref "correctly-described" >}}property{{< /internalref >}}.

However, the way that we brought in the semantic function in the Agda code
above hints that there's more to be discussed. What's `latticeInterpretationˡ`?
In answering that question, we'll provide evidence for
{{< internalref "disjunction" >}}property{{< /internalref >}}
and
{{< internalref "equivalence" >}}property{{< /internalref >}}.

### Properties of the Semantic Function

As we briefly saw earlier, we loosened the notion of equality to that of equivalences,
which made it possible to ignore things like the ordering of key-value pairs
in maps. That's great and all, but nothing is stopping us from defining semantic functions that violate our equivalence!
Suppose \(a \approx f(a)\): as far
as Agda is concerned, even though \(a\) and \(f(a)\) are "equivalent",
\(\llbracket a \rrbracket\) and \(\llbracket f(a) \rrbracket\) may be
totally different. For a semantic function to be correct, it must produce
the same predicate for equivalent elements of lattice \(L\). That's
{{< internalref "equivalence" >}}missing piece{{< /internalref >}}.

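Written out as a formula, the requirement looks roughly like this (my informal
rendering, not a verbatim quote of the Agda statement):

{{< latex >}}
\forall a, b, v.\ \textbf{if}\ a \approx b\ \textbf{then}\ (\llbracket a \rrbracket\ v \iff \llbracket b \rrbracket\ v)
{{< /latex >}}
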
Another property of semantic functions that we will need to formalize
is that \((\sqcup)\) represents disjunction.
This comes into play when we reason about the correctness of predecessors in
a Control Flow Graph. Recall that during the last step of processing a given node,
when we are trying to move on to the next node in the trace, we have knowledge
that the current node's variable map accurately describes the intermediate
environment. In other words, \(\llbracket \textit{vs}_i \rrbracket\ \rho_2\) holds, where
\(\textit{vs}_i\) is the variable map for the current node. We can generalize this
knowledge a little, and get:

{{< latex >}}
\llbracket \textit{vs}_1 \rrbracket\ \rho_2\ \text{or}\ ...\ \text{or}\ \llbracket \textit{vs}_n \rrbracket\ \rho_2
{{< /latex >}}

However, the assumption that needs to hold when moving on to a new node
is in terms of \(\textit{JOIN}\), which combines all the predecessors' maps
\(\textit{vs}_1, ..., \textit{vs}_n\) using \((\sqcup)\). Thus, we will need to be in a world where
the following claim is true:

{{< latex >}}
\llbracket \textit{vs}_1 \sqcup ... \sqcup \textit{vs}_n \rrbracket\ \rho
{{< /latex >}}

To get from one to the other, we will need to rely explicitly on the fact
that \((\sqcup)\) encodes "or". It's not necessary for the forward analysis,
but a similar property ought to hold for \((\sqcap)\) and "and". This
constraint provides {{< internalref "disjunction" >}}missing piece{{< /internalref >}}.

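Roughly, the property we need is the following (again my own phrasing, stated
for a two-element join; the \(n\)-ary version follows by iterating it):

{{< latex >}}
\forall a, b, v.\ \textbf{if}\ \llbracket a \rrbracket\ v\ \text{or}\ \llbracket b \rrbracket\ v\ \textbf{then}\ \llbracket a \sqcup b \rrbracket\ v
{{< /latex >}}
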
I defined a new data type that bundles a semantic function with proofs of
the properties in this section; that's precisely what `latticeInterpretationˡ`
is:

{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 66 73 >}}

In short, to leverage the framework for verified analysis, you would need to
provide a semantic function that interacts properly with `≈` and `∨`.

### Correctness of the Evaluator

All that's left is {{< internalref "abstract-interpretation" >}}the last missing piece{{< /internalref >}},
which requires that `eval` matches the semantics of our language. Recall
the signature of `eval`:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 166 166 >}}

It operates on expressions and variable maps, which themselves associate a
sign (or, generally, an element of lattice \(L\)) with each variable. The
"real" evaluation judgement, on the other hand, is in the form
\(\rho, e \Downarrow v\), and reads "expression \(e\) in environment \(\rho\)
evaluates to value \(v\)". In Agda:

{{< codelines "agda" "agda-spa/Language/Semantics.agda" 27 27 >}}

Let's line up the types of `eval` and the judgement. I'll swap the order of arguments
for `eval` to make the correspondence easier to see:

{{< latex >}}
\begin{array}{ccccccc}
\text{eval} & : & (\text{Variable} \to \text{Sign}) & \to & \text{Expr} & \to & \text{Sign} \\
\cdot,\cdot\Downarrow\cdot & : & (\text{Variable} \to \text{Value}) & \to & \text{Expr} & \to & \text{Value} & \to & \text{Set} \\
& & \underbrace{\phantom{(\text{Variable} \to \text{Value})}}_{\text{environment-like inputs}} & & & & \underbrace{\phantom{Value}}_{\text{value-like outputs}}
\end{array}
{{< /latex >}}

Squinting a little, it's almost like the signature of `eval` is the signature
for the evaluation judgement, but it forgets a few details (the exact values
of the variables) in favor of abstractions (their signs). To show that `eval`
behaves correctly, we'll want to prove that this forgetful correspondence holds.

Concretely, for any expression \(e\), take some environment \(\rho\), and "forget"
the exact values, getting a sign map \(\textit{vs}\). Now, evaluate the expression
to some value \(v\) using the semantics, and also, compute the expression's
expected sign \(s\) using `eval`. The sign should be the same as forgetting
\(v\)'s exact value. Mathematically,

{{< latex >}}
\forall e, \rho, v, \textit{vs}.\ \textbf{if}\ \llbracket\textit{vs}\rrbracket \rho\ \text{and}\ \rho, e \Downarrow v\ \textbf{then}\ \llbracket \text{eval}\ \textit{vs}\ e\rrbracket v
{{< /latex >}}

In Agda:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 286 287 >}}

For a concrete analysis, we need to prove the above claim. In the case of
sign analysis, this boils down to a rather cumbersome proof by cases. I will collapse
the proofs to save some space and avoid overwhelming the reader.

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 237 258 "" "**(Click here to expand the proof of correctness for plus)**" >}}
{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 261 282 "" "**(Click here to expand the proof of correctness for minus)**" >}}

{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 284 294 "" >}}

This completes {{< internalref "abstract-interpretation" >}}our last missing piece{{< /internalref >}}.
All that's left is to put everything together.

### Proving The Analysis Correct

#### Lifting Expression Evaluation Correctness to Statements
The individual analyses (like the sign analysis) provide only an evaluation
function for _expressions_, and thus only have to prove correctness of
that function. However, our language is made up of statements, with judgements
in the form \(\rho, s \Rightarrow \rho'\). Now that we've shown (or assumed)
that `eval` behaves correctly when evaluating expressions, we should show
that this correctness extends to evaluating statements, which in the
forward analysis implementation is handled by the
[`updateVariablesFromStmt` function]({{< relref "08_spa_agda_forward#define-updateVariablesFromStmt" >}}).

The property we need to show looks very similar to the property for `eval`:

{{< latex >}}
\forall b, \rho, \rho', \textit{vs}.\ \textbf{if}\ \llbracket\textit{vs}\rrbracket \rho\ \text{and}\ \rho, b \Rightarrow \rho'\ \textbf{then}\ \llbracket \text{updateVariablesFromStmt}\ \textit{vs}\ b\rrbracket \rho'
{{< /latex >}}

In Agda:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 291 291 >}}

The proof is straightforward, and relies on the semantics of the [map update]({{< relref "08_spa_agda_forward#generalized-update" >}}).
Specifically, in the case of an assignment statement \(x \leftarrow e\), all we
do is store the new sign computed from \(e\) into the map at \(x\). To
prove the correctness of the entire final environment \(\rho'\), there are
two cases to consider:

* The variable in question is the newly-updated \(x\). In this case, since
  `eval` produces correct signs, the variable clearly has the correct sign.
  This is the first highlighted chunk in the below code.
* The variable in question is different from \(x\). In this case, its value
  in the environment \(\rho'\) should be the same as it was prior, and
  its sign in the updated variable map is the same as it was in the original.
  Since the original map correctly described the original environment, we know
  the sign is correct. This is the second highlighted chunk in the below
  code.

The corresponding Agda proof is as follows:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 291 305 "hl_lines=5-7 10-15" >}}

From this, it follows with relative ease that each basic block in the CFG,
when evaluated, produces an environment that matches the prediction of our
forward analysis.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 318 318 >}}

#### Walking the Trace

Finally, we get to the meat of the proof, which follows the [outline](#high-level-algorithm). First,
let's take a look at `stepTrace`, which implements the second bullet in
our iterative procedure. I'll show the code, then we can discuss it
in detail.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 324 342 >}}

The first `let`-bound variable, `⟦joinAll-result⟧ρ₁`, is kind of an intermediate
result, which I was forced to introduce because `rewrite` caused Agda to
allocate ~100GB of memory. It simply makes use of the fact that `joinAll`, the
function that performs predecessor joining for each node in the CFG, sets
every key of the map accordingly.

The second `let`-bound variable, `⟦analyze-result⟧`, steps through a given
node's basic block and leverages our proof of statement-correctness to validate
that the final environment `ρ₂` matches the prediction of the analyzer.

The last two `let`-bound variables apply the equation we wrote above:

{{< latex >}}
r = \text{update}(\text{join}(r))
{{< /latex >}}

Recall that `analyze` is the combination of `update` and `join`:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 226 227 >}}

Finally, the `in` portion of the code uses `⟦⟧ᵛ-respects-≈ᵛ`, a proof
of {{< internalref "equivalence" >}}property{{< /internalref >}}, to produce
the final claim in terms of the `result` map.

Knowing how to step, we can finally walk the entire trace, implementing
the iterative process:

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 344 357 >}}

The first step --- assuming that one of the predecessors of the
current node satisfies the initial environment `ρ₁` --- is captured by
the presence of the argument `⟦joinForKey-s₁⟧ρ₁`. We expect the calling code
to provide a proof of that.

The second step, in both cases, is implemented using `stepTrace`,
as we saw above. That results in a proof that at the end of the current basic
block, the final environment `ρ₂` is accurately described.

From there, we move on to the third iterative step, if necessary. The
sub-expression `edge⇒incoming s₁→s₂` validates that, since we have an edge
from the current node to the next, we are listed as a predecessor. This,
in turn, means that we are included in the list of states-to-join for the
\(\textit{JOIN}\) function. That fact is stored in `s₁∈incomingStates`.
Finally, relying on
{{< internalref "disjunction" >}}property{{< /internalref >}},
we construct an assumption fit for a recursive invocation of `walkTrace`,
and move on to the next CFG node. The `foldr` here is motivated by the fact
that "summation" using \((\sqcup)\) is a fold.

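Spelled out, that fold expands roughly as follows, with \(\bot\) (the identity
element of \((\sqcup)\)) as the initial accumulator; this is my sketch of the
computation's shape, not the literal Agda expression:

{{< latex >}}
\textit{vs}_1 \sqcup ... \sqcup \textit{vs}_n = \textit{vs}_1 \sqcup (\textit{vs}_2 \sqcup (\cdots \sqcup (\textit{vs}_n \sqcup \bot)))
{{< /latex >}}
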
When the function terminates, what we have is a proof that the final program
state is accurately described by the results of our program analysis. All
that's left is to kick off the walk. To do that, observe that the initial state
has no predecessors (how could it, if it's at the beginning of the program?).
That, in turn, means that this state maps every variable to the bottom element.
Such a variable configuration only permits the empty environment \(\rho = \varnothing\).
If the program evaluation starts in an empty environment, we have the assumption
needed to kick off the iteration.

{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 359 366 "hl_lines=7" >}}

Take a look at the highlighted line in the above code block in particular.
It states precisely what we were hoping to see: that, when evaluating
a program, the final state when it terminates is accurately described by
the `result` of our static program analysis at the `finalState` in the CFG.
We have done it!

### Future Work

It took a lot of machinery to get where we are, but there are still lots of
things to do.

1. __Correctness beyond the final state__: the statement we've arrived at
   only shows that the final state of the program matches the results of
   the analysis. In fact, the property holds for all intermediate states, too.
   The only snag is that it's more difficult to _state_ such a claim.

   To do something like that, we probably need a notion of "incomplete evaluations"
   of our language, which run our program but stop at some point before the end.
   A full execution would be a special case of such an "incomplete evaluation"
   that stops in the final state. Then, we could restate `analyze-correct`
   in terms of partial evaluations, which would strengthen it.
2. __A more robust language and evaluation process__: we noted above that
   our join-based analysis is a little bit weird, particularly in the
   cases of uninitialized variables. There are ways to adjust our language
   (e.g., introducing variable declaration points) and analysis functions
   (e.g., only allowing assignment for declared variables) to reduce
   the weirdness somewhat. They just lead to a more complicated language.
3. __A more general correctness condition__: converting lattice elements into
   predicates on values gets us far. However, some types of analyses make claims
   about more than the _current_ values of variables. For instance, _live variable
   analysis_ checks if a variable's current value is going to be used in the
   future. Such an analysis can help guide register (re)allocation. To
   talk about future uses of a variable, the predicate will need to be formulated
   in terms of the entire evaluation proof tree. This opens a whole can
   of worms that I haven't begun to examine.

Now that I'm done writing up my code so far, I will start exploring these
various avenues of work. In the meantime, though, thanks for reading!

@@ -92,7 +92,7 @@ as well as a surrounding "context" that says what file the code is from,
which lines are listed, and where to find the full code (e.g., on my Git server).
It looks something like this:

{{< codelines "Agda" "agda-spa/Language.agda" 20 27 >}}
{{< codelines "Agda" "agda-spa/Language/Base.agda" 12 20 >}}

In summary:


@@ -3,11 +3,11 @@ title: "Bergamot: Exploring Programming Language Inference Rules"
date: 2023-12-22T18:16:44-08:00
tags: ["Project", "Programming Languages"]
description: "In this post, I show off Bergamot, a tiny logic programming language and an idea for teaching inference rules."
bergamot:
  render_presets:
    default: "bergamot/rendering/lc.bergamot"
---

{{< katex_component_js >}}
{{< bergamot_js_css >}}

### Inference Rules and the Study of Programming Languages
In this post, I will talk about _inference rules_, particularly in the field
of programming language theory. The first question to get out of the way is
@@ -261,7 +261,7 @@ It has two modes:
`type(empty, ?e, tpair(number, string))` to search for expressions that have
the type "a pair of a number and a string".

{{< bergamot_widget id="widget" query="" prompt="PromptConverter @ prompt(type(empty, ?term, ?t)) <- input(?term);" >}}
{{< bergamot_widget id="widget" query="" prompt="type(empty, TERM, ?t)" >}}
section "" {
  TNumber @ type(?Gamma, lit(?n), number) <- num(?n);
  TString @ type(?Gamma, lit(?s), string) <- str(?s);

591 content/blog/chapel_runtime_types.md Normal file
@@ -0,0 +1,591 @@
---
title: "Chapel's Runtime Types as an Interesting Alternative to Dependent Types"
date: 2025-03-02T22:52:01-08:00
tags: ["Chapel", "C++", "Idris", "Programming Languages"]
description: "In this post, I discuss Chapel's runtime types as a limited alternative to dependent types."
---

One day, when I was in graduate school, the Programming Languages research
group was in a pub for a little gathering. Amidst beers, fries, and overpriced
sandwiches, the professor and I were talking about [dependent types](https://en.wikipedia.org/wiki/Dependent_type). Speaking
loosely and imprecisely, these are types that are somehow constructed from
_values_ in a language, like numbers.

For example, in C++, [`std::array`](https://en.cppreference.com/w/cpp/container/array)
is a dependent type. An instantiation of the _type_ `array`, like `array<string, 3>`,
is constructed from the type of its elements (here, `string`) and a value
representing the number of elements (here, `3`). This is in contrast with types
like `std::vector`, which only depends on a type (e.g., `vector<string>` would
be a dynamically-sized collection of strings).

I was extolling the virtues of general dependent types, like you might find
in [Idris](https://www.idris-lang.org/) or [Agda](https://agda.readthedocs.io/en/latest/getting-started/what-is-agda.html):
more precise function signatures! The
{{< sidenote "right" "curry-howard-note" "Curry-Howard isomorphism!" >}}
The Curry-Howard isomorphism is a common theme on this blog. I've
<a href="{{< relref "typesafe_interpreter_revisited#curry-howard-correspondence" >}}">
written about it myself</a>, but you can also take a look at the
<a href="https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence">
Wikipedia page</a>.
{{< /sidenote >}} The professor was skeptical. He had been excited about
dependent types in the past, but nowadays he felt over them. They were cool, he
said, but there are few practical uses. In fact, he posed a challenge:

> Give me one good reason to use dependent types in practice that doesn't
> involve keeping track of bounds for lists and matrices!
{#bounds-quote}

This challenge alludes to fixed-length lists --- [vectors](https://agda.github.io/agda-stdlib/master/Data.Vec.html) ---
which are one of the first dependently-typed data structures one learns about.
Matrices are effectively vectors-of-vectors. In fact, even in giving my introductory
example above, I demonstrated the C++ equivalent of a fixed-length list, retroactively
supporting the professor's point.

It's not particularly important to write down how I addressed the challenge;
suffice it to say that the notion resonated with some of the other
students present in the pub. In the midst of practical development, how much
of dependent types' power can you leverage, and how much power do you pay
for but never use?

A second round of beers arrived. The argument was left largely unresolved,
and conversation flowed to other topics. Eventually, I graduated, and started
working on the [Chapel language](https://chapel-lang.org/) team (I also
[write on the team's blog](https://chapel-lang.org/blog/authors/daniel-fedorin/)).

When I started looking at Chapel programs, I could not believe my eyes...

### A Taste of Chapel's Array Types

Here's a simple Chapel program that creates an array of 10 integers.

```Chapel
var A: [0..9] int;
```

Do you see the similarity to the `std::array` example above? Of course, the
syntax is quite different, but in _essence_ I think the resemblance is
uncanny. Let's mangle the type a bit --- producing invalid Chapel programs ---
just for the sake of demonstration.

```Chapel
var B: array(0..9, int); // first, strip the syntax sugar
var C: array(int, 0..9); // swap the order of the arguments to match C++
```

Only one difference remains: in C++, arrays are always indexed from zero. Thus,
writing `array<int, 10>` would implicitly create an array whose indices start
with `0` and end in `9`. In Chapel, array indices can start at values other
than zero (it happens to be useful for elegantly writing numerical programs),
so the type explicitly specifies a lower and a higher bound. Other than that,
though, the two types look very similar.

In general, Chapel arrays have a _domain_, typically stored in variables like `D`.
The domain of `A` above is `{0..9}`. This domain is part of the array's type.

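To make the relationship explicit, here is a small sketch of mine (not one of
the post's later examples) that stores the domain in a variable before using it:

```Chapel
var D = {0..9};          // a domain is an ordinary runtime value
var A: [D] int;          // an array declared over that domain
writeln(A.domain == D);  // prints: true
```
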
Before I move on, I'd like to pause and state a premise that is crucial
for the rest of this post: __I think knowing the size of a data structure,
like `std::array` or Chapel's `[0..9] int`, is valuable__. If this premise
were not true, there'd be no reason to prefer `std::array` to `std::vector`, or
care that Chapel has indexed arrays. However, having this information
can help in numerous ways, such as:

* __Enforcing compatible array shapes.__ For instance, the following Chapel
  code would require the two arrays passed to function `doSomething` to have the same size.

  ```Chapel
  proc doSomething(people: [?D] person, data: [D] personInfo) {}
  ```

  Similarly, we can enforce the fact that an input to a function has the same shape
  as the output:

  ```Chapel
  proc transform(input: [?D] int): [D] string;
  ```
* __Consistency in generics__. Suppose you have a generic function that declares
  a new variable of a given type, and just returns it:

  ```Chapel
  proc defaultValue(type argType) {
    var x: argType;
    return x;
  }
  ```

  Code like this exists in "real" Chapel software, by the way --- the example
  is not contrived. By including the bounds etc. in the array type, we can
  ensure that `x` is appropriately allocated. Then, `defaultValue([1,2,3].type)`
  would return an array of three default-initialized integers.
* __Eliding bounds checking__. Bounds checking is useful for safety,
  since it ensures that programs don't read or write past the end of allocated
  memory. However, bounds checking is also slow. Consider the following function that
  sums two arrays:

  ```Chapel
  proc sumElementwise(A: [?D] int, B: [D] int) {
    var C: [D] int;
    for idx in D do
      C[idx] = A[idx] + B[idx];
  }
  ```

  Since arrays `A`, `B`, and `C` have the same domain `D`, we don't need
  to do bounds checking when accessing any of their elements. I don't believe
  this is currently an optimization in Chapel, but it's certainly on the
  table.
* __Documentation__. Including the size of the array as part of the type
  signature clarifies the intent of the code being written. For instance,
  in the following function:

  ```Chapel
  proc sendEmails(numEmails: int, destinationAddrs: [1..numEmails] address) { /* ... */ }
  ```

  It's clear from the type of `destinationAddrs` that there ought to
  be exactly as many destination addresses as the number of emails that should
  be sent.

Okay, recap: C++ has `std::array`, which is a dependently-typed container
that represents an array with a fixed number of elements. Chapel has something
similar. I think these types are valuable.

At this point, it sort of looks like I'm impressed with Chapel for copying a C++
feature from 2011. Not so! As I played with Chapel programs more and more,
arrays miraculously supported patterns that I knew I couldn't write in C++.
The underlying foundation of Chapel's array types is quite unlike any other.
Before we get to that, though, let's take a look at how dependent types
are normally used (by us mere mortal software engineers).

### Difficulties with Dependent Types

Let's start by looking at a simple operation on fixed-length lists: reversing them.
One might write a reverse function for "regular" lists, ignoring details
like ownership and copying, that looks like this:

```C++
std::vector<int> reverse(std::vector<int>);
```

This function is not general: it won't help us reverse lists of
strings, for instance. The "easy fix" is to replace `int` with some kind
of placeholder that can be replaced with any type.

```C++
std::vector<T> reverse(std::vector<T>);
```

You can try compiling this code, but you will immediately run into an error.
What the heck is `T`? Normally,
when we name a variable, function, or type (e.g., by writing `vector`, `reverse`),
we are referring to its declaration somewhere else. At this time, `T` is not
declared anywhere. It just "appears" in our function's type. To fix this,
we add a declaration for `T` by turning `reverse` into a template:

```C++
template <typename T>
std::vector<T> reverse(std::vector<T>);
```

The new `reverse` above takes two arguments: a type and a list of values of
that type. So, to _really_ call this `reverse`, we need to feed the type
of our list's elements into it. This is normally done automatically
(in C++ and otherwise) but under the hood, invocations might look like this:

```C++
reverse<int>({1,2,3}); // produces 3, 2, 1
reverse<string>({"world", "hello"}); // produces "hello", "world"
```

This is basically what we have to do to write `reverse` on `std::array`, which
includes an additional parameter that encodes its length. We might start with
the following (using `n` as a placeholder for length, and observing that
reversing an array doesn't change its length):

```C++
std::array<T, n> reverse(std::array<T, n>);
```

Once again, to make this compile, we need to add template parameters for `T` and `n`.

```C++
template <typename T, size_t n>
std::array<T, n> reverse(std::array<T, n>);
```

Now, you might be asking...

{{< dialog >}}
{{< message "question" "reader" >}}
This section is titled "Difficulties with Dependent Types". What's the difficulty?
{{< /message >}}
{{< /dialog >}}

Well, here's the kicker. C++ templates are a __compile-time mechanism__. As
a result, arguments to `template` (like `T` and `n`) must be known when the
program is being compiled. This, in turn, means
{{< sidenote "right" "deptype-note" "the following program doesn't work:" >}}
The observant reader might have noticed that one of the Chapel programs we
saw above, <code>sendEmails</code>, does something similar. The
<code>numEmails</code> argument is used in the type of the
<code>destinationAddrs</code> parameter. That program is valid Chapel.
{{< /sidenote >}}

```C++
void buildArray(size_t len) {
  std::array<int, len> myArray;
  // do something with myArray
}
```

You can't use these known-length types like `std::array` with any length
that is not known at compile-time. But that's a lot of things! If you're reading
from an input file, chances are, you don't know how big that file is. If you're
writing a web server, you likely don't know the length of the HTTP requests.
Every setting a user can tweak when running your code is another place where
you sacrifice the ability to use templated types.

Also, how do you _return_ a `std::array`? If the size of the returned array is
known in advance, you just list that size:

```C++
std::array<int, 10> createArray();
```

If the size is not known at compile-time, you might want to do something like
the following --- using an argument `n` in the type of the returned array ---
but it would not compile:

```C++
auto computeNNumbers(size_t n) -> std::array<int, n>; // not valid C++
```

Moreover, you can't have `createArray` figure out the required
array size, and _then_ return an array that big, even if
the body of `createArray` uses only compile-time computations.
What you would need is to provide a "bundle" of a value and a type that is somehow
built from that value.

```C++
// magic_pair is invented syntax, will not even remotely work
auto createArray() -> magic_pair<size_t size, std::array<int, size>>;
```

This pair contains a `size` (suppose it's known at compilation time for
the purposes of appeasing C++) as well as an array that uses that `size`
as its template argument. This is not real C++ --- not even close --- but
such pairs are a well-known concept. They are known as
[dependent pairs](https://unimath.github.io/agda-unimath/foundation.dependent-pair-types.html),
or, if you're trying to impress people, \(\Sigma\)-types. In Idris, you
could write `createArray` like this:

```Idris
createArray : () -> (n : Nat ** Vec n Int)
```

There are languages out there --- that are not C++, alas --- that support
dependent pairs, and as a result make it more convenient to use types that
depend on values. Not only that, but a lot of these languages do not force
dependent types to be determined at compile-time. You could write that
coveted `readArrayFromFile` function:

```Idris
readArrayFromFile : String -> IO (n : Nat ** Vec n String)
```

Don't mind `IO`; in pure languages like Idris, this type is a necessity when
interacting with the outside world: reading data in and sending it out. The key is that
`readArrayFromFile` produces, at runtime, a pair of `n`, which is the size
of the resulting array, and a `Vec` of that many `String`s (e.g., one string
per line of the file).

Dependent pairs are cool and very general. However, the end result of
types with bounds which are not determined at compile-time is that you're
_required_ to use dependent pairs. Thus, you must always carry the array's length
together with the array itself.

The bottom line is this:

* In true dependently typed languages, a type that depends on a value (like `Vec`
  in Idris) lists that value in its type. When this value is listed by
  referring to an identifier --- like `n` in `Vec n String` above --- this
  identifier has to be defined somewhere, too. This necessitates dependent pairs,
  in which the first element is used syntactically as the "definition point"
  of a type-level value. For example, in the following piece of code:

  ```Idris
  (n : Nat ** Vec n String)
  ```

  The `n : Nat` part of the pair serves both to say that the first element
  is a natural number, and to introduce a variable `n` that refers to
  this number so that the second type (`Vec n String`) can refer to it.

  A lot of the time, you end up carrying this extra value (bound to `n` above)
  with your type.
* In more mainstream languages, things are even more restricted: the values
  that types depend on are a compile-time property, and thus cannot be
  runtime values like data read from a file, arguments passed in to a function,
  etc.

### Hiding Runtime Values from the Type

Let's try to think of ways to make things more convenient. First of all, as
we saw, in Idris, it's possible to use runtime values in types. Not only that,
but Idris is a compiled language, so presumably we can compile dependently typed programs
with runtime-enabled dependent types. The trick is to forget some information:
turn a vector `Vec n String` into two values (the size of the vector and the
vector itself), and forget --- for the purposes of generating code --- that they're
related. Whenever you pass in a `Vec n String`, you can compile that similarly
to how you'd compile passing in a `Nat` and `List String`. Since the program has
already been type checked, you can be assured that you don't encounter cases
when the size and the actual vector are mismatched, or anything else of that
nature.

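As a rough mental model (my own sketch, not actual compiler output), you can
picture the erased representation as a plain pair of a length and an untracked
collection:

```Chapel
use List;

// Hypothetical sketch: after erasure, a "vector of n strings" travels as
// an ordinary length plus a list, with the type system no longer tracking
// the relationship between the two.
record erasedVec {
  var n: int;              // the former type-level length, now plain data
  var data: list(string);  // the elements themselves
}
```
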
Additionally, you don't always need the length of the vector at all. In a
good chunk of Idris code, the size arguments are only used to ensure type
correctness and rule out impossible cases; they are never accessed at runtime.
As a result, you can _erase_ the size of the vector altogether. In fact,
[Idris 2](https://github.com/idris-lang/Idris2/) leans on [Quantitative Type Theory](https://bentnib.org/quantitative-type-theory.html)
to make erasure easier.

At this point, one way or another, we've "entangled" the vector with a value
representing its size:

* When a vector of some (unknown, but fixed) length needs to be produced from
  a function, we use dependent pairs.
* Even in other cases, when compiling, we end up treating a vector as a
  length value and the vector itself.

Generally speaking, a good language design practice is to hide extraneous
complexity, and to remove as much boilerplate as possible. If the size
value of a vector is always joined at the hip with the vector, can we
avoid having to explicitly write it?

This is pretty much exactly what Chapel does. It _allows_ explicitly writing
the domain of an array as part of its type, but doesn't _require_ it. When
you do write it (re-using my original snippet above):

```Chapel
var A: [0..9] int;
```

What you are really doing is creating a value (the [range](https://chapel-lang.org/docs/primers/ranges.html) `0..9`),
and entangling it with the type of `A`. This is very similar to what a language
like Idris would do under the hood to compile a `Vec`, though it's not quite
the same.

At the same time, you can write code that omits the bounds altogether:

```Chapel
proc processArray(A: [] int): int;
proc createArray(): [] int;
```

In all of these examples, there is an implicit runtime value (the bounds)
that is associated with the array's type. However, we are never forced to
explicitly thread through or include a size. Where reasoning about the bounds is not
necessary, Chapel's domains are hidden away. Chapel refers to the implicitly
present value associated with an array type as its _runtime type_.

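Here's a small sketch of mine illustrating this, using Chapel's `.domain`
query on arrays: even when a formal's type omits the bounds, the runtime type
still carries them.

```Chapel
proc printBounds(A: [] int) {
  // The declared type says nothing about the bounds, but the array's
  // runtime type still knows its domain.
  writeln(A.domain);
}

var A: [0..9] int;
printBounds(A);  // prints: {0..9}
```
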
I hinted earlier that things are not quite the same in this representation
as they are in my simplified model of Idris. In Idris, as I mentioned earlier,
the values corresponding to vectors' indices can be erased if they are not used.
In Chapel, this is not the case --- a domain always exists at runtime. At the
surface level, this means that you may pay for more than what you use. However,
domains enable a number of interesting patterns of array code. We'll get
to that in a moment; first, I want to address a question that may be on
your mind:

{{< dialog >}}
{{< message "question" "reader" >}}
At this point, this looks just like keeping a <code>.length</code> field as
part of the array value. Most languages do this. What's the difference
between this and Chapel's approach?
{{< /message >}}
{{< /dialog >}}

This is a fair question. The key difference is that the length exists even if an array
does not. The following is valid Chapel code (re-using the `defaultValue`
snippet above):

```Chapel
proc defaultValue(type argType) {
  var x: argType;
  return x;
}

proc doSomething() {
  type MyArray = [1..10] int;
  var A = defaultValue(MyArray);
}
```

Here, we created an array `A` with the right size (10 integer elements)
without having another existing array as a reference. This might seem like
a contrived example (I could've just as well written `var A: [1..10] int`),
but the distinction is incredibly helpful for generic programming. Here's
a piece of code from the Chapel standard library, which implements
a part of Chapel's [reduction](https://chapel-lang.org/docs/primers/reductions.html) support:

{{< githubsnippet "chapel-lang/chapel" "e8ff8ee9a67950408cc6d4c3220ac647817ddae3" "modules/internal/ChapelReduce.chpl" "Chapel" 146 >}}
inline proc identity {
  var x: chpl__sumType(eltType); return x;
}
{{< /githubsnippet >}}

Identity elements are important when performing operations like sums and products,
for many reasons. For one, they tell you what the sum (e.g.) should be when there
are no elements at all. For another, they can be used as an initial value for
an accumulator. In Chapel, when you are performing a reduction, there is a
good chance you will need several accumulators --- one for each thread performing
a part of the reduction.

That `identity` function looks almost like `defaultValue`! Since it builds the
identity element from the type, and since the type includes the array's dimensions,
summing an array-of-arrays, even if it's empty, will produce the correct output.

```Chapel
type Coordinate = [1..3] real;

var Empty: [0..<0] Coordinate;
writeln(+ reduce Empty); // sum up an empty list of coordinates
```

As I mentioned before, having the domain be part of the type can also enable
indexing optimizations --- without any need for [interprocedural analysis](https://en.wikipedia.org/wiki/Interprocedural_optimization) ---
in functions like `sumElementwise`:

```Chapel
proc sumElementwise(A: [?D] int, B: [D] int) {
  var C: [D] int;
  for idx in D do
    C[idx] = A[idx] + B[idx];
}
```

The C++ equivalent of this function --- using `vector`s to enable arbitrary-size
lists of numbers read from user input, and `.at` to enable bounds checks ---
does not include enough information for this optimization to be possible.

```C++
void sumElementwise(std::vector<int> A, std::vector<int> B) {
  std::vector<int> C(A.size());

  for (size_t i = 0; i < A.size(); i++) {
    C.at(i) = A.at(i) + B.at(i);
  }
}
```

All in all, this makes for a very interesting mix of features:

* __Chapel arrays have their bounds as part of types__, like `std::array` in C++
  and `Vec` in Idris. This enables all the benefits I've described above.
* __The bounds don't have to be known at compile-time__, like all dependent
  types in Idris. This means you can read arrays from files (e.g.) and still
  reason about their bounds as part of the type system.
* __Domain information can be hidden when it's not used__, and does not require
  explicit additional work like template parameters or dependent pairs.

Most curiously, runtime types only extend to arrays and domains. In that sense,
they are not a general-purpose replacement for dependent types. Rather,
they make arrays and domains special, and single out the exact case my
professor was [talking about in the introduction](#bounds-quote). Although
at times I've [twisted Chapel's type system in unconventional ways](https://chapel-lang.org/blog/posts/linear-multistep/)
to simulate dependent types, rarely have I felt a need for them while
programming in Chapel. In that sense --- and in the "practical software engineering"
domain --- I may have been proven wrong.

### Pitfalls of Runtime Types

Should all languages do things the way Chapel does? I don't think so. Like
most features, runtime types like Chapel's are a language design
tradeoff. Though I've covered their motivation and semantics, perhaps
I should mention the downsides.

The greatest downside is that, generally speaking, _types are not always a
compile-time property_. We saw this earlier with `MyArray`:

```Chapel
type MyArray = [1..10] int;
```

Here, the domain of `MyArray` (one-dimensional with bounds `1..10`) is a runtime
value. It has an
{{< sidenote "right" "dce-note" "execution-time cost." >}}
The execution-time cost is, of course, modulo <a href="https://en.wikipedia.org/wiki/Dead-code_elimination">dead code elimination</a> etc. If
my snippet made up the entire program being compiled, the end result would
likely do nothing, since <code>MyArray</code> isn't used anywhere.
{{< /sidenote >}}
Moreover, types that serve as arguments to functions (like `argType` for
`defaultValue`), or as their return values (like the result of `chpl__sumType`)
also have an execution-time backing. This is quite different from most
compiled languages. For instance, in C++, templates are "stamped out" when
the program is compiled. A function with a `typename T` template parameter
called with type `int`, in terms of generated code, is always the same as
a function where you search-and-replaced `T` with `int`. This is called
[monomorphization](https://en.wikipedia.org/wiki/Monomorphization), by the
way. In Chapel, however, if a function is instantiated with an array type,
it will have an additional parameter, which represents the runtime component
of the array's type.

The fact that types are runtime entities means that compile-time type checking
is insufficient. Take, for instance, the above `sendEmails` function:

```Chapel
proc sendEmails(numEmails: int, destinationAddrs: [1..numEmails] address) { /* ... */ }
```

Since `numEmails` is a runtime value (it's a regular argument!), we can't ensure
at compile-time that a given array matches the `[1..numEmails] address`
type. As a result, Chapel defers bounds checking to when the `sendEmails`
function is invoked.

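For instance, a call like the following compiles, but would fail that deferred
check at runtime (a sketch of mine, reusing the hypothetical `address` type
from the earlier snippet):

```Chapel
var addrs: [1..3] address;
sendEmails(4, addrs);  // compiles; halts at runtime, since the argument's
                       // domain {1..3} doesn't match the expected {1..4}
```
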
This leads to some interesting performance considerations. Take two Chapel records
(similar to `struct`s in C++) that simply wrap a value. In one of them,
we provide an explicit type for the field, and in the other, we leave the field
type generic.

```Chapel
record R1 { var field: [1..10] int; }
record R2 { var field; }

var A = [1,2,3,4,5,6,7,8,9,10];
var r1 = new R1(A);
var r2 = new R2(A);
```

In a conversation with a coworker, I learned that these are not the same.
That's because the record `R1` explicitly specifies a type
for `field`. Since the type has a runtime component, the constructor
of `R1` will actually perform a runtime check to ensure that the argument
has 10 elements. `R2` will not do this, since there isn't any other type
to check against.

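A quick way to observe the difference (my own sketch, reusing the definitions
above) is to hand both records an array of the wrong size. Neither line is
rejected at compile time; the difference shows up purely at execution time:

```Chapel
var B = [1, 2, 3];     // only three elements
var rBad = new R1(B);  // compiles, but R1's runtime check halts here
var rOk = new R2(B);   // fine: field's type is simply inferred from B
```
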
Of course, the mere existence of an additional runtime component is a performance
consideration. To ensure that Chapel programs perform as well as possible,
the Chapel standard library attempts to avoid using runtime components
wherever possible. This leads to a distinction between a "static type"
(known at compile-time) and a "dynamic type" (requiring a runtime value).
The `chpl__sumType` function mentioned above uses only the static components of
types, because we don't want each call to `+ reduce` to run a number
of extraneous runtime queries.

### Conclusion

Though runtime types are not a silver bullet, I find them to be an elegant
middle-ground solution to the problem of tracking array bounds. They enable
optimizations, generic programming, and more, without the complexity of
a fully dependently-typed language. They are also quite unlike anything I've
seen in any other language.

What's more, this post only scratches the surface of what's possible using
arrays and domains. Besides encoding array bounds, domains include information
about how an array is distributed across several nodes (see the
[distributions primer](https://chapel-lang.org/docs/primers/distributions.html)),
and how it's stored in memory (see the [sparse computations](https://chapel-lang.org/blog/posts/announcing-chapel-2.3/#sparse-computations)
section of the recent 2.3 release announcement). In general, they are a very
flavorful component of Chapel's "special sauce" as a language for parallel
computing.

You can read more about arrays and domains in the [corresponding primer](https://chapel-lang.org/docs/primers/arrays.html).
BIN
content/blog/i_love_programming_languages/cardeli-products.png
Normal file
BIN
content/blog/i_love_programming_languages/cardeli-products.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 81 KiB |
415
content/blog/i_love_programming_languages/index.md
Normal file
415
content/blog/i_love_programming_languages/index.md
Normal file
@@ -0,0 +1,415 @@
---
title: "Reasons to Love the Field of Programming Languages"
date: 2025-12-31
tags: ["Programming Languages", "Compilers", "Type Systems"]
---

I work at HPE on the
[Chapel Programming Language](https://chapel-lang.org). Recently, another HPE
person asked me:

> So, you work on the programming language. What's next for you?

This caught me off-guard because I hadn't even conceived of moving on.
I don't want to move on, because __I love the field of programming languages__.
In addition, I have come to think there is something in PL for everyone, from
theorists to developers to laypeople.
So, in that spirit, I am writing this list as a non-exhaustive survey that serves
the dual purpose of explaining my personal infatuation with PL, and providing
others with ways to engage with PL that align with their existing interests.
I try to provide rationale for each claim, but you can just read the reasons
themselves and skip the rest.

My general thesis goes something like this: programming languages are a unique
mix of the __inherently human and social__ and the __deeply mathematical__,
a mix that often remains deeply grounded in the practical, __low-level realities of
our hardware__.

Personally, I find all of these properties equally important, but we have to
start somewhere. Let's begin with the human aspect of programming languages.

### Human Aspects of PL

> Programs must be written for people to read, and only incidentally for machines
> to execute.
>
> --- Abelson & Sussman, _Structure and Interpretation of Computer Programs_.

As we learn more about the other creatures that inhabit our world, we discover
that they are similar to us in ways that we didn't expect. However, our
language is unique to us. It gives us the ability to go far beyond
the simple sharing of information: we communicate abstract concepts,
social dynamics, stories. In my view, storytelling is our birthright more
so than anything else.

I think this has always been reflected in the broader discipline of programming.
_Code should always tell a story_, I've heard throughout my education and career.
_It should explain itself_. In paradigms such as
[literate programming](https://en.wikipedia.org/wiki/Literate_programming),
we explicitly mix prose and code. Notebook technologies
like [Jupyter](https://jupyter.org/) intersperse computation with explanations
thereof.

* __Reason 1__: programming languages provide the foundation for expressing
human thought and stories through code.

From flowery prose to clinical report, human expression takes a wide variety
of forms. The need to vary our descriptions is also well-served by the diversity
of PL paradigms. From stateful transformations in languages like Python and C++,
through pure and immutable functions in Haskell and Lean, to fully declarative
statements-of-fact in Nix, various languages have evolved to
support the many ways in which we wish to describe our world and our needs.

* __Reason 2__: diverse programming languages enable different perspectives
and ways of storytelling, allowing us choice in how to express our thoughts
and solve our problems.

Those human thoughts of ours are not fundamentally grounded in logic,
mathematics, or anything else. They are a product of millennia of evolution
through natural selection, of adaptation to ever-changing conditions.
Our cognition is limited, rife with blind spots, and partial to the subject
matter at hand. We lean on objects, actors, contracts, and more as helpful,
mammal-compatible analogies. I find this to be beautiful; here is something
we can really call ours.

* __Reason 3__: programming languages imbue the universe's fundamental rules of
computation with humanity's identity and idiosyncrasies. They carve out
a home for us within impersonal reality.

Storytelling (and, more generally, writing) is not just about communicating
with others. Writing helps clarify one's own thoughts, and helps one think deeper.
In his 1979 Turing Award lecture,
[Notation as a Tool of Thought](https://www.eecg.utoronto.ca/~jzhu/csc326/readings/iverson.pdf),
Kenneth Iverson, the creator of [APL](https://tryapl.org/), highlighted ways
in which programming languages, with their notation, can help express patterns
and facilitate thinking.

Throughout computing history, programming languages have built abstractions that ---
together with advances in hardware --- made it possible to create ever more
complex software. Dijkstra's
[structured programming](https://en.wikipedia.org/wiki/Structured_programming)
crystallized the familiar patterns of `if`/`else` and `while` out of
a sea of control flow. Structures and objects partitioned data and state
into bundles that could be reasoned about, or put out of mind when irrelevant.
More recently, I dare say that the notions of ownership and lifetimes popularized
by Rust have clarified how we think about memory.

* __Reason 4__: programming languages combat complexity, and give us tools to
think and reason about unwieldy and difficult problems.

The fight against complexity occurs on more battlegrounds than PL design alone.
Besides its syntax and semantics, a programming language comprises its
surrounding tooling: its interpreter or compiler, perhaps its package manager
or even its editor. Language designers and developers take great care to
[improve the quality of error messages](https://elm-lang.org/news/compiler-errors-for-humans),
to provide [convenient editor tooling](https://chapel-lang.org/blog/posts/chapel-lsp/),
and to build powerful package managers
like [Yarn](https://yarnpkg.com/). Thus, in each language project, there is
room for folks who, even if they are not particularly interested in grammars or
semantics, care about the user experience.

* __Reason 5__: programming languages provide numerous opportunities for
thoughtful forays into the realms of User Experience and Human-Computer
Interaction.

I hope you agree, by this point, that programming languages are fundamentally
tethered to the human. Like any human endeavor, then, they don't exist in
isolation. To speak a language, one usually wants a partner who understands
and speaks that same language. Likely, one wants a whole community, topics
to talk about, or even a set of shared beliefs or mythologies. This desire
maps onto the realm of programming languages. When using a particular PL,
you want to talk to others about your code, implement established design patterns,
use existing libraries.

I mentioned mythologies earlier. In some ways, language
communities do more than share know-how about writing code. In many
cases, I think language communities rally around ideals embodied by their
language. The most obvious example seems to be Rust. From what I've seen,
the Rust community believes in language design that protects its users
from the pitfalls of low-level programming. The Go community
believes in radical simplicity. Julia actively incorporates contributions from
diverse research projects into an interoperable set of scientific packages.

* __Reason 6__: programming languages are complex collaborative social projects
that have the power to champion innovative ideas within the field of
computer science.

So far, I've presented interpretations of the field of PL as tools for expression and thought,
as a human harbor in the universe's ocean, and as collaborative social projects.
These interpretations coexist and superimpose, but they are only a fraction of
the whole. What has kept me enamored with PL is that it blends these human
aspects with a mathematical ground truth, through fundamental connections to
computation and mathematics.

### The Mathematics of PL

> Like buses: you wait two thousand years for a definition of “effectively
> calculable”, and then three come along at once.
>
> --- Philip Wadler, _Propositions as Types_

There are two foundations,
[lambda calculus](https://en.wikipedia.org/wiki/Lambda_calculus) and
[Turing machines](https://en.wikipedia.org/wiki/Turing_machine), that underpin
most modern PLs. Of the "famous" computational models, the abstract notion of
a Turing machine is the one most closely related to the
[von Neumann Architecture](https://en.wikipedia.org/wiki/Von_Neumann_architecture).
Through the bottom-up organization of "control unit instructions" into
"structured programs" into the imperative high-level languages of today, we can
trace the influence of Turing machines in C++, Python, Java, and many others.
At the same time, and running on the same hardware, functional programming
languages like Haskell represent a chain of succession from the lambda calculus,
embellished today with types and numerous other niceties. These two lineages
are inseparably linked: they have been mathematically proven to be equivalent.
They are two worlds coexisting.

The two foundations have a crucial property in common: they are descriptions
of what can be computed. Both were developed initially as mathematical formalisms.
They are rooted not only in pragmatic concerns of "what can I do with
these transistors?", but in the deeper questions of "what can be done
with a computer?".

* __Reason 7__: general-purpose programming languages are built on foundations of computation,
and wield the power to compute anything we consider "effectively computable" at all.

Because of these mathematical beginnings, we have long had precise and powerful
ways to talk about what code written in a particular language _means_.
This is the domain of _semantics_. Instead of reference implementations
of languages (CPython for Python, `rustc` for Rust), and instead of textual
specifications, we can explicitly map constructs in languages to
mathematical objects ([denotational semantics](https://en.wikipedia.org/wiki/Denotational_semantics))
or describe how to (abstractly) execute them ([operational semantics](https://en.wikipedia.org/wiki/Operational_semantics)).

To be honest, the precise and mathematical nature of these tools is, for me,
justification enough to love them. However, precise semantics for languages
have real advantages. For one, they allow us to compare programs' real
behavior with what we _expect_, giving us a "ground truth" when trying to
fix bugs or evolve the language. For another, they allow us to confidently
make optimizations: if you can _prove_ that a transformation won't affect
a program's behavior, but will make it faster, you can safely use it. Finally,
the discipline of formalizing programming language semantics usually entails
boiling them down to their most essential components. Stripping away the
[syntax sugar](https://en.wikipedia.org/wiki/Syntactic_sugar) helps clarify
how complex combinations of features should behave together.

Some of these techniques bear a noticeable resemblance to the study of
semantics in linguistics. Given our preceding discussion on the humanity
of programming languages, perhaps that's not too surprising.

* __Reason 8__: programming languages can be precisely formalized, giving
exact, mathematical descriptions of how they should work.

In talking about how programs behave, we run into an important limitation
of reasoning about Turing machines and lambda calculus, stated precisely in
[Rice's theorem](https://en.wikipedia.org/wiki/Rice%27s_theorem):
all non-trivial semantic properties of programs (e.g., termination, or throwing errors)
are undecidable. There will always be programs that elude not only human analysis,
but algorithmic understanding.

It is in the context of this constraint that I like to think about type systems.
The beauty of type systems, to me, is in how they tame the impossible.
Depending on the design of a type system, a well-typed program may well be
guaranteed not to produce any errors, or to produce only the "expected" sort of
errors. By constructing reasonable _approximations_ of program
behavior, type systems allow us to verify that programs are well-behaved in
spite of Rice's theorem. Much of the time, too, we can do so in a way that is
straightforward for humans to understand and machines to execute.

* __Reason 9__: in the face of the fundamentally impossible, type systems
pragmatically grant us confidence in our programs for surprisingly little
conceptual cost.

At first, type systems look like engineering formalisms. That
may well be the original intention, but in our invention of type systems,
we have actually completed a quadrant of a deeper connection: the
[Curry-Howard isomorphism](https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence).
[Propositions](https://en.wikipedia.org/wiki/Proposition), in the logical sense,
correspond one-to-one with types of programs, and proofs of these propositions
correspond to programs that have the matching type.

This is an incredibly deep connection. In adding parametric polymorphism
to a type system (think Java generics, or C++ templates without specialization),
we augment the corresponding logic with the "for all x" (\(\forall x\)) quantifier.
Restrict the copying of values in a way similar to Rust, and you get an
[affine logic](https://en.wikipedia.org/wiki/Affine_logic), capable of reasoning about resources and their use.
In languages like Agda with [dependent types](https://en.wikipedia.org/wiki/Dependent_type),
you get a system powerful enough [to serve as a foundation for mathematics](https://en.wikipedia.org/wiki/Intuitionistic_type_theory).
Suddenly, you can write code and mathematically prove properties about that
code in the same language. I've done this in my work with
[formally-verified static program analysis]({{< relref "series/static-program-analysis-in-agda" >}}).

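To make the correspondence a little more tangible, here is a tiny sketch of my
own in Lean (my illustration, not an excerpt from any of the sources above):
the type of the polymorphic identity function reads as the proposition "every
proposition implies itself", and the function inhabiting that type is the proof.

```Lean
-- The type, read as a proposition: for every proposition P, P implies P.
-- The program inhabiting the type doubles as the proposition's proof.
def identityProof : ∀ (P : Prop), P → P :=
  fun P h => h  -- given a proof h of P, hand it right back
```
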
This connection proves appealing even from the perspective of "regular"
mathematics. We have developed established engineering practices
for writing code: review, deployment, documentation. What if we could use
the same techniques for doing mathematics? What if, through the deep
connection of programming languages to logic, we could turn mathematics
into a computer-verified, collaborative endeavor?
I therefore present:

* __Reason 10__: type systems for programming languages deeply correspond
to logic, allowing us to mathematically prove properties about code,
using code, and to advance mathematics through the practices of software engineering.

{{< details summary="Bonus meta-reason to love the mathy side of PL!" >}}
In addition to the theoretical depth, I also find great enjoyment in the way that PL is practiced.
Here more than elsewhere, creativity and artfulness come into
play. In PL, [inference rules](https://en.wikipedia.org/wiki/Rule_of_inference) are a
lingua franca through which the formalisms I've mentioned above are expressed
and shared. They are such a central tool in the field that I've
developed [a system for exploring them interactively]({{< relref "blog/bergamot" >}})
on this blog.

In me personally, inference rules spark joy. They are a concise and elegant
way to do much of the formal heavy-lifting I described in this section;
we use them for operational semantics, type systems, and sometimes more.
When navigating the variety and complexity of the many languages and type
systems out there, we can count on inference rules to take us directly to
what we need to know. This same variety naturally demands flexibility in
how rules are constructed, and what notation is used. Though this can sometimes
be troublesome (one [paper](https://labs.oracle.com/pls/apex/f?p=LABS%3A0%3A%3AAPPLICATION_PROCESS%3DGETDOC_INLINE%3A%3A%3ADOC_ID%3A959)
I've seen describes __27__ different ways of writing the simple operation of substitution in the literature!),
it also creates opportunities for novel and elegant ways of formalizing
PL.

* __Bonus Reason__: the field of programming languages has a standard technique
for expressing its formalisms, which precisely highlights core concepts
and leaves room for creative expression and elegance.
{{< /details >}}

I know that mathematics is a polarizing subject. Often, I find myself
torn between wanting precision and eschewing overzealous formalism. The
cusp between the two is probably determined by my own tolerance for abstraction.
Regardless of how much abstraction you are interested in learning about,
PL has another dimension, close to the ground: more often than not, our languages
need to execute on real hardware.

### Pragmatics of PL

Your perfectly-designed language can be completely useless if there is no
way to
{{< sidenote "right" "execute-note" "execute it" >}}
Technically, there are languages that don't care if you execute them at all.
Many programs in theorem-proving languages like Agda and Rocq exist only
to be type-checked. So, you could nitpick this claim; or, you could take
it more generally: your language can be useless if there's no
way to make it efficiently do what it's been made to do.
{{< /sidenote >}} efficiently. Thus, the field of PL subsumes not only
the theoretical foundations of languages and their human-centric design; it
also includes their realization as software.

The overall point of this section is that there is much depth to the techniques
involved in bringing a programming language to life. If you are a tinkerer
or engineer at heart, you will never run out of avenues of exploration.
The reasons are all framed from this perspective.

One fascinating aspect of programming languages is the "direction" from
which they have grown. On one side, you have languages that came
together from the need to control and describe hardware. I'd say that
this is the case for C and C++, Fortran, and others. More often than not,
these languages are compiled to machine code. Still subject to human
constraints, these languages often evolve more user-facing features as time
goes on. On the other side, you have languages developed to enable
people to write software, which only later faced the constraints of actually
working efficiently. These are languages like Python, Ruby, and JavaScript. These
languages are often interpreted (executed by a dedicated program), with
techniques such as [just-in-time compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation).
There is no one-size-fits-all way to execute a language, and as a result,

* __Reason 11__: the techniques of executing programming languages are varied
and rich. From compilation, to JIT, to interpretation, the field
has many sub-disciplines, each with its own know-how and tricks.

At the same time, someone whose goal is to actually develop a compiler
likely doesn't want to develop everything from scratch. To do so would
be a daunting task, especially if you want the compiler to run beyond
the confines of a personal machine. CPU [architectures](https://en.wikipedia.org/wiki/Instruction_set_architecture)
and operating system differences are hard for any individual to keep up with.
Fortunately, we have a gargantuan ongoing effort in the field:
the [LLVM Project](https://llvm.org/). LLVM spans numerous architectures
and targets, and has become a common back-end for languages like C++
(via [Clang](https://clang.llvm.org/get_started.html)), Swift, and Rust.
LLVM helps share and distribute the load of keeping up with the ongoing
march of architectures and OSes. It also provides a shared playground upon
which to experiment with language implementations, optimizations, and more.

* __Reason 12__: large projects like LLVM enable language designers to
lean on decades of precedent to develop a compiler for their language.

Though LLVM is powerful, it does not automatically grant languages implemented
with it good performance. In fact, no other tool does. To make a language
run fast requires a deep understanding of the language itself, the hardware
upon which it runs, and the tools used to execute it. That is a big ask!
Modern computers are extraordinarily complex. Techniques such as
[out-of-order execution](https://en.wikipedia.org/wiki/Out-of-order_execution),
[caching](https://en.wikipedia.org/wiki/Cache_(computing)#HARDWARE),
and [speculative execution](https://en.wikipedia.org/wiki/Speculative_execution)
are constantly at play. This means that any program is subject to hard-to-predict
and often unintuitive effects. On top of that, depending on your language's
capabilities, performance work can often entail working with additional
hardware, such as GPUs and NICs, which have their own distinct performance
characteristics. This applies to both compiled and interpreted languages.
Therefore, I give you:

* __Reason 13__: improving the performance of a programming language is rife
with opportunities to engage with low-level details of the hardware
and operating system.

In the [mathematics section](#the-mathematics-of-pl), we talked about how constructing correct
optimizations requires an understanding of the language's semantics. It
was one of the practical uses for having a mathematical definition of a language.
Reason 13 is where that comes in, but the synthesis is not automatic. In fact,
a discipline sits in-between defining how a language behaves and
optimizing programs: program analysis. Algorithms that analyze
properties of programs such as [reaching definitions](https://en.wikipedia.org/wiki/Reaching_definition)
enable optimizations such as [loop-invariant code motion](https://en.wikipedia.org/wiki/Loop-invariant_code_motion),
which can have a very significant performance impact. At the same time, for an
analysis to be correct, it must be grounded in the program's mathematical
semantics. There are many fascinating techniques in this discipline,
including [ones that use lattice theory](https://cs.au.dk/~amoeller/spa/spa.pdf).

* __Reason 14__: the sub-discipline of program analysis serves as a grounded
application of PL theory to PL practice, enabling numerous optimizations
and transformations.

The programs your compiler generates are software, and, as we just saw,
may need to be tweaked for performance. But the compiler and/or interpreter
is itself a piece of software, with performance concerns of its own. Today's language
implementations are subject to demands that simply weren't there historically.
For instance, language implementations now provide [language servers](https://microsoft.github.io/language-server-protocol/)
to enable editors to give users deeper insights into their code. Today,
a language implementation may be called upon every keystroke to provide
a typing user live updates. This has led to the introduction of
techniques like the [query architecture](https://ollef.github.io/blog/posts/query-based-compilers.html)
(see also [salsa](https://github.com/salsa-rs/salsa)) to avoid
redundant work and re-use intermediate results; I sketch the idea right
after the next reason. New language implementations
like that of [Carbon](https://github.com/carbon-language/carbon-lang)
are exploring alternative representations of programs in memory. In
short,

* __Reason 15__: language implementations are themselves pieces of software,
subject to unique constraints and requiring careful and innovative
engineering.

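Here is the promised back-of-the-napkin sketch of the query idea, in this
blog's usual Chapel (a toy of my own, with a hypothetical `computeTypeOf`
standing in for the real analysis; nothing like how a production compiler is
written): derived facts are memoized functions of other facts, so repeated
requests hit a cache instead of redoing the work.

```Chapel
// Toy query cache: maps a symbol's name to its inferred type, computed once.
var known: domain(string);  // associative domain of already-answered queries
var cache: [known] string;  // the memoized answers, one per known symbol

proc typeOf(symbol: string): string {
  if known.contains(symbol) then return cache[symbol];  // reuse earlier work
  const result = computeTypeOf(symbol);  // hypothetical expensive analysis
  known += symbol;
  cache[symbol] = result;
  return result;
}
```

A real query-based implementation (salsa, for instance) additionally tracks
dependencies between queries, so that an edit invalidates only the cached
answers whose inputs actually changed; the cache above is just the simplest
piece of that story.
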
### Conclusion

I've now given a tour of the ways in which I find the PL field compelling,
organized across three broad categories. There is just one more reason
I'd like to share.

I was 16 years old when I got involved with the world of programming
languages and compilers. Though I made efforts to learn about it through
literature (the _Dragon Book_, and _Modern Compiler Design_), I simply
didn't have the background to find these resources accessible. However, all
was not lost. The PL community online has been, and still is, a vibrant and
enthusiastic place. I have found it to be welcoming of folks with backgrounds
spanning from complete beginners to experts. Back then, it gave me
accessible introductions to anything I wanted. Now, every week I see new
articles go by that challenge my intuitions, teach me new things, or take PL
ideas to absurd and humorous extremes. So, my final reason:

* __Reason 16__: the programming languages community is full of brilliant,
kind, welcoming and enthusiastic people, who dedicate much of their
time to spreading the joy of the field.

I ❤️ you.
@@ -17,7 +17,8 @@ spend time explaining dependent types, nor the syntax for them in Idris,
which is the language I'll use in this article. Below are a few resources
that should help you get up to speed.

{{< todo >}}List resources{{< /todo >}}
> [!TODO]
> List resources

We've seen that, given a function `F a -> a`, we can define a function
`B -> a`, if `F` is a base functor of the type `B`. However, what if

@@ -6,6 +6,6 @@ summary = """
in Agda. The goal is to have a formally verified, yet executable, static
analyzer for a simple language.
"""
status = "ongoing"
status = "complete"
divider = ": "
+++

110
content/writing/thevoid.md
Normal file
110
content/writing/thevoid.md
Normal file
@@ -0,0 +1,110 @@
---
title: "Untitled Short Story"
date: 2024-08-01T20:31:18-07:00
type: thevoid
---

> I'm losing my edge to the art-school Brooklynites in little jackets and\
> _borrowed nostalgia for the unremembered Eighties_

The {{< thevoid "Everpresent Void" >}} was first discovered at a children's birthday party.

Among the laughter and alluring warbling of an arcade, a party was preparing to take their seats at a worn table. The food was french fries, mediocre cheese pizza, and hamburgers; the sort of diet that would be frowned upon at home, and as a result was now highly coveted. The main event, however, turned out to be the self-serve soda machine. It provided unlimited amounts of sugary beverages at the slightest provocation, as evidenced by the sticky layer of dried drinks that covered the table.

It was an unusual sight: such machines were still somewhat rare in those days. Soon, the children were drunk on Coca-Cola and power. Cups were filled, emptied, spilled, dropped on the floor, and used as musical instruments, all while the group crowded around the soda dispenser. The birthday girl soon found a new dimension along which the machine could be abused. One cup need not contain just a single drink.

This new discovery reignited the drinking frenzy. Sensible combinations soon gave way to outrageous mixes. Drinks were paired up, tripled, quadrupled. Soon, everyone was rushing to mix every flavor together, telling stories of a chemical reaction that would occur when they were combined in precise proportions. No such reaction came.

The children were not satisfied with this conclusion. They continued their search for the missing ingredient. Having exhausted the products of the soda machine, they had to broaden their horizons. Ketchup and mustard were the first additions to their repertoire. The boys made shows of tasting and being revolted by their mustard-root-cola, while the girls squealed with disapproval and laughter. Having still failed to perform their act of alchemy, the kids braved yet further frontiers, dropping leftover pieces of cheese and torn fragments of napkins into their cup-cauldrons.

Then, it worked.

When one of the children looked back at his cup, having been distracted by another's exaggerated gagging, he found it to contain a uniformly black fluid. This intrigued the boy; he went to prod it with his fork, but it never reached the side of the cup. Startled, he dropped the utensil, and watched it sink out of sight. This too was intriguing: the fork was noticeably longer than the container.

The others soon crowded around him to examine what was later understood to be the first instance of the {{< thevoid "Everpresent Void" >}}. They dropped straws, arcade tickets, cheap toys (purchased with arcade tickets), and coins into the cup, all of which disappeared without a sound. The boy found himself at the center of attention, and took great pleasure in recounting his latest recipe. Soon, the {{< thevoid "Void" >}} was replicated in the cups of everyone at the party.

---

During the first week after that incident, teachers and janitors had a particularly difficult time. Various quantities of the {{< thevoid "Void" >}} were smuggled into schools. When the staff caught on to peculiarly black water bottles, smugglers switched to more creative techniques involving Ziploc bags and photographic film tubes. Like crystals around an impurity, crowded islands formed at lunch tables with {{< thevoid "Void" >}} at their centers. The featureless and endless substance drew all attention away from civil wars, derivatives, and Steinbeck novels.

Only, the {{< thevoid "Void" >}} was not entirely featureless and endless. As kids spent entire lunch breaks gazing into the darkness of the fluid, some thought they saw something. As more took on the arduous task of sitting perfectly still and staring into space, it became clear that this was no mere trick of the mind.

With time, a light show emerged from the emptiness of the {{< thevoid "Void" >}}. It was not unlike pressing down on one’s eyes: colorful particles swirled in the darkness forming spirals and fractals. These gradually changed colors, appearing at times red-and-beige, at times blue-and-green, and everything in-between.

The display was polarizing. Swaths of children, though initially enthralled by the mysterious properties of the {{< thevoid "Void" >}}, were not sufficiently re-captured by some flashing colors. In the later parts of the week, they would leave lunch halls early to study, practice, or socialize. There were, they thought, better, more normal things to do. A minority, however, only grew more enamored with their philosopher's stones.

---

Like alchemists of the past, many of the remaining experimenters had a tendency to obsess. Even as the world --- with its track meets, birthday parties, and dances --- went on around them, they continued their close observation of the mysterious substance. The {{< thevoid "Void" >}} proved worthy of this sustained attention. The patterns that swirled in its depths were not entirely random: they responded, reluctantly and sluggishly, to the observer's mind. Anger and frustration tended to produce redder hues; sadness manifested as a snotty shade of green. Focusing on a particular color made it more likely to appear, as well. Following its own peculiar kind of intuition, the {{< thevoid "Void" >}} responded faster when more individuals were present.

Other promising avenues of research also grew in popularity over the following days and weeks. The precise recipe for the {{< thevoid "Void" >}} was not, it turned out, very strict. Though soda and fast food remained a constant fixture, the precise ingredients could be substituted for alternates. Pieces of swiss cheese worked just as well as cheddar. A fragment of a turkey patty replaced the traditional 100% Angus beef in a pinch. The resulting substance was as opaque and inscrutable as ever.

Following much trial and error, adolescent adventurers mapped the frontiers of {{< thevoid "Void" >}} synthesis. Though the full specification is not particularly relevant, of note was the requirement for the base to be made of a mixture of sodas, and another for the final concoction to contain at least two sandwich ingredients. Orange juice, though sweet and liquid, did not catalyze the reaction, but Orange Fanta did, even if it was completely flat.

---

If all properties hereto described were the only notable aspects of the {{< thevoid "Void" >}}, it would have been a mere curiosity. Late-night History Channel shows might have shown it alongside theories of ancient aliens or telepathy, filling inattentive viewers' dimly lit homes with comfortable background noise. The substance, however, had one final, crucial aspect. The discovery was made --- as is often the case --- in the midst of conflict.

The two parties could not be more different. One group consisted of the boys from Mr. Thompson's physics class. They were skinny, bespectacled, and dressed in graphic t-shirts and jeans. The other group was made of the boys from Mrs. Leonard's biology class; they were skinny, bespectacled, and dressed in graphic t-shirts and jeans. Naturally, the two factions were sworn enemies.

One rainy West Coast day, the two groups were engaging in customary lunch-break {{< thevoid "Void" >}}-viewing. By then, participants in the activity were relegated to the floors of a hallway in the back of the school, their sustained interest in staring-into-space taking a toll on their social standing. They were making use of advanced techniques; by then, experts were able to influence the {{< thevoid "Void" >}} not only to change color, but to form into shimmering images. The Thompsonians were constructing an image of a Christmas tree; the Leonardese were working on The Funniest Image to Ever Exist (something phallic).

Neither group was having much luck. The tree's trunk was consistently much too short, and its bottom too curvy. The phallus, conversely, was quite sharp and somehow sickly looking. Each side mocked the other relentlessly. It took the insult-hurlers from the two camps several back-and-forth trips to realize that the images they were ridiculing and the images their friends were conjuring were one and the same.

---

The {{< thevoid "Void" >}} was interconnected. By repeating a specific and precise recipe, one could reliably come back over and over to the same "place" in the infinite blackness. Painstakingly painted pictures persisted into the next day, and anyone with the same recipe could tune in to see. Enthusiasts rushed to claim recipes most affordable on modest allowances. Registries of locations and ingredients were posted on bulletin boards, written in bathroom stalls, tossed around as crumpled paper balls in class. Even those who had previously shunned the alchemists were drawn back into the fold by the promise of conjuring images of their own for others to see.

---

It was not until weeks later that the first glimpses of a post-{{< thevoid "Void" >}} future revealed themselves, to no one's attention or particular interest. A groggy, not-yet-caffeinated Mrs. Leonard walked into class one day to find a sea of red fabric. Nearly every girl in the morning section showed up to class that day wearing a red dress. Not a single one of the students could provide a concrete method by which they chose the day's wardrobe; feelings, whims, and even coin-flipping were cited as reasons for wearing the outfit. What's more, the same happened in Mr. Thompson's class, and in a number of scattered schools throughout the country.

Being a scientist at heart, and rejecting wholeheartedly the possibility of coincidence or paranormal involvement, Mrs. Leonard spent the rest of the day distractedly overcorrecting for her earlier lack of coffee. A satisfying answer eluded her, and she came home jittery and defeated. Walking past her son's room she noted that he was indulging in his habitual {{< thevoid "Void" >}}-gazing. In the depths of the pitch-black bowl, she glimpsed swirls of that very same shade of red.

---

The mechanism that precipitated the red-dress curiosity was not all that sinister. The {{< thevoid "Void" >}} was not unlike an ocean, absorbing and releasing heat, mitigating changes in its environment. After a day in which red was prevalent in the collective thoughts of {{< thevoid "Void" >}}-viewers, the color dissipated like heat through the dark realm, was stirred somehow by the convection of its hidden currents, and re-entered the minds of practitioners in its altered form. They averted their gazes and went to put on red clothes.

There was another way in which the {{< thevoid "Void" >}} resembled an ocean. Locations within it drifted through the darkness like rafts. Each day, some recipes would move closer or further apart. Whenever others occupied a nearby place in that ocean, their thoughts echoed in the silences between one's own. {{< thevoid "Void" >}}-voyagers, their eyes directed at customary, comforting blackness, would encounter each other there, often without knowing. With each encounter, they left unnoticeable imprints upon one another.

If the Thompsonians or Leonardese were chemists rather than physicists and biologists, and if they were more inclined towards introspection or calm, deliberate thought, they might have observed this gradual exchange, and seen in it the physical process of diffusion, with its particles and collisions. They might have thought of drops of dye in water, swirling in beautiful patterns until finally there were no recognizable shapes, nothing to see at all except a gentle haze of red in an Erlenmeyer flask. The final stage of diffusion was uniformity.

---

The viewers of the {{< thevoid "Void" >}} were as much connected to each other as they were disconnected from the rest of the world. They were distant, sitting in exile at lunch, looking for hours on end with mild expressions into their bowls of inky soup, talking about their latest journeys with each other on the phone. They spoke in references to landmarks in {{< thevoid "The Ocean" >}}, to happenings in their shared dimension. At the same time, they knew little of parties and dances, and they seldom --- if ever --- went to cheer for their peers in athletic events. It was hard for others to hold a conversation with them.

It was not only children that sought to explore the {{< thevoid "Everpresent Void" >}}; adult interest in the substance was growing. Grownups heard about it from their children, their students, or news reports. They could understand the desire to be seen. More than that, though, they saw potential in the {{< thevoid "Void" >}}.

The substance was unprecedented. It was one thing to call someone on the phone, or come to their door; it was something else altogether to create a scene, in three crisp (with practice) dimensions, and to set it adrift in the populous sea of black. The {{< thevoid "Void" >}} was an invaluable tool for promotional material, advertising, and even storefronts. Adults adept at {{< thevoid "Void" >}} manipulation found themselves employed with various large companies, and sometimes even created their own. Though preconceptions about them --- largely negative, and often substantiated --- successfully made the jump across the age gap, these men and women became sought after and well-rewarded for their talents.

With adult influence, of course, came adult concerns. Though recipes for their windows into the other world remained permanent reminders of that first day at the arcade, older practitioners were too used to thinking about elections, taxes, and mortgages. What's worse, the presidential candidates, tax collectors, and banks did not stop thinking about _them_. As months went by, it became more and more common to see blue donkeys and red elephants as motifs in the {{< thevoid "Void" >}}. These drops of color swirled with all others, splashing from raft to raft on their predetermined path to joining the haze.

---

Among some practitioners, there was a growing sense that the {{< thevoid "Everpresent Void" >}} was alive. It didn't speak, or think, or breathe. Sometimes, though, its movements and currents were too deliberate to be mere chance. The connections that it made, the glimpses of nearby islands that viewers saw in the corners of their eyes, must've been chosen on purpose; chosen to entice. {{< thevoid "The Void wanted to be seen">}}. It spoke to its sailors in echoes of others' words, it showed them films whose frames were others' images. Encountering another voyager with his nearby recipe, it was insufficient to simply extricate his thoughts from one's own; it was also necessary to determine why he was sent as the {{< thevoid "Void" >}}'s emissary.

The echoes or films were not sent to convey a message. The {{< thevoid "Void" >}} was not aware of human logic or values, or even of the physical reality outside of its own darkness. It was indifferent to such things, and continued to behave according to some incomprehensible laws. Nevertheless, somewhere near the core of these laws was the desire to command human attention.

Nature did not bestow upon humanity the mechanisms to defend against something as otherworldly as the {{< thevoid "Void" >}}. The stories they learned each day were spoken by a chorus of voices, so loud and numerous that it seemed the whole world was speaking to them. In truth, however, each human argument sounded within that ocean's surf --- as did its refutation. Each voyager heard fragments they were used to hearing, stories they wanted to learn. Though the {{< thevoid "Void" >}} reflected no light, staring at it was looking into an endless mirror.

---

Through this process, the modern-day alchemists' demeanor began to resemble their ancient counterparts' mercury-induced insanity. They spoke in baffling absolutisms. Their language, already rich with {{< thevoid "Void">}}-specific jargon, grew further removed from the words spoken still in coffee shops and bars. Anger and anxiety attracted attention, and so they were angry and anxious, exploding at times at seemingly innocuous occurrences. Sometimes, as with the red-dress incident, hundreds of alchemists were compelled to eat a certain food, or dress in a certain outfit. They swayed like kelp with the invisible waves of the {{< thevoid "Void" >}}.

Concurrently, the {{< thevoid "Void" >}}'s influence grew, its versatility and power proving impossible to surrender. More and more learned to create viewports into the blackness. As they did, the prevalence of madness increased. Soon everyone knew somebody affected by the lunacy. The victims remained entirely human; they kept their fond memories, their endearing mannerisms, their connections. The {{< thevoid "Void" >}} reflected their light like a carnival mirror and amplified thoughts, but it could not cultivate that which was not already there. There was nothing to do but to stand by them and hope that with time, their features would recede back into their former shape.

---

Years later, on a chill November night, a weary Mrs. Leonard re-entered her home, one lit house in an entire city of houses that were dark. Her son was away at college now, and her husband out with friends, leaving her to collapse onto the couch and revisit the events of the day. Her students had done well on their exams, and were rewarded with a day off; in class, they watched a nature documentary. The subject was the Amazon rainforest, and among its inhabitants were the leaf-cutter ants.

Mrs. Leonard thought about the ants. Day after day they scoured the rainforest, collecting leaves to feed to a fungal garden in their colony. Day after day, the fungus emitted chemicals that diffused from the garden, swirling in the air currents that permeated the rest of the colony. Sensing changes, the ants altered their routes to look for different sources of food.

There was, she thought, a first day to all of this, even a first moment. Before that moment, they were _just_ ants, going about their day as all their relatives do to this day. Then, perhaps, a worker returned accompanied by a spore, and changed the course of the colony's history.

Outside, it was storming. In the dark, roads were discernible only through streaking reflections of stoplights in puddles. Rain drummed with increasing urgency against the house's windows; larger drops left craters in the waterscape forming on the glass. The resulting texture was not unlike mycelium.

Mrs. Leonard wondered whether the first of the leaf-cutter ants were wary of the transformation occurring around them, whether the incursion of hyphae into their familiar tunnels concerned them. Something new was beginning to live alongside them, something decidedly un-ant-like. It thought nothing of workers, queens, or larvae, and it was unconcerned with conquest, hunting, or foraging. The fungus only swelled more with each day, entangling itself deeper into their lives. Were the ants really not afraid of this {{< thevoid "thing" >}}? It certainly scared her.

It was over 45 million years ago, the narrator of the documentary had said, that the colonies likely began their new mutualistic way of life. That November night, the ants were still there, tending to their gardens. The fungus nourished their larvae in exchange for their protection. Perhaps, in some way neither she nor the ants could understand, that symbiosis warded off dangers that their competitors succumbed to.

Upon returning home, Mr. Leonard found his wife still on the couch, embers of a dying fireplace casting playful shadows across her face. He had no way of knowing, but she was dreaming of gathering leaves in a warm, humid jungle.
18
convert.rb
18
convert.rb
@@ -25,13 +25,16 @@ class KatexRenderer
  end

  def substitute(content)
    found_any = false
    rendered = content.gsub /\\\(((?:[^\\]|\\[^\)])*)\\\)/ do |match|
      found_any = true
      render(false, $~[1])
    end
    rendered = rendered.gsub /\$\$((?:[^\$]|$[^\$])*)\$\$/ do |match|
      found_any = true
      render(true, $~[1])
    end
    return rendered
    return rendered, found_any
  end
end

@@ -58,8 +61,19 @@ renderer = KatexRenderer.new(katex)
files.each do |file|
  puts "Rendering file: #{file}"
  document = Nokogiri::HTML.parse(File.open(file))
  found_any = false
  document.search('//*[not(ancestor-or-self::code or ancestor-or-self::script)]/text()').each do |t|
    t.replace(renderer.substitute(t.content))
    rendered, found_any_in_text = renderer.substitute(t.content)
    found_any ||= found_any_in_text
    t.replace(rendered)
  end

  found_any ||= document.at('meta[name="needs-latex"]')

  # If we didn't find any mathematical equations, no need to include KaTeX CSS.
  unless found_any
    document.css('link[href$="katex.css"], link[href$="katex.min.css"]').each(&:remove)
  end

  File.write(file, document.to_html(encoding: 'UTF-8'))
end

@@ -1,5 +1,5 @@
{
  "agda-spa": "https://dev.danilafe.com/DanilaFe/agda-spa/src/commit/f0da9a902005b24db4e03a89c2862493735467c4",
  "agda-spa": "https://dev.danilafe.com/DanilaFe/agda-spa/src/commit/828b652d3b9266e27ef7cf5a8a7fb82e3fd3133f",
  "aoc-2020": "https://dev.danilafe.com/Advent-of-Code/AdventOfCode-2020/src/commit/7a8503c3fe1aa7e624e4d8672aa9b56d24b4ba82",
  "blog-static-flake": "https://dev.danilafe.com/Nix-Configs/blog-static-flake/src/commit/67b47d9c298e7476c2ca211aac5c5fd961637b7b",
  "compiler": "https://dev.danilafe.com/DanilaFe/bloglang/src/commit/137455b0f4365ba3fd11c45ce49781cdbe829ec3",

@@ -1,21 +0,0 @@
<div class="bergamot-exercise">
  <details open>
    <summary>
      <span class="bergamot-exercise-label">
        <span class="bergamot-exercise-number"></span>
        {{ if or (eq (.Get "label") "") (eq (.Get "label") nil) }}{{ else }}({{ .Get "label" }}){{ end }}:
      </span>
    </summary>
    {{ transform.Markdownify .Inner }}

    <div class="bergamot-button-group">
      {{ if or (eq (.Get "preset") "") (eq (.Get "preset") nil) }}
      {{ else }}
      <button class="bergamot-button bergamot-play" onclick='window.Bergamot.runPreset(this.parentElement, "bergamot-widget-container-{{ .Get "id" }}", "{{ .Get "preset" }}")'>{{ partial "icon.html" "play" }}Start Bergamot</button>
      <button class="bergamot-button bergamot-close bergamot-hidden" onclick='window.Bergamot.close(this.parentElement, "bergamot-widget-container-{{ .Get "id" }}")'>{{ partial "icon.html" "x" }}Close Bergamot</button>
      {{ end }}
    </div>

    <div id="bergamot-widget-container-{{ .Get "id" }}"></div>
  </details>
</div>
@@ -1,125 +0,0 @@
|
||||
<script defer src="{{ .Site.Params.bergamotJsUrl }}"></script>
|
||||
{{ $style := resources.Get "scss/bergamot.scss" | resources.ToCSS | resources.Minify }}
|
||||
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||
<script defer>
|
||||
const renderRules =
|
||||
`
|
||||
PrecApp @ prec(app(?l, ?r), 100, left) <-;
|
||||
PrecPlus @ prec(plus(?l, ?r), 80, either) <-;
|
||||
PrecAbs @ prec(abs(?x, ?t, ?e), 0, right) <-;
|
||||
PrecArr @ prec(tarr(?l, ?r), 0, right) <-;
|
||||
|
||||
SelectHead @ select(cons([?t, ?v], ?rest), ?default, ?v) <- ?t;
|
||||
SelectTail @ select(cons([?t, ?v], ?rest), ?default, ?found) <- not(?t), select(?rest, ?default, ?found);
|
||||
SelectEmpty @ select(nil, ?default, ?default) <-;
|
||||
|
||||
ParenthAssocLeft @ parenthassoc(?a_i, left, right) <-;
|
||||
ParenthAssocRight @ parenthassoc(?a_i, right, left) <-;
|
||||
ParenthAssocNone @ parenthassoc(?a_i, none, ?pos) <-;
|
||||
ParenthAssocNeq @ parenthassoc(?a_i, ?a_o, ?pos) <- not(symeq(?a_i, ?a_o));
|
||||
|
||||
Parenth @ parenth(?inner, ?outer, ?pos, ?strin, ?strout) <-
|
||||
prec(?inner, ?p_i, ?a_i), prec(?outer, ?p_o, ?a_o),
|
||||
join(["(", ?strin, ")"], ?strinparen),
|
||||
select([ [less(?p_i, ?p_o), strinparen], [less(?p_o, ?p_i), ?strin], [ parenthassoc(?a_i, ?a_o, ?pos), ?strinparen ] ], ?strin, ?strout);
|
||||
ParenthFallback @ parenth(?inner, ?outer, ?pos, ?strin, ?strin) <-;

LatexListNil @ latexlist(nil, nil) <-;
LatexListCons @ latexlist(cons(?x, ?xs), cons(?l_x, ?l_s)) <- latex(?x, ?l_x), latexlist(?xs, ?l_s);

IntercalateNil @ intercalate(?sep, nil, nil) <-;
IntercalateConsCons @ intercalate(?sep, cons(?x_1, cons(?x_2, ?xs)), cons(?x_1, cons(?sep, ?ys))) <- intercalate(?sep, cons(?x_2, ?xs), ?ys);
IntercalateConsNil @ intercalate(?sep, cons(?x, nil), cons(?x, nil)) <-;

NonEmpty @ nonempty(cons(?x, ?xs)) <-;

LatexInt @ latex(?i, ?l) <- int(?i), tostring(?i, ?l);
LatexFloat @ latex(?f, ?l) <- float(?f), tostring(?f, ?l);
LatexStr @ latex(?s, ?l) <- str(?s), escapestring(?s, ?l_1), latexifystring(?s, ?l_2), join(["\\\\texttt{\\"", ?l_2, "\\"}"], ?l);
LatexMeta @ latex(metavariable(?l), ?l) <-;
LatexLit @ latex(lit(?i), ?l) <- latex(?i, ?l);
LatexVar @ latex(var(?s), ?l) <- latex(?s, ?l);
LatexPlus @ latex(plus(?e_1, ?e_2), ?l) <-
  latex(?e_1, ?l_1), latex(?e_2, ?l_2),
  parenth(?e_1, plus(?e_1, ?e_2), left, ?l_1, ?lp_1),
  parenth(?e_2, plus(?e_1, ?e_2), right, ?l_2, ?lp_2),
  join([?lp_1, " + ", ?lp_2], ?l);
LatexPair @ latex(pair(?e_1, ?e_2), ?l) <- latex(?e_1, ?l_1), latex(?e_2, ?l_2), join(["(", ?l_1, ", ", ?l_2, ")"], ?l);
LatexAbs @ latex(abs(?x, ?t, ?e), ?l) <- latex(?e, ?l_e), latex(?t, ?l_t), latex(?x, ?l_x), join(["\\\\lambda ", ?l_x, " : ", ?l_t, " . ", ?l_e], ?l);
LatexApp @ latex(app(?e_1, ?e_2), ?l) <-
  latex(?e_1, ?l_1), latex(?e_2, ?l_2),
  parenth(?e_1, app(?e_1, ?e_2), left, ?l_1, ?lp_1),
  parenth(?e_2, app(?e_1, ?e_2), right, ?l_2, ?lp_2),
  join([?lp_1, " \\\\enspace ", ?lp_2], ?l);

LatexTInt @ latex(tint, "\\\\text{tint}") <-;
LatexTStr @ latex(tstr, "\\\\text{tstr}") <-;
LatexTArr @ latex(tarr(?t_1, ?t_2), ?l) <-
  latex(?t_1, ?l_1), latex(?t_2, ?l_2),
  parenth(?t_1, tarr(?t_1, ?t_2), left, ?l_1, ?lp_1),
  parenth(?t_2, tarr(?t_1, ?t_2), right, ?l_2, ?lp_2),
  join([?lp_1, " \\\\to ", ?lp_2], ?l);
LatexTPair @ latex(tpair(?t_1, ?t_2), ?l) <- latex(?t_1, ?l_1), latex(?t_2, ?l_2), join(["(", ?l_1, ", ", ?l_2, ")"], ?l);

LatexTypeEmpty @ latex(empty, "\\\\varnothing") <-;
LatexTypeExtend @ latex(extend(?a, ?b, ?c), ?l) <- latex(?a, ?l_a), latex(?b, ?l_b), latex(?c, ?l_c), join([?l_a, " , ", ?l_b, " : ", ?l_c], ?l);
LatexTypeInenv @ latex(inenv(?x, ?t, ?G), ?l) <- latex(?x, ?l_x), latex(?t, ?l_t), latex(?G, ?l_G), join([?l_x, " : ", ?l_t, " \\\\in ", ?l_G], ?l);

LatexTypeBin @ latex(type(?e, ?t), ?l) <- latex(?e, ?l_e), latex(?t, ?l_t), join([?l_e, " : ", ?l_t], ?l);
LatexTypeTer @ latex(type(?G, ?e, ?t), ?l) <- latex(?G, ?l_G), latex(?e, ?l_e), latex(?t, ?l_t), join([?l_G, " \\\\vdash ", ?l_e, " : ", ?l_t], ?l);

LatexConverts @ latex(converts(?f, ?t), ?l) <- latex(?f, ?l_f), latex(?t, ?l_t), join([?l_f, " \\\\preceq ", ?l_t], ?l);

LatexIsInt @ latex(int(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\\\in \\\\texttt{Int}"], ?l);
LatexIsFloat @ latex(float(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\\\in \\\\texttt{Float}"], ?l);
LatexIsNum @ latex(num(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\\\in \\\\texttt{Num}"], ?l);
LatexIsStr @ latex(str(?e), ?l) <- latex(?e, ?l_e), join([?l_e, " \\\\in \\\\texttt{Str}"], ?l);
LatexSym @ latex(?s, ?l) <- sym(?s), tostring(?s, ?l_1), join(["\\\\text{", ?l_1, "}"], ?l);
LatexCall @ latex(?c, ?l) <- call(?c, ?n, ?ts), nonempty(?ts), latexlist(?ts, ?lts_1), intercalate(", ", ?lts_1, ?lts_2), join(?lts_2, ?lts_3), join(["\\\\text{", ?n, "}", "(", ?lts_3, ")"], ?l);
`;
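// Editor's worked example (not part of the original file): rendering
// plus(plus(lit(1), lit(2)), lit(3)) consults prec(plus(..), 80, either)
// for both the inner and the outer node. Neither less(80, 80) branch of
// select fires, and parenthassoc(either, either, left) fails (either is
// not left, right, or none, and symeq(either, either) holds), so
// SelectEmpty yields the unparenthesized default: "1 + 2 + 3".
// By contrast, for app(abs(x, tint, var(x)), lit(1)) the lambda has
// precedence 0 and app has 100, so less(0, 100) selects ?strinparen and
// the lambda is parenthesized.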

const loadedWidgets = {};
const getButtons = (inputGroup) => {
  return {
    play: inputGroup.querySelector(".bergamot-play"),
    reset: inputGroup.querySelector(".bergamot-reset"),
    close: inputGroup.querySelector(".bergamot-close"),
  };
};
const setRunning = (inputGroup, running) => {
  if (inputGroup !== null) {
    const buttons = getButtons(inputGroup);
    if (buttons.play) buttons.play.classList.toggle("bergamot-hidden", running);
    if (buttons.reset) buttons.reset.classList.toggle("bergamot-hidden", !running);
    if (buttons.close) buttons.close.classList.toggle("bergamot-hidden", !running);
  }
};

window.Bergamot = {};
window.Bergamot.run = (inputGroup, nodeId, inputPrompt, rules, renderRules, query) => {
  const app = Elm.Main.init({
    node: document.getElementById(nodeId),
    flags: { renderRules, inputRules: inputPrompt, rules, query }
  });
  loadedWidgets[nodeId] = { app, parentNode: inputGroup ? inputGroup.parentElement : null };
  setRunning(inputGroup, true);
};
window.Bergamot.runPreset = (inputGroup, nodeId, presetName) => {
  const preset = window.Bergamot.presets[presetName];
  window.Bergamot.run(inputGroup, nodeId, preset.inputPrompt, preset.rules, renderRules, preset.query || "");
};
window.Bergamot.close = (inputGroup, nodeId) => {
  if (!(nodeId in loadedWidgets)) return;

  // Recreate the placeholder div that Elm.Main.init replaced, so the
  // widget can be relaunched later under the same id.
  const placeholderDiv = document.createElement('div');
  placeholderDiv.id = nodeId;

  const widget = loadedWidgets[nodeId];
  const elmRoot = widget.parentNode.querySelector(".bergamot-root");
  elmRoot.replaceWith(placeholderDiv);
  delete loadedWidgets[nodeId];
  setRunning(inputGroup, false);
};
window.Bergamot.presets = {};

</script>
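The play and reset buttons that pair with the close button shown earlier are elided from this view. A minimal sketch of how a preset would be wired up, with hypothetical markup (the id and preset name here are illustrative, not taken from the diff):

<button class="bergamot-button bergamot-play"
        onclick='window.Bergamot.runPreset(this.parentElement, "bergamot-widget-container-demo", "demo")'>
  Run Bergamot
</button>
<div id="bergamot-widget-container-demo"></div>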
@@ -1,7 +0,0 @@
<script>
window.Bergamot.presets['{{ .Get "name" }}'] = {
  rules: '{{ .Inner }}',
  inputPrompt: '{{ .Get "prompt" }}',
  query: '{{ .Get "query" }}',
};
</script>
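The filename of this deleted shortcode is elided from this view; assuming it was registered as something like bergamot_preset (an illustrative name), a post would have declared a preset roughly as follows, with the rule body as the shortcode's inner content:

{{< bergamot_preset name="demo" prompt="type(empty, ?e, ?t)" query="" >}}
TInt @ type(?G, lit(?n), tint) <- int(?n);
{{< /bergamot_preset >}}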
@@ -1,6 +0,0 @@
<div id="{{ .Get "id" }}"></div>
<script>
window.addEventListener('load', function() {
  window.Bergamot.run(null, '{{ .Get "id" }}', '{{ .Get "prompt" }}', '{{ .Inner }}', renderRules, '{{ .Get "query" }}');
});
</script>
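Likewise for this inline-widget shortcode, which boots the Elm app directly on page load rather than behind a play button; a hedged usage sketch (the shortcode name and arguments are illustrative):

{{< bergamot_widget id="bergamot-widget-intro" prompt="type(empty, ?e, ?t)" query="" >}}
TInt @ type(?G, lit(?n), tint) <- int(?n);
{{< /bergamot_widget >}}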
@@ -1,2 +1,2 @@
{{ $style := resources.Get "scss/donate.scss" | resources.ToCSS | resources.Minify }}
{{ $style := resources.Get "scss/donate.scss" | css.Sass | resources.Minify }}
<link rel="stylesheet" href="{{ $style.Permalink }}">
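This hunk and the two below it are the same mechanical migration: newer Hugo releases deprecate the resources.ToCSS alias in favor of css.Sass, which transpiles Sass/SCSS with otherwise unchanged semantics. For reference, css.Sass also accepts an options map; a hedged sketch (the options shown are illustrative, not from the diff):

{{ $style := resources.Get "scss/donate.scss"
   | css.Sass (dict "outputStyle" "compressed")
   | resources.Minify }}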
@@ -1,2 +1,2 @@
{{ $style := resources.Get "scss/gmachine.scss" | resources.ToCSS | resources.Minify }}
{{ $style := resources.Get "scss/gmachine.scss" | css.Sass | resources.Minify }}
<link rel="stylesheet" href="{{ $style.Permalink }}">
@@ -1,34 +0,0 @@
<script defer src="{{ .Site.Params.katexJsUrl }}" crossorigin="anonymous"></script>
<script defer>
class KatexExpressionShim extends HTMLElement {
  static observedAttributes = ["expression", "katex-options"];
  targetSpan;

  constructor() {
    super();
  }

  doRender() {
    if (!this.targetSpan) return;

    // Default to the JSON string "{}" rather than an object literal, so
    // that JSON.parse below succeeds when no katex-options attribute is set.
    const options = this.hasAttribute("katex-options") ?
      this.getAttribute("katex-options") : "{}";
    katex.render(
      this.getAttribute("expression"),
      this.targetSpan,
      JSON.parse(options)
    );
  }

  connectedCallback() {
    this.targetSpan = document.createElement('span');
    this.appendChild(this.targetSpan);
    this.doRender();
  }

  attributeChangedCallback(name, oldValue, newValue) {
    this.doRender();
  }
}
customElements.define("katex-expression", KatexExpressionShim);
</script>
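A usage sketch for the custom element (the expression is illustrative): both attributes are observed, so updating either one from script re-renders in place through attributeChangedCallback.

<katex-expression expression="\lambda x . x"
                  katex-options='{"displayMode": true}'></katex-expression>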
@@ -1,2 +1,2 @@
{{ $style := resources.Get "scss/stack.scss" | resources.ToCSS | resources.Minify }}
{{ $style := resources.Get "scss/stack.scss" | css.Sass | resources.Minify }}
<link rel="stylesheet" href="{{ $style.Permalink }}">
layouts/shortcodes/thevoid.html (new file, 2 lines)
@@ -0,0 +1,2 @@
<span class="glitch" data-text="{{ .Get 0 }}"><span>{{ .Get 0 }}</span></span>
{{- /* Remove trailing white space */ -}}
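Hugo names shortcodes after their files, so this one really is invoked as thevoid; the text argument below is illustrative. The text is duplicated into data-text, presumably so the stylesheet can draw offset copies of it (for example via ::before/::after pseudo-elements) to produce the glitch effect.

{{< thevoid "ENTER THE VOID" >}}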
layouts/thevoid/baseof.html (new file, 20 lines)
@@ -0,0 +1,20 @@
{{- /* Note: we change only the baseof template, because the title, tags, etc. of a regular post are still valid. */ -}}
<!DOCTYPE html>
<html lang="{{ .Site.Language.Lang }}">
{{- partial "head.html" . -}}
<body>
{{ $voidcss := resources.Get "scss/thevoid.scss" | css.Sass | resources.Minify }}
<link rel="stylesheet" href="{{ $voidcss.Permalink }}">
{{- partial "header.html" . -}}
<div class="container"><hr class="header-divider"></div>
<main class="container">

{{- if .Draft -}}
{{- partial "warning.html" (i18n "postDraft") -}}
{{- end -}}

{{- block "main" . }}{{- end }}
</main>
{{- block "after" . }}{{- end }}
</body>
</html>
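Nothing in this template is page-specific; a post presumably opts into it through its front matter type, which makes Hugo resolve its templates from layouts/thevoid/ instead of the defaults. A hedged sketch of such front matter (the title is illustrative):

---
title: "An example post"
type: thevoid
---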
Submodule themes/vanilla updated: acaa00754b...5460d759b0