Compare commits
31 Commits
5846dd5d04
...
i-love-pro
| Author | SHA1 | Date | |
|---|---|---|---|
| 7a088d6739 | |||
| 626baefd76 | |||
| 4602d02720 | |||
| 7fbd4ea9f8 | |||
| 6fd1e1962b | |||
| 62c338e382 | |||
| 40ea9ec637 | |||
| 787e194d41 | |||
| 71162e2db9 | |||
| 43debc65e4 | |||
| b07ea85b70 | |||
| 06e8b8e022 | |||
| 647f47a5f3 | |||
| 36e4feb668 | |||
| 11be991946 | |||
| a8c2b1d05a | |||
| fb46142e9d | |||
| 0b33d03b73 | |||
| 804147caef | |||
| d847d20666 | |||
| 07408d01a9 | |||
| 816a473913 | |||
| ce8f8fb872 | |||
| 2f60004241 | |||
| 7130c6bd11 | |||
| c5aacc060a | |||
| 6048dc0b9c | |||
| 1f01c3caff | |||
| bca44343eb | |||
| 3b9c2edcdd | |||
| fa180ee24e |
@@ -3,11 +3,11 @@ GEM
|
|||||||
specs:
|
specs:
|
||||||
duktape (2.7.0.0)
|
duktape (2.7.0.0)
|
||||||
execjs (2.9.1)
|
execjs (2.9.1)
|
||||||
mini_portile2 (2.8.6)
|
mini_portile2 (2.8.8)
|
||||||
nokogiri (1.15.6)
|
nokogiri (1.18.3)
|
||||||
mini_portile2 (~> 2.8.2)
|
mini_portile2 (~> 2.8.2)
|
||||||
racc (~> 1.4)
|
racc (~> 1.4)
|
||||||
racc (1.8.0)
|
racc (1.8.1)
|
||||||
|
|
||||||
PLATFORMS
|
PLATFORMS
|
||||||
ruby
|
ruby
|
||||||
|
|||||||
12
agda.rb
12
agda.rb
@@ -23,7 +23,7 @@ class AgdaContext
|
|||||||
return @file_infos[file] if @file_infos.include? file
|
return @file_infos[file] if @file_infos.include? file
|
||||||
|
|
||||||
@file_infos[file] = line_infos = {}
|
@file_infos[file] = line_infos = {}
|
||||||
unless File.exists?(file)
|
unless File.exist?(file)
|
||||||
return line_infos
|
return line_infos
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -160,6 +160,14 @@ class FileGroup
|
|||||||
line_range = 1..
|
line_range = 1..
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Sometimes, code is deeply nested in the source file, but we don't
|
||||||
|
# want to show the leading space. In that case, the generator sets
|
||||||
|
# data-source-offset with how much leading space was stripped off.
|
||||||
|
initial_offset = 0
|
||||||
|
if source_offset_attr = t.attribute("data-source-offset")
|
||||||
|
initial_offset = source_offset_attr.to_s.to_i
|
||||||
|
end
|
||||||
|
|
||||||
full_path = t.attribute("data-file-path").to_s
|
full_path = t.attribute("data-file-path").to_s
|
||||||
full_path_dirs = Pathname(full_path).each_filename.to_a
|
full_path_dirs = Pathname(full_path).each_filename.to_a
|
||||||
|
|
||||||
@@ -195,7 +203,7 @@ class FileGroup
|
|||||||
line_info = agda_info[line_no]
|
line_info = agda_info[line_no]
|
||||||
next unless line_info
|
next unless line_info
|
||||||
|
|
||||||
offset = 0
|
offset = initial_offset
|
||||||
line.traverse do |lt|
|
line.traverse do |lt|
|
||||||
if lt.text?
|
if lt.text?
|
||||||
content = lt.content
|
content = lt.content
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ files.each do |file|
|
|||||||
tags = []
|
tags = []
|
||||||
group = 1
|
group = 1
|
||||||
draft = false
|
draft = false
|
||||||
next unless File.exists?(file)
|
next unless File.exist?(file)
|
||||||
value = File.size(file)
|
value = File.size(file)
|
||||||
url = file.gsub(/^content/, "https://danilafe.com").delete_suffix("/index.md").delete_suffix(".md")
|
url = file.gsub(/^content/, "https://danilafe.com").delete_suffix("/index.md").delete_suffix(".md")
|
||||||
File.readlines(file).each do |l|
|
File.readlines(file).each do |l|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
body {
|
body {
|
||||||
background-color: #1c1e26;
|
background-color: #1c1e26;
|
||||||
|
--text-color: white;
|
||||||
font-family: $font-code;
|
font-family: $font-code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ files = ARGV
|
|||||||
code_paths = Dir.entries(root_path).select do |f|
|
code_paths = Dir.entries(root_path).select do |f|
|
||||||
File.directory?(File.join(root_path, f)) and f != '.' and f != '..'
|
File.directory?(File.join(root_path, f)) and f != '.' and f != '..'
|
||||||
end.to_set
|
end.to_set
|
||||||
code_paths += JSON.parse(File.read(data_file)).keys if File.exists? data_file
|
code_paths += JSON.parse(File.read(data_file)).keys if File.exist? data_file
|
||||||
# Extending code_paths from submodules.json means that nested Agda modules
|
# Extending code_paths from submodules.json means that nested Agda modules
|
||||||
# have their root dir correctly set.
|
# have their root dir correctly set.
|
||||||
|
|
||||||
|
|||||||
49
chatgpt-subset-feather-icon.rb
Normal file
49
chatgpt-subset-feather-icon.rb
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env ruby
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require 'nokogiri'
|
||||||
|
require 'set'
|
||||||
|
|
||||||
|
# 1) Process all files passed in from the command line
|
||||||
|
svgpath = ARGV[0]
|
||||||
|
files = ARGV[1..]
|
||||||
|
|
||||||
|
# 2) Extract used Feather icons
|
||||||
|
used_icons = Set.new
|
||||||
|
|
||||||
|
files.each do |file|
|
||||||
|
# Parse each HTML file
|
||||||
|
doc = File.open(file, "r:UTF-8") { |f| Nokogiri::HTML(f) }
|
||||||
|
|
||||||
|
# Look for <use xlink:href="/feather-sprite.svg#iconName">
|
||||||
|
doc.css("use").each do |use_tag|
|
||||||
|
href = use_tag["xlink:href"] || use_tag["href"]
|
||||||
|
if href && href.start_with?("/feather-sprite.svg#")
|
||||||
|
icon_name = href.split("#").last
|
||||||
|
used_icons << icon_name
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
puts "Found #{used_icons.size} unique icons: #{used_icons.to_a.join(', ')}"
|
||||||
|
|
||||||
|
# 3) Load the full feather-sprite.svg as XML
|
||||||
|
sprite_doc = File.open(svgpath, "r:UTF-8") { |f| Nokogiri::XML(f) }
|
||||||
|
|
||||||
|
# 4) Create a new SVG with only the required symbols
|
||||||
|
new_svg = Nokogiri::XML::Document.new
|
||||||
|
svg_tag = Nokogiri::XML::Node.new("svg", new_svg)
|
||||||
|
svg_tag["xmlns"] = "http://www.w3.org/2000/svg"
|
||||||
|
new_svg.add_child(svg_tag)
|
||||||
|
|
||||||
|
sprite_doc.css("symbol").each do |symbol_node|
|
||||||
|
if used_icons.include?(symbol_node["id"])
|
||||||
|
# Duplicate the symbol node (so it can be inserted in the new document)
|
||||||
|
svg_tag.add_child(symbol_node.dup)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# 5) Save the subset sprite
|
||||||
|
File.open(svgpath, "w:UTF-8") do |f|
|
||||||
|
f.write(new_svg.to_xml)
|
||||||
|
end
|
||||||
69
chatgpt-subset-one-go.py
Normal file
69
chatgpt-subset-one-go.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from fontTools.subset import Subsetter, Options
|
||||||
|
from fontTools.ttLib import TTFont
|
||||||
|
|
||||||
|
FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf") # Font file types
|
||||||
|
|
||||||
|
def extract_text_from_html(file_path):
|
||||||
|
"""Extract text content from a single HTML file."""
|
||||||
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
|
soup = BeautifulSoup(f.read(), "html.parser")
|
||||||
|
return soup.get_text()
|
||||||
|
|
||||||
|
def get_used_characters(files):
|
||||||
|
"""Collect unique characters from all .html files in the given directory."""
|
||||||
|
char_set = set()
|
||||||
|
for file in files:
|
||||||
|
text = extract_text_from_html(file)
|
||||||
|
char_set.update(text)
|
||||||
|
return "".join(sorted(char_set))
|
||||||
|
|
||||||
|
def find_font_files(directory):
|
||||||
|
"""Find all font files in the given directory, recursively."""
|
||||||
|
font_files = []
|
||||||
|
for root, _, files in os.walk(directory):
|
||||||
|
for file in files:
|
||||||
|
if file.endswith(FONT_EXTENSIONS):
|
||||||
|
font_files.append(os.path.join(root, file))
|
||||||
|
return font_files
|
||||||
|
|
||||||
|
def subset_font_in_place(font_path, characters):
|
||||||
|
"""Subsets the given font file to include only the specified characters."""
|
||||||
|
# Convert characters to their integer code points
|
||||||
|
unicode_set = {ord(c) for c in characters}
|
||||||
|
|
||||||
|
font = TTFont(font_path)
|
||||||
|
options = Options()
|
||||||
|
options.drop_tables += ["DSIG"]
|
||||||
|
options.drop_tables += ["LTSH", "VDMX", "hdmx", "gasp"]
|
||||||
|
options.unicodes = unicode_set
|
||||||
|
options.variations = False
|
||||||
|
options.drop_variations = True
|
||||||
|
options.layout_features = ["*"] # keep all OT features
|
||||||
|
options.hinting = False
|
||||||
|
|
||||||
|
# Preserve original format if it was WOFF/WOFF2
|
||||||
|
if font_path.endswith(".woff2"):
|
||||||
|
options.flavor = "woff2"
|
||||||
|
elif font_path.endswith(".woff"):
|
||||||
|
options.flavor = "woff"
|
||||||
|
|
||||||
|
subsetter = Subsetter(options)
|
||||||
|
subsetter.populate(unicodes=unicode_set)
|
||||||
|
subsetter.subset(font)
|
||||||
|
|
||||||
|
# Overwrite the original font file
|
||||||
|
font.save(font_path)
|
||||||
|
print(f"Subsetted font in place: {font_path}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
used_chars = get_used_characters(sys.argv[2:])
|
||||||
|
print(f"Extracted {len(used_chars)} unique characters from {len(sys.argv[2:])} HTML files.")
|
||||||
|
|
||||||
|
font_files = find_font_files(sys.argv[1])
|
||||||
|
print(f"Found {len(font_files)} font files to subset.")
|
||||||
|
|
||||||
|
for font_file in font_files:
|
||||||
|
subset_font_in_place(font_file, used_chars)
|
||||||
@@ -339,9 +339,8 @@ this means the rule applies to (object) variables declared to have type
|
|||||||
our system. A single rule takes care of figuring the types of _all_
|
our system. A single rule takes care of figuring the types of _all_
|
||||||
variables.
|
variables.
|
||||||
|
|
||||||
{{< todo >}}
|
> [!TODO]
|
||||||
The rest of this, but mostly statements.
|
> The rest of this, but mostly statements.
|
||||||
{{< /todo >}}
|
|
||||||
|
|
||||||
### This Page at a Glance
|
### This Page at a Glance
|
||||||
#### Metavariables
|
#### Metavariables
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ Now, let's start with the least element of our lattice, denoted \(\bot\).
|
|||||||
A lattice of finite height is guaranteed to have such an element. If it didn't,
|
A lattice of finite height is guaranteed to have such an element. If it didn't,
|
||||||
we could always extend chains by tacking on a smaller element to their bottom,
|
we could always extend chains by tacking on a smaller element to their bottom,
|
||||||
and then the lattice wouldn't have a finite height anymore.
|
and then the lattice wouldn't have a finite height anymore.
|
||||||
|
{#start-least}
|
||||||
|
|
||||||
Now, apply \(f\) to \(\bot\) to get \(f(\bot)\). Since \(\bot\) is the least
|
Now, apply \(f\) to \(\bot\) to get \(f(\bot)\). Since \(\bot\) is the least
|
||||||
element, it must be true that \(\bot \le f(\bot)\). Now, if it's "less than or equal",
|
element, it must be true that \(\bot \le f(\bot)\). Now, if it's "less than or equal",
|
||||||
|
|||||||
@@ -262,6 +262,7 @@ expressions, and the letter \(v\) to stand for values. Finally, we'll write
|
|||||||
\(\rho, e \Downarrow v\) to say that "in an environment \(\rho\), expression \(e\)
|
\(\rho, e \Downarrow v\) to say that "in an environment \(\rho\), expression \(e\)
|
||||||
evaluates to value \(v\)". Our two previous examples of evaluating `x+1` can
|
evaluates to value \(v\)". Our two previous examples of evaluating `x+1` can
|
||||||
thus be written as follows:
|
thus be written as follows:
|
||||||
|
{#notation-for-environments}
|
||||||
|
|
||||||
{{< latex >}}
|
{{< latex >}}
|
||||||
\{ \texttt{x} \mapsto 42 \}, \texttt{x}+1 \Downarrow 43 \\
|
\{ \texttt{x} \mapsto 42 \}, \texttt{x}+1 \Downarrow 43 \\
|
||||||
|
|||||||
@@ -4,13 +4,26 @@ series: "Static Program Analysis in Agda"
|
|||||||
description: "In this post, I use the monotone lattice framework and verified CFGs to define a sign analysis"
|
description: "In this post, I use the monotone lattice framework and verified CFGs to define a sign analysis"
|
||||||
date: 2024-12-01T15:09:07-08:00
|
date: 2024-12-01T15:09:07-08:00
|
||||||
tags: ["Agda", "Programming Languages"]
|
tags: ["Agda", "Programming Languages"]
|
||||||
draft: true
|
|
||||||
---
|
---
|
||||||
|
|
||||||
In the previous post, I showed that the Control Flow graphs we built of our
|
In the previous post, I showed that the Control Flow graphs we built of our
|
||||||
programs match how they are really executed. This means that we can rely
|
programs match how they are really executed. This means that we can rely
|
||||||
on these graphs to compute program information. In this post, we finally
|
on these graphs to compute program information. In this post, we finally
|
||||||
get to compute that information. Let's jump right into it!
|
get to compute that information. Here's a quick bit paraphrasing from last time
|
||||||
|
that provides a summary of our approach:
|
||||||
|
|
||||||
|
1. We will construct a finite-height lattice. Every single element of this
|
||||||
|
lattice will contain information about each variable at each node in the
|
||||||
|
Control Flow Graph.
|
||||||
|
2. We will then define a monotonic function that update this information using
|
||||||
|
the structure encoded in the CFG’s edges and nodes.
|
||||||
|
3. Then, using the fixed-point algorithm, we will find the least element of the
|
||||||
|
lattice, which will give us a precise description of all program variables at
|
||||||
|
all points in the program.
|
||||||
|
4. Because we have just validated our CFGs to be faithful to the language’s
|
||||||
|
semantics, we’ll be able to prove that our algorithm produces accurate results.
|
||||||
|
|
||||||
|
Let's jump right into it!
|
||||||
|
|
||||||
### Choosing a Lattice
|
### Choosing a Lattice
|
||||||
A lot of this time, we have been [talking about lattices]({{< relref "01_spa_agda_lattices" >}}),
|
A lot of this time, we have been [talking about lattices]({{< relref "01_spa_agda_lattices" >}}),
|
||||||
@@ -59,6 +72,7 @@ split our programs into sequences of statements that are guaranteed to execute
|
|||||||
together --- basic blocks. For our analysis, we'll keep per-variable for
|
together --- basic blocks. For our analysis, we'll keep per-variable for
|
||||||
each basic block in the program. Since basic blocks are nodes in the Control Flow
|
each basic block in the program. Since basic blocks are nodes in the Control Flow
|
||||||
Graph of our program, our whole lattice will be as follows:
|
Graph of our program, our whole lattice will be as follows:
|
||||||
|
{#whole-lattice}
|
||||||
|
|
||||||
{{< latex >}}
|
{{< latex >}}
|
||||||
\text{Info} \triangleq \text{NodeId} \to (\text{Variable} \to \text{Sign})
|
\text{Info} \triangleq \text{NodeId} \to (\text{Variable} \to \text{Sign})
|
||||||
@@ -112,6 +126,7 @@ exact values of `x`, `y`, and `z`, leaving only their signs). The exact details
|
|||||||
of how this partial evaluation is done are analysis-specific; in general, we
|
of how this partial evaluation is done are analysis-specific; in general, we
|
||||||
simply require an analysis to provide an evaluator. We will define
|
simply require an analysis to provide an evaluator. We will define
|
||||||
[an evaluator for the sign lattice below](#instantiating-with-the-sign-lattice).
|
[an evaluator for the sign lattice below](#instantiating-with-the-sign-lattice).
|
||||||
|
{#general-evaluator}
|
||||||
|
|
||||||
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 166 167 >}}
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 166 167 >}}
|
||||||
|
|
||||||
@@ -136,6 +151,7 @@ current block. Early on, we saw that [the \((\sqcup)\) operator models disjuncti
|
|||||||
\((\sqcup)\) to the variable-sign maps of all predecessors. The
|
\((\sqcup)\) to the variable-sign maps of all predecessors. The
|
||||||
[reference _Static Program Analysis_ text](https://cs.au.dk/~amoeller/spa/)
|
[reference _Static Program Analysis_ text](https://cs.au.dk/~amoeller/spa/)
|
||||||
calls this operation \(\text{JOIN}\):
|
calls this operation \(\text{JOIN}\):
|
||||||
|
{#join-preds}
|
||||||
|
|
||||||
{{< latex >}}
|
{{< latex >}}
|
||||||
\textit{JOIN}(v) = \bigsqcup_{w \in \textit{pred}(v)} \llbracket w \rrbracket
|
\textit{JOIN}(v) = \bigsqcup_{w \in \textit{pred}(v)} \llbracket w \rrbracket
|
||||||
@@ -272,6 +288,7 @@ Actually, we haven't yet seen that `updateVariablesFromStmt`. This is
|
|||||||
a function that we can define using the user-provided abtract interpretation
|
a function that we can define using the user-provided abtract interpretation
|
||||||
`eval`. Specifically, it handles the job of updating the sign of a variable
|
`eval`. Specifically, it handles the job of updating the sign of a variable
|
||||||
once it has been assigned to (or doing nothing if the statement is a no-op).
|
once it has been assigned to (or doing nothing if the statement is a no-op).
|
||||||
|
{#define-updateVariablesFromStmt}
|
||||||
|
|
||||||
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 191 193 >}}
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 191 193 >}}
|
||||||
|
|
||||||
|
|||||||
547
content/blog/09_spa_agda_verified_forward/index.md
Normal file
547
content/blog/09_spa_agda_verified_forward/index.md
Normal file
@@ -0,0 +1,547 @@
|
|||||||
|
---
|
||||||
|
title: "Implementing and Verifying \"Static Program Analysis\" in Agda, Part 9: Verifying the Forward Analysis"
|
||||||
|
series: "Static Program Analysis in Agda"
|
||||||
|
description: "In this post, I prove that the sign analysis from the previous is correct"
|
||||||
|
date: 2024-12-25T19:00:00-08:00
|
||||||
|
tags: ["Agda", "Programming Languages"]
|
||||||
|
left_align_code: true
|
||||||
|
---
|
||||||
|
|
||||||
|
In the previous post, we put together a number of powerful pieces of machinery
|
||||||
|
to construct a sign analysis. However, we still haven't verified that this
|
||||||
|
analysis produces correct results. For the most part, we already have the
|
||||||
|
tools required to demonstrate correctness; the most important one
|
||||||
|
is the [validity of our CFGs]({{< relref "07_spa_agda_semantics_and_cfg" >}})
|
||||||
|
relative to [the semantics of the little language]({{< relref "05_spa_agda_semantics" >}}).
|
||||||
|
|
||||||
|
### High-Level Algorithm
|
||||||
|
We'll keep working with the sign lattice as an example, keeping in mind
|
||||||
|
how what we do generalizes to a any lattice \(L\) describing a variable's
|
||||||
|
state. The general shape of our argument will be as follows, where I've underlined and
|
||||||
|
numbered assumptions or aspects that we have yet to provide.
|
||||||
|
|
||||||
|
1. Our fixed-point analysis from the previous section gave us a result \(r\)
|
||||||
|
that satisfies the following equation:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
r = \text{update}(\text{join}(r))
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Above \(\text{join}\) applies the [predecessor-combining function]({{< relref "08_spa_agda_forward#join-preds" >}})
|
||||||
|
from the previous post to each state (corresponding to `joinAll` in Agda)
|
||||||
|
and \(\text{update}\) performs one round of abstract interpretation.
|
||||||
|
|
||||||
|
2. Because of the [correspondence of our semantics and CFGs]({{< relref "07_spa_agda_semantics_and_cfg" >}}),
|
||||||
|
each program evaluation in the form \(\rho, s \Rightarrow \rho'\)
|
||||||
|
corresponds to a path through the Control Flow Graph. Along the path,
|
||||||
|
each node contains simple statements, which correspond to intermediate steps
|
||||||
|
in evaluating the program. These will also be in the form
|
||||||
|
\(\rho_1, b \Rightarrow \rho_2\).
|
||||||
|
|
||||||
|
3. We will proceed iteratively, stepping through the trace one basic block at
|
||||||
|
a time. At each node in the graph:
|
||||||
|
* We will assume that the beginning state (the variables in \(\rho_1\)) are
|
||||||
|
{{< internal "correctly-described" >}}
|
||||||
|
correctly described
|
||||||
|
{{< /internal >}}
|
||||||
|
by one of the predecessors of the current node. Since
|
||||||
|
{{< internal "disjunction" >}}
|
||||||
|
joining represents "or"
|
||||||
|
{{< /internal >}},
|
||||||
|
that is the same
|
||||||
|
as saying that \(\text{join}(r)\)
|
||||||
|
contains an accurate description of \(\rho_1\).
|
||||||
|
|
||||||
|
* Because
|
||||||
|
{{< internal "abstract-interpretation" >}}
|
||||||
|
the abstract interpretation function preserves accurate descriptions
|
||||||
|
{{< /internal >}},
|
||||||
|
if \(\text{join}(r)\) contains an accurate description \(\rho_1\), then applying our
|
||||||
|
abstract interpretation function via \(\text{update}\) should result in
|
||||||
|
a map that contains an accurate-described \(\rho_2\). In other words, \(\text{update}(\text{join}(r))\)
|
||||||
|
describes \(\rho_2\) at the current block.
|
||||||
|
{{< internal "equivalence" >}}
|
||||||
|
By the equation above
|
||||||
|
{{< /internal >}}, that's the same as saying
|
||||||
|
\(r\) describes \(\rho_2\) at the current block.
|
||||||
|
|
||||||
|
* Since the trace is a path through a graph, there must be an edge from
|
||||||
|
the current basic block to the next. This means that the current basic
|
||||||
|
block is a predecessor of the next one. From the previous point, we know
|
||||||
|
that \(\rho_2\) is accurately described by this predecessor, fulfilling
|
||||||
|
our earlier assumption and allowing us to continue iteration.
|
||||||
|
|
||||||
|
So, what are the missing pieces?
|
||||||
|
|
||||||
|
1. We need to define what it means for a lattice (like our sign lattice)
|
||||||
|
to "correctly describe" what happens when evaluating a program for real.
|
||||||
|
For example, the \(+\) in sign analysis describes values that are bigger than zero,
|
||||||
|
and a map like `{x:+}` states that `x` can only take on positive values.
|
||||||
|
2. We've seen before [the \((\sqcup)\) operator models disjunction
|
||||||
|
("A or B")]({{< relref "01_spa_agda_lattices#lub-glub-or-and" >}}), but
|
||||||
|
that was only an informal observation; we'll need to specify it preceisely.
|
||||||
|
3. Each analysis [provides an abstract interpretation `eval` function]({{< relref "08_spa_agda_forward#general-evaluator" >}}).
|
||||||
|
However, until now, nothing has formally constrained this function; we could
|
||||||
|
return \(+\) in every case, even though that would not be accurate. We will
|
||||||
|
need, for each analysis, a proof that its `eval` preserves accurate descriptions.
|
||||||
|
4. The equalities between our lattice elements [are actually equivalences]({{< relref "01_spa_agda_lattices#definitional-equality" >}}),
|
||||||
|
which helps us use simpler representations of data structures. Thus, even
|
||||||
|
in statements of the fixed point algorithm, our final result is a value \(a\)
|
||||||
|
such that \(a \approx f(a)\). We need to prove that our notion of equivalent
|
||||||
|
lattice elements plays nicely with correctness.
|
||||||
|
|
||||||
|
Let's start with the first bullet point.
|
||||||
|
|
||||||
|
### A Formal Definition of Correctness
|
||||||
|
|
||||||
|
When a variable is mapped to a particular sign (like `{ "x": + }`),
|
||||||
|
what that really says is that the value of `x` is greater than zero. Recalling
|
||||||
|
from [the post about our language's semantics]({{< relref "05_spa_agda_semantics#notation-for-environments" >}})
|
||||||
|
that we use the symbol \(\rho\) to represent mappings of variables to
|
||||||
|
their values, we might write this claim as:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\rho(\texttt{x}) > 0
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This is a good start, but it's a little awkward defining the meaning of "plus"
|
||||||
|
by referring to the context in which it's used (the `{ "x": ... }` portion
|
||||||
|
of the expression above). Instead, let's associate with each sign (like \(+\)) a
|
||||||
|
predicate: a function that takes a value, and makes a claim about that value
|
||||||
|
("this is positive"):
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket + \rrbracket\ v = v > 0
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
The notation above is a little weird unless you, like me, have a background in
|
||||||
|
Programming Language Theory (❤️). This comes from [denotational semantics](https://en.wikipedia.org/wiki/Denotational_semantics);
|
||||||
|
generally, one writes:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \text{thing} \rrbracket = \text{the meaning of the thing}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Where \(\llbracket \cdot \rrbracket\) is really a function (we call it
|
||||||
|
the _semantic function_) that maps things to
|
||||||
|
their meaning. Then, the above equation is similar to the more familiar
|
||||||
|
\(f(x) = x+1\): function and arguments on the left, definition on the right. When
|
||||||
|
the "meaning of the thing" is itself a function, we could write it explicitly
|
||||||
|
using lambda-notation:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \text{thing} \rrbracket = \lambda x.\ \text{body of the function}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Or, we could use the Haskell style and write the new variable on the left of
|
||||||
|
the equality:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \text{thing} \rrbracket\ x = \text{body of the function}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
That is precisely what I'm doing above with \(\llbracket + \rrbracket\).
|
||||||
|
With this in mind, we could define the entire semantic function for the
|
||||||
|
sign lattice as follows:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket + \rrbracket\ v = v\ \texttt{>}\ 0 \\
|
||||||
|
\llbracket 0 \rrbracket\ v = v\ \texttt{=}\ 0 \\
|
||||||
|
\llbracket - \rrbracket\ v = v\ \texttt{<}\ 0 \\
|
||||||
|
\llbracket \top \rrbracket\ v = \text{true} \\
|
||||||
|
\llbracket \bot \rrbracket\ v = \text{false}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
In Agda, the integer type already distinguishes between "negative natural" or
|
||||||
|
"positive natural" cases, which made it possible to define the semantic function
|
||||||
|
{{< sidenote "right" "without-note" "without using inequalities." >}}
|
||||||
|
Reasoning about inequalities is painful, sometimes requiring a number of
|
||||||
|
lemmas to arrive at a result that is intuitively obvious. Coq has a powerful
|
||||||
|
tactic called <a href="https://coq.inria.fr/doc/v8.11/refman/addendum/micromega.html#coq:tacn.lia"><code>lia</code></a>
|
||||||
|
that automatically solves systems of inequalities, and I use it liberally.
|
||||||
|
However, lacking such a tactic in Agda, I would like to avoid inequalities
|
||||||
|
if they are not needed.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 114 119 >}}
|
||||||
|
|
||||||
|
Notably, \(\llbracket \top \rrbracket\ v\) always holds, and
|
||||||
|
\(\llbracket \bot \rrbracket\ v\) never does. __In general__, we will always
|
||||||
|
need to define a semantic function for whatever lattice we are choosing for
|
||||||
|
our analysis.
|
||||||
|
|
||||||
|
It's important to remember from the previous post that the sign lattice
|
||||||
|
(or, more generally, our lattice \(L\)) is only a component of the
|
||||||
|
[lattice we use to instantiate the analysis]({{< relref "08_spa_agda_forward#whole-lattice" >}}).
|
||||||
|
We at least need to define what it means for the \(\text{Variable} \to \text{Sign}\)
|
||||||
|
portion of that lattice to be correct. This way, we'll have correctness
|
||||||
|
criteria for each key (CFG node) in the top-level \(\text{Info}\) lattice.
|
||||||
|
Since a map from variables to their sign characterizes not a single value \(v\)
|
||||||
|
but a whole environment \(\rho\), something like this is a good start:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \texttt{\{} x_1: s_1, ..., x_n: s_n \texttt{\}} \rrbracket\ \rho = \llbracket s_1 \rrbracket\ \rho(x_1)\ \text{and}\ ...\ \text{and}\ \llbracket s_n \rrbracket\ \rho(x_n)
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
As a concrete example, we might get:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \texttt{\{} \texttt{x}: +, \texttt{y}: - \texttt{\}} \rrbracket\ \rho = \rho(\texttt{x})\ \texttt{>}\ 0\ \text{and}\ \rho(\texttt{y})\ \texttt{<}\ 0
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
This is pretty good, but not quite right. For instance, the initial state of
|
||||||
|
the program --- before running the analysis --- assigns \(\bot\) to each
|
||||||
|
element. This is true because our fixed-point algorithm [starts with the least
|
||||||
|
element of the lattice]({{< relref "04_spa_agda_fixedpoint#start-least" >}}).
|
||||||
|
But even for a single-variable map `{x: ⊥ }`, the semantic function above would
|
||||||
|
give:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \texttt{\{} \texttt{x}: \bot \texttt{\}} \rrbracket\ \rho = \text{false}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
That's clearly not right: our initial state should be possible, lest
|
||||||
|
the entire proof be just a convoluted [_ex falso_](https://en.wikipedia.org/wiki/Principle_of_explosion)!
|
||||||
|
|
||||||
|
There is another tricky aspect of our analysis, which is primarily defined
|
||||||
|
[using the join (\(\sqcup\)) operator]({{< relref "08_spa_agda_forward#join-preds" >}}).
|
||||||
|
Observe the following example:
|
||||||
|
|
||||||
|
```C
|
||||||
|
// initial state: { x: ⊥ }
|
||||||
|
if b {
|
||||||
|
x = 1; // state: { x: + }
|
||||||
|
} else {
|
||||||
|
// state unchanged: { x: ⊥ }
|
||||||
|
}
|
||||||
|
// state: { x: + } ⊔ { x: ⊥ } = { x: + }
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice that in the final state, the sign of `x` is `+`, even though when
|
||||||
|
`b` is `false`, the variable is never set. In a simple language like ours,
|
||||||
|
without variable declaration points, this is probably the best we could hope
|
||||||
|
for. The crucial observation, though, is that the oddness only comes into
|
||||||
|
play with variables that are not set. In the "initial state" case, none
|
||||||
|
of the variables have been modified; in the `else` case of the conditional,
|
||||||
|
`x` was never assigned to. We can thus relax our condition to an if-then:
|
||||||
|
if a variable is in our environment \(\rho\), then the variable-sign lattice's
|
||||||
|
interpretation accurately describes it.
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\begin{array}{ccc}
|
||||||
|
\llbracket \texttt{\{} x_1: s_1, ..., x_n: s_n \texttt{\}} \rrbracket\ \rho & = & & \textbf{if}\ x_1 \in \rho\ \textbf{then}\ \llbracket s_1 \rrbracket\ \rho(x_1)\ \\ & & \text{and} & ... \\ & & \text{and} & \textbf{if}\ x_n \in \rho\ \textbf{then}\ \llbracket s_n \rrbracket\ \rho(x_n)
|
||||||
|
\end{array}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
The first "weird" case now results in the following:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \texttt{\{} \texttt{x}: \bot \texttt{\}} \rrbracket\ \rho = \textbf{if}\ \texttt{x} \in \rho\ \textbf{then}\ \text{false}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Which is just another way of saying:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \texttt{\{} \texttt{x}: \bot \texttt{\}} \rrbracket\ \rho = \texttt{x} \notin \rho
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
In the second case, the interpretation also results in a true statement:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \texttt{\{} \texttt{x}: + \texttt{\}} \rrbracket\ \rho = \textbf{if}\ \texttt{x} \in \rho\ \textbf{then}\ \texttt{x} > 0
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
In Agda, I encode the fact that a verified analysis needs a semantic function
|
||||||
|
\(\llbracket\cdot\rrbracket\) for its element lattice \(L\) by taking such
|
||||||
|
a function as an argument called `⟦_⟧ˡ`:
|
||||||
|
|
||||||
|
{{< codelines "Agda" "agda-spa/Analysis/Forward.agda" 246 253 "hl_lines=5" >}}
|
||||||
|
|
||||||
|
I then define the semantic function for the variable-sign lattice in the following
|
||||||
|
way, which eschews the "..." notation in favor of a more Agda-compatible (and
|
||||||
|
equivalent) form:
|
||||||
|
|
||||||
|
{{< codelines "Agda" "agda-spa/Analysis/Forward.agda" 255 256 >}}
|
||||||
|
|
||||||
|
The above reads roughly as follows:
|
||||||
|
|
||||||
|
> For every variable `k` and sign [or, more generally, lattice element] `l` in
|
||||||
|
> the variable map lattice, if `k` is in the environment `ρ`, then it satisfies
|
||||||
|
> the predicate given by the semantic function applied to `l`.
|
||||||
|
|
||||||
|
Let's recap: we have defined a semantic function for our sign lattice, and
|
||||||
|
noted that to define a verified analysis, we always need such a semantic function.
|
||||||
|
We then showed how to construct a semantic function for a whole variable map
|
||||||
|
(of type \(\text{Variable} \to \text{Sign}\), or \(\text{Variable}\to L\)
|
||||||
|
in general). We also wrote some Agda code doing all this. As a result, we
|
||||||
|
have filled in the missing piece for {{< internalref "correctly-described" >}}property{{< /internalref >}}.
|
||||||
|
|
||||||
|
However, the way that we brought in the semantic function in the Agda code
|
||||||
|
above hints that there's more to be discussed. What's `latticeInterpretationˡ`?
|
||||||
|
In answering that question, we'll provide evidence for
|
||||||
|
{{< internalref "disjunction" >}}property{{< /internalref >}}
|
||||||
|
and
|
||||||
|
{{< internalref "equivalence" >}}property{{< /internalref >}}.
|
||||||
|
|
||||||
|
### Properties of the Semantic Function
|
||||||
|
|
||||||
|
As we briefly saw earlier, we loosened the notion of equality to that of equivalences,
|
||||||
|
which made it possible to ignore things like the ordering of key-value pairs
|
||||||
|
in maps. That's great and all, but nothing is stopping us from defining semantic functions that violate our equivalence!
|
||||||
|
Supposing \(a \approx f(a)\), as far
|
||||||
|
as Agda is concerned, even though \(a\) and \(f(a)\) are "equivalent",
|
||||||
|
\(\llbracket a \rrbracket\) and \(\llbracket f(a) \rrbracket\) may be
|
||||||
|
totally different. For a semantic function to be correct, it must produce
|
||||||
|
the same predicate for equivalent elements of lattice \(L\). That's
|
||||||
|
{{< internalref "equivalence" >}}missing piece{{< /internalref >}}.
|
||||||
|
|
||||||
|
Another property of semantic functions that we will need to formalize
|
||||||
|
is that \((\sqcup)\) represents disjunction.
|
||||||
|
This comes into play when we reason about the correctness of predecessors in
|
||||||
|
a Control Flow Graph. Recall that during the last step of processing a given node,
|
||||||
|
when we are trying to move on to the next node in the trace, we have knowledge
|
||||||
|
that the current node's variable map accurately describes the intermediate
|
||||||
|
environment. In other words, \(\llbracket \textit{vs}_i \rrbracket\ \rho_2\) holds, where
|
||||||
|
\(\textit{vs}_i\) is the variable map for the current node. We can generalize this
|
||||||
|
knowledge a little, and get:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \textit{vs}_1 \rrbracket\ \rho_2\ \text{or}\ ...\ \text{or}\ \llbracket \textit{vs}_n \rrbracket\ \rho_2
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
However, the assumption that we _need_ to hold when moving on to a new node
|
||||||
|
is in terms of \(\textit{JOIN}\), which combines all the predecessors' maps
|
||||||
|
\(\textit{vs}_1, ..., \textit{vs}_n\) using \((\sqcup)\). Thus, we will need to be in a world where
|
||||||
|
the following claim is true:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\llbracket \textit{vs}_1 \sqcup ... \sqcup \textit{vs}_n \rrbracket\ \rho
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
To get from one to the other, we will need to rely explicitly on the fact
|
||||||
|
that \((\sqcup)\) encodes "or". It's not necessary for the forward analysis,
|
||||||
|
but a similar property ought to hold for \((\sqcap)\) and "and". This
|
||||||
|
constraint provides {{< internalref "disjunction" >}}missing piece{{< /internalref >}}.
|
||||||
|
|
||||||
|
I defined a new data type that bundles a semantic function with proofs of
|
||||||
|
the properties in this section; that's precisely what `latticeInterpretationˡ`
|
||||||
|
is:
|
||||||
|
|
||||||
|
{{< codelines "Agda" "agda-spa/Language/Semantics.agda" 66 73 >}}
|
||||||
|
|
||||||
|
In short, to leverage the framework for verified analysis, you would need to
|
||||||
|
provide a semantic function that interacts properly with `≈` and `∨`.
|
||||||
|
|
||||||
|
### Correctness of the Evaluator
|
||||||
|
|
||||||
|
All that's left is {{< internalref "abstract-interpretation" >}}the last missing piece{{< /internalref >}},
|
||||||
|
which requires that `eval` matches the semantics of our language. Recall
|
||||||
|
the signature of `eval`:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 166 166 >}}
|
||||||
|
|
||||||
|
It operates on expressions and variable maps, which themselves associate a
|
||||||
|
sign (or, generally, an element of lattice \(L\)), with each variable. The
|
||||||
|
"real" evaluation judgement, on the other hand, is in the form
|
||||||
|
\(\rho, e \Downarrow v\), and reads "expression \(e\) in environment \(\rho\)
|
||||||
|
evaluates to value \(v\)". In Agda:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Language/Semantics.agda" 27 27 >}}
|
||||||
|
|
||||||
|
Let's line up the types of `eval` and the judgement. I'll swap the order of arguments
|
||||||
|
for `eval` to make the correspondence easier to see:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\begin{array}{ccccccc}
|
||||||
|
\text{eval} & : & (\text{Variable} \to \text{Sign}) & \to & \text{Expr} & \to & \text{Sign} \\
|
||||||
|
\cdot,\cdot\Downarrow\cdot & : & (\text{Variable} \to \text{Value}) & \to & \text{Expr} & \to & \text{Value} & \to & \text{Set} \\
|
||||||
|
& & \underbrace{\phantom{(\text{Variable} \to \text{Value})}}_{\text{environment-like inputs}} & & & & \underbrace{\phantom{Value}}_{\text{value-like outputs}}
|
||||||
|
\end{array}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Squinting a little, it's almost like the signature of `eval` is the signature
|
||||||
|
for the evaluation judgement, but it forgets a few details (the exact values
|
||||||
|
of the variables) in favor of abstractions (their signs). To show that `eval`
|
||||||
|
behaves correctly, we'll want to prove that this forgetful correspondence holds.
|
||||||
|
|
||||||
|
Concretely, for any expression \(e\), take some environment \(\rho\), and "forget"
|
||||||
|
the exact values, getting a sign map \(\textit{vs}\). Now, evaluate the expression
|
||||||
|
to some value \(v\) using the semantics, and also, compute the expression's
|
||||||
|
expected sign \(s\) using `eval`. The sign should be the same as forgetting
|
||||||
|
\(v\)'s exact value. Mathematically,
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\forall e, \rho, v, \textit{vs}.\ \textbf{if}\ \llbracket\textit{vs}\rrbracket \rho\ \text{and}\ \rho, e \Downarrow v\ \textbf{then}\ \llbracket \text{eval}\ \textit{vs}\ e\rrbracket v
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
In Agda:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 286 287 >}}
|
||||||
|
|
||||||
|
For a concrete analysis, we need to prove the above claim. In the case of
|
||||||
|
sign analysis, this boils down to a rather cumbersome proof by cases. I will collapse
|
||||||
|
the proofs to save some space and avoid overwhelming the reader.
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 237 258 "" "**(Click here to expand the proof of correctness for plus)**" >}}
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 261 282 "" "**(Click here to expand the proof of correctness for minus)**" >}}
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Sign.agda" 284 294 "" >}}
|
||||||
|
|
||||||
|
This completes {{< internalref "abstract-interpretation" >}}our last missing piece{{< /internalref >}}.
|
||||||
|
All that's left is to put everything together.
|
||||||
|
|
||||||
|
### Proving The Analysis Correct
|
||||||
|
|
||||||
|
#### Lifting Expression Evaluation Correctness to Statements
|
||||||
|
The individual analyses (like the sign analysis) provide only an evaluation
|
||||||
|
function for _expressions_, and thus only have to prove correctness of
|
||||||
|
that function. However, our language is made up of statements, with judgements
|
||||||
|
in the form \(\rho, s \Rightarrow \rho'\). Now that we've shown (or assumed)
|
||||||
|
that `eval` behaves correctly when evaluating expressions, we should show
|
||||||
|
that this correctness extends to evaluating statements, which in the
|
||||||
|
forward analysis implementation is handled by the
|
||||||
|
[`updateVariablesFromStmt` function]({{< relref "08_spa_agda_forward#define-updateVariablesFromStmt" >}}).
|
||||||
|
|
||||||
|
The property we need to show looks very similar to the property for `eval`:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
\forall b, \rho, \rho', \textit{vs}.\ \textbf{if}\ \llbracket\textit{vs}\rrbracket \rho\ \text{and}\ \rho, b \Rightarrow \rho'\ \textbf{then}\ \llbracket \text{updateVariablesFromStmt}\ \textit{vs}\ b\rrbracket \rho'
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
In Agda:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 291 291 >}}
|
||||||
|
|
||||||
|
The proof is straightforward, and relies on the semantics of the [map update]({{< relref "08_spa_agda_forward#generalized-update" >}}).
|
||||||
|
Specifically, in the case of an assignment statement \(x \leftarrow e\), all we
|
||||||
|
do is store the new sign computed from \(e\) into the map at \(x\). To
|
||||||
|
prove the correctness of the entire final environment \(\rho'\), there are
|
||||||
|
two cases to consider:
|
||||||
|
|
||||||
|
* A variable in question is the newly-updated \(x\). In this case, since
|
||||||
|
`eval` produces correct signs, the variable clearly has the correct sign.
|
||||||
|
This is the first highlighted chunk in the below code.
|
||||||
|
* A variable in question is different from \(x\). In this case, its value
|
||||||
|
in the environment \(\rho'\) should be the same as it was prior, and
|
||||||
|
its sign in the updated variable map is the same as it was in the original.
|
||||||
|
Since the original map correctly described the original environment, we know
|
||||||
|
the sign is correct. This is the second highlighted chunk in the below
|
||||||
|
code.
|
||||||
|
|
||||||
|
The corresponding Agda proof is as follows:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 291 305 "hl_lines=5-7 10-15" >}}
|
||||||
|
|
||||||
|
From this, it follows with relative ease that each basic block in the lattice,
|
||||||
|
when evaluated, produces an environment that matches the prediction of our
|
||||||
|
forward analysis.
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 318 318 >}}
|
||||||
|
|
||||||
|
#### Walking the Trace
|
||||||
|
|
||||||
|
Finally, we get to the meat of the proof, which follows the [outline](#high-level-algorithm). First,
|
||||||
|
let's take a look at `stepTrace`, which implements the second bullet in
|
||||||
|
our iterative procedure. I'll show the code, then we can discuss it
|
||||||
|
in detail.
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 324 342 >}}
|
||||||
|
|
||||||
|
The first `let`-bound variable, `⟦joinAll-result⟧ρ₁` is kind of an intermediate
|
||||||
|
result, which I was forced to introduced because `rewrite` caused Agda to
|
||||||
|
allocate ~100GB of memory. It simply makes use of the fact that `joinAll`, the
|
||||||
|
function that performs predecessor joining for each node in the CFG, sets
|
||||||
|
every key of the map accordingly.
|
||||||
|
|
||||||
|
The second `let`-bound variable, `⟦analyze-result⟧`, steps through a given
|
||||||
|
node's basic block and leverages our proof of statement-correctness to validate
|
||||||
|
that the final environment `ρ₂` matches the prediction of the analyzer.
|
||||||
|
|
||||||
|
The last two `let`-bound variables apply the equation we wrote above:
|
||||||
|
|
||||||
|
{{< latex >}}
|
||||||
|
r = \text{update}(\text{join}(r))
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
Recall that `analyze` is the combination of `update` and `join`:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 226 227 >}}
|
||||||
|
|
||||||
|
Finally, the `in` portion of the code uses `⟦⟧ᵛ-respects-≈ᵛ`, a proof
|
||||||
|
of {{< internalref "equivalence" >}}property{{< /internalref >}}, to produce
|
||||||
|
the final claim in terms of the `result` map.
|
||||||
|
|
||||||
|
Knowing how to step, we can finally walk the entire trace, implementing
|
||||||
|
the iterative process:
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 344 357 >}}
|
||||||
|
|
||||||
|
The first step --- assuming that one of the predecessors of the
|
||||||
|
current node satisfies the initial environment `ρ₁` --- is captured by
|
||||||
|
the presence of the argument `⟦joinForKey-s₁⟧ρ₁`. We expect the calling code
|
||||||
|
to provide a proof of that.
|
||||||
|
|
||||||
|
The second step, in both cases, is implemented using `stepTrace`,
|
||||||
|
as we saw above. That results in a proof that at the end of the current basic
|
||||||
|
block, the final environment `ρ₂` is accurately described.
|
||||||
|
|
||||||
|
From there, we move on to the third iterative step, if necessary. The
|
||||||
|
sub-expression `edge⇒incoming s₁→s₂` validates that, since we have an edge
|
||||||
|
from the current node to the next, we are listed as a predecessor. This,
|
||||||
|
in turn, means that we are included in the list of states-to-join for the
|
||||||
|
\(\textit{JOIN}\) function. That fact is stored in `s₁∈incomingStates`.
|
||||||
|
Finally, relying on
|
||||||
|
{{< internalref "disjunction" >}}property{{< /internalref >}},
|
||||||
|
we construct an assumption fit for a recursive invocation of `walkTrace`,
|
||||||
|
and move on to the next CFG node. The `foldr` here is motivated by the fact
|
||||||
|
that "summation" using \((\sqcup)\) is a fold.
|
||||||
|
|
||||||
|
When the function terminates, what we have is a proof that the final program
|
||||||
|
state is accurately described by the results of our program analysis. All
|
||||||
|
that's left is to kick off the walk. To do that, observe that the initial state
|
||||||
|
has no predecessors (how could it, if it's at the beginning of the program?).
|
||||||
|
That, in turn, means that this state maps every variable to the bottom element.
|
||||||
|
Such a variable configuration only permits the empty environment \(\rho = \varnothing\).
|
||||||
|
If the program evaluation starts in an empty environment, we have the assumption
|
||||||
|
needed to kick off the iteration.
|
||||||
|
|
||||||
|
{{< codelines "agda" "agda-spa/Analysis/Forward.agda" 359 366 "hl_lines=7" >}}
|
||||||
|
|
||||||
|
Take a look at the highlighted line in the above code block in particular.
|
||||||
|
It states precisely what we were hoping to see: that, when evaluating
|
||||||
|
a program, the final state when it terminates is accurately described by
|
||||||
|
the `result` of our static program analysis at the `finalState` in the CFG.
|
||||||
|
We have done it!
|
||||||
|
|
||||||
|
### Future Work
|
||||||
|
|
||||||
|
It took a lot of machinery to get where we are, but there's still lots of
|
||||||
|
things to do.
|
||||||
|
|
||||||
|
1. __Correctness beyond the final state__: the statement we've arrived at
|
||||||
|
only shows that the final state of the program matches the results of
|
||||||
|
the analysis. In fact, the property holds for all intermediate states, too.
|
||||||
|
The only snag is that it's more difficult to _state_ such a claim.
|
||||||
|
|
||||||
|
To do something like that, we probably need a notion of "incomplete evaluations"
|
||||||
|
of our language, which run our program but stop at some point before the end.
|
||||||
|
A full execution would be a special case of such an "incomplete evaluation"
|
||||||
|
that stops in the final state. Then, we could restate `analyze-correct`
|
||||||
|
in terms of partial evaluations, which would strengthen it.
|
||||||
|
2. __A more robust language and evaluation process__: we noted above that
|
||||||
|
our join-based analysis is a little bit weird, particularly in the
|
||||||
|
cases of uninitialized variables. There are ways to adjust our language
|
||||||
|
(e.g., introducing variable declaration points) and analysis functions
|
||||||
|
(e.g., only allowing assignment for declared variables) to reduce
|
||||||
|
the weirdness somewhat. They just lead to a more complicated language.
|
||||||
|
3. __A more general correctness condition__: converting lattice elements into
|
||||||
|
predicates on values gets us far. However, some types of analyses make claims
|
||||||
|
about more than the _current_ values of variables. For instance, _live variable
|
||||||
|
analysis_ checks if a variable's current value is going to be used in the
|
||||||
|
future. Such an analysis can help guide register (re)allocation. To
|
||||||
|
talk about future uses of a variable, the predicate will need to be formulated
|
||||||
|
in terms of the entire evaluation proof tree. This opens a whole can
|
||||||
|
of worms that I haven't begun to examine.
|
||||||
|
|
||||||
|
Now that I'm done writing up my code so far, I will start exploring these
|
||||||
|
various avenues of work. In the meantime, though, thanks for reading!
|
||||||
591
content/blog/chapel_runtime_types.md
Normal file
591
content/blog/chapel_runtime_types.md
Normal file
@@ -0,0 +1,591 @@
|
|||||||
|
---
|
||||||
|
title: "Chapel's Runtime Types as an Interesting Alternative to Dependent Types"
|
||||||
|
date: 2025-03-02T22:52:01-08:00
|
||||||
|
tags: ["Chapel", "C++", "Idris", "Programming Languages"]
|
||||||
|
description: "In this post, I discuss Chapel's runtime types as a limited alternative to dependent types."
|
||||||
|
---
|
||||||
|
|
||||||
|
One day, when I was in graduate school, the Programming Languages research
|
||||||
|
group was in a pub for a little gathering. Amidst beers, fries, and overpriced
|
||||||
|
sandwiches, the professor and I were talking about [dependent types](https://en.wikipedia.org/wiki/Dependent_type). Speaking
|
||||||
|
loosely and imprecisely, these are types that are somehow constructed from
|
||||||
|
_values_ in a language, like numbers.
|
||||||
|
|
||||||
|
For example, in C++, [`std::array`](https://en.cppreference.com/w/cpp/container/array)
|
||||||
|
is a dependent type. An instantiation of the _type_ `array`, like `array<string, 3>`
|
||||||
|
is constructed from the type of its elements (here, `string`) and a value
|
||||||
|
representing the number of elements (here, `3`). This is in contrast with types
|
||||||
|
like `std::vector`, which only depends on a type (e.g., `vector<string>` would
|
||||||
|
be a dynamically-sized collection of strings).
|
||||||
|
|
||||||
|
I was extolling the virtues of general dependent types, like you might find
|
||||||
|
in [Idris](https://www.idris-lang.org/) or [Agda](https://agda.readthedocs.io/en/latest/getting-started/what-is-agda.html):
|
||||||
|
more precise function signatures! The
|
||||||
|
{{< sidenote "right" "curry-howard-note" "Curry-Howard isomorphism!" >}}
|
||||||
|
The Curry-Howard isomorphism is a common theme on this blog. I've
|
||||||
|
<a href="{{< relref "typesafe_interpreter_revisited#curry-howard-correspondence" >}}">
|
||||||
|
written about it myself</a>, but you can also take a look at the
|
||||||
|
<a href="https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence">
|
||||||
|
Wikipedia page</a>.
|
||||||
|
{{< /sidenote >}} The professor was skeptical. He had been excited about
|
||||||
|
dependent types in the past, but nowadays he felt over them. They were cool, he
|
||||||
|
said, but there are few practical uses. In fact, he posed a challenge:
|
||||||
|
|
||||||
|
> Give me one good reason to use dependent types in practice that doesn't
|
||||||
|
> involve keeping track of bounds for lists and matrices!
|
||||||
|
{#bounds-quote}
|
||||||
|
|
||||||
|
This challenge alludes to fixed-length lists -- [vectors](https://agda.github.io/agda-stdlib/master/Data.Vec.html)
|
||||||
|
-- which are one of the first dependently-typed data structures one learns about.
|
||||||
|
Matrices are effectively vectors-of-vectors. In fact, even in giving my introductory
|
||||||
|
example above, I demonstrated the C++ equivalent of a fixed-length list, retroactively
|
||||||
|
supporting the professor's point.
|
||||||
|
|
||||||
|
It's not particularly important to write down how I addressed the challenge;
|
||||||
|
suffice it to say that the notion resonated with some of the other
|
||||||
|
students present in the pub. In the midst of practical development, how much
|
||||||
|
of dependent types' power can you leverage, and how much power do you pay
|
||||||
|
for but never use?
|
||||||
|
|
||||||
|
A second round of beers arrived. The argument was left largely unresolved,
|
||||||
|
and conversation flowed to other topics. Eventually, I graduated, and started
|
||||||
|
working on the [Chapel language](https://chapel-lang.org/) team (I also
|
||||||
|
[write on the team's blog](https://chapel-lang.org/blog/authors/daniel-fedorin/)).
|
||||||
|
|
||||||
|
When I started looking at Chapel programs, I could not believe my eyes...
|
||||||
|
|
||||||
|
### A Taste of Chapel's Array Types
|
||||||
|
|
||||||
|
Here's a simple Chapel program that creates an array of 10 integers.
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
var A: [0..9] int;
|
||||||
|
```
|
||||||
|
|
||||||
|
Do you see the similarity to the `std::array` example above? Of course, the
|
||||||
|
syntax is quite different, but in _essence_ I think the resemblance is
|
||||||
|
uncanny. Let's mangle the type a bit --- producing invalid Chapel programs ---
|
||||||
|
just for the sake of demonstration.
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
var B: array(0..9, int); // first, strip the syntax sugar
|
||||||
|
var C: array(int, 0..9); // swap the order of the arguments to match C++
|
||||||
|
```
|
||||||
|
|
||||||
|
Only one difference remains: in C++, arrays are always indexed from zero. Thus,
|
||||||
|
writing `array<int, 10>` would implicitly create an array whose indices start
|
||||||
|
with `0` and end in `9`. In Chapel, array indices can start at values other
|
||||||
|
than zero (it happens to be useful for elegantly writing numerical programs),
|
||||||
|
so the type explicitly specifies a lower and a higher bound. Other than that,
|
||||||
|
though, the two types look very similar.
|
||||||
|
|
||||||
|
In general, Chapel arrays have a _domain_, typically stored in variables like `D`.
|
||||||
|
The domain of `A` above is `{0..9}`. This domain is part of the array's type.
|
||||||
|
|
||||||
|
Before I move on, I'd like to pause and state a premise that is crucial
|
||||||
|
for the rest of this post: __I think knowing the size of a data structure,
|
||||||
|
like `std::array` or Chapel's `[0..9] int`, is valuable__. If this premise
|
||||||
|
were not true, there'd be no reason to prefer `std::array` to `std::vector`, or
|
||||||
|
care that Chapel has indexed arrays. However, having this information
|
||||||
|
can help in numerous ways, such as:
|
||||||
|
|
||||||
|
* __Enforcing compatible array shapes.__ For instance, the following Chapel
|
||||||
|
code would require two arrays passed to function `doSomething` to have the same size.
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc doSomething(people: [?D] person, data: [D] personInfo) {}
|
||||||
|
```
|
||||||
|
|
||||||
|
Similarly, we can enforce the fact that an input to a function has the same shape
|
||||||
|
as the output:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc transform(input: [?D] int): [D] string;
|
||||||
|
```
|
||||||
|
* __Consistency in generics__. Suppose you have a generic function that declares
|
||||||
|
a new variable of a given type, and just returns it:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc defaultValue(type argType) {
|
||||||
|
var x: argType;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Code like this exists in "real" Chapel software, by the way --- the example
|
||||||
|
is not contrived. By including the bounds etc. into the array type, we can
|
||||||
|
ensure that `x` is appropriately allocated. Then, `defaultValue([1,2,3].type)`
|
||||||
|
would return an array of three default-initialized integers.
|
||||||
|
* __Eliding boundary checking__. Boundary checking is useful for safety,
|
||||||
|
since it ensures that programs don't read or write past the end of allocated
|
||||||
|
memory. However, bounds checking is also slow. Consider the following function that
|
||||||
|
sums two arrays:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc sumElementwise(A: [?D] int, B: [D] int) {
|
||||||
|
var C: [D] int;
|
||||||
|
for idx in D do
|
||||||
|
C[idx] = A[idx] + B[idx];
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Since arrays `A`, `B`, and `C` have the same domain `D`, we don't need
|
||||||
|
to do bound checking when accessing any of their elements. I don't believe
|
||||||
|
this is currently an optimisation in Chapel, but it's certainly on the
|
||||||
|
table.
|
||||||
|
* __Documentation__. Including the size of the array as part of type
|
||||||
|
signature clarifies the intent of the code being written. For instance,
|
||||||
|
in the following function:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc sendEmails(numEmails: int, destinationAddrs: [1..numEmails] address) { /* ... */ }
|
||||||
|
```
|
||||||
|
|
||||||
|
It's clear from the type of the `destinationAddrs`s that there ought to
|
||||||
|
be exactly as many `destinationAddrs` as the number of emails that should
|
||||||
|
be sent.
|
||||||
|
|
||||||
|
Okay, recap: C++ has `std::array`, which is a dependently-typed container
|
||||||
|
that represents an array with a fixed number of elements. Chapel has something
|
||||||
|
similar. I think these types are valuable.
|
||||||
|
|
||||||
|
At this point, it sort of looks like I'm impressed with Chapel for copying a C++
|
||||||
|
feature from 2011. Not so! As I played with Chapel programs more and more,
|
||||||
|
arrays miraculously supported patterns that I knew I couldn't write in C++.
|
||||||
|
The underlying foundation of Chapel's array types is quite unlike any other.
|
||||||
|
Before we get to that, though, let's take a look at how dependent types
|
||||||
|
are normally used (by us mere mortal software engineers).
|
||||||
|
|
||||||
|
### Difficulties with Dependent Types
|
||||||
|
|
||||||
|
Let's start by looking at a simple operation on fixed-length lists: reversing them.
|
||||||
|
One might write a reverse function for "regular" lists, ignoring details
|
||||||
|
like ownership, copying, that looks like this:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
std::vector<int> reverse(std::vector<int>);
|
||||||
|
```
|
||||||
|
|
||||||
|
This function is not general: it won't help us reverse lists of
|
||||||
|
strings, for instance. The "easy fix" is to replace `int` with some kind
|
||||||
|
of placeholder that can be replaced with any type.
|
||||||
|
|
||||||
|
```C++
|
||||||
|
std::vector<T> reverse(std::vector<T>);
|
||||||
|
```
|
||||||
|
|
||||||
|
You can try compiling this code, but you will immediately run into an error.
|
||||||
|
What the heck is `T`? Normally,
|
||||||
|
when we name a variable, function, or type (e.g., by writing `vector`, `reverse`),
|
||||||
|
we are referring to its declaration somewhere else. At this time, `T` is not
|
||||||
|
declared anywhere. It just "appears" in our function's type. To fix this,
|
||||||
|
we add a declaration for `T` by turning `reverse` into a template:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
template <typename T>
|
||||||
|
std::vector<T> reverse(std::vector<T>);
|
||||||
|
```
|
||||||
|
|
||||||
|
The new `reverse` above takes two arguments: a type and a list of values of
|
||||||
|
that type. So, to _really_ call this `reverse`, we need to feed the type
|
||||||
|
of our list's elements into it. This is normally done automatically
|
||||||
|
(in C++ and otherwise) but under the hood, invocations might look like this:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
reverse<int>({1,2,3}); // produces 3, 2, 1
|
||||||
|
reverse<string>({"world", "hello"}) // produces "hello", "world"
|
||||||
|
```
|
||||||
|
|
||||||
|
This is basically what we have to do to write `reverse` on `std::array`, which
|
||||||
|
includes an additional parameter that encodes its length. We might start with
|
||||||
|
the following (using `n` as a placeholder for length, and observing that
|
||||||
|
reversing an array doesn't change its length):
|
||||||
|
|
||||||
|
```C++
|
||||||
|
std::array<T, n> reverse(std::array<T, n>);
|
||||||
|
```
|
||||||
|
|
||||||
|
Once again, to make this compile, we need to add template parameters for `T` and `n`.
|
||||||
|
|
||||||
|
```C++
|
||||||
|
template <typename T, size_t n>
|
||||||
|
std::array<T, n> reverse(std::array<T, n>);
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, you might be asking...
|
||||||
|
|
||||||
|
{{< dialog >}}
|
||||||
|
{{< message "question" "reader" >}}
|
||||||
|
This section is titled "Difficulties with Dependent Types". What's the difficulty?
|
||||||
|
{{< /message >}}
|
||||||
|
{{< /dialog >}}
|
||||||
|
|
||||||
|
Well, here's the kicker. C++ templates are a __compile-time mechanism__. As
|
||||||
|
a result, arguments to `template` (like `T` and `n`) must be known when the
|
||||||
|
program is being compiled. This, in turn, means
|
||||||
|
{{< sidenote "right" "deptype-note" "the following program doesn't work:" >}}
|
||||||
|
The observant reader might have noticed that one of the Chapel programs we
|
||||||
|
saw above, <code>sendEmails</code>, does something similar. The
|
||||||
|
<code>numEmails</code> argument is used in the type of the
|
||||||
|
<code>destinationAddrs</code> parameter. That program is valid Chapel.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void buildArray(size_t len) {
|
||||||
|
std::array<int, len> myArray;
|
||||||
|
// do something with myArray
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
You can't use these known-length types like `std::array` with any length
|
||||||
|
that is not known at compile-time. But that's a lot of things! If you're reading
|
||||||
|
from an input file, chances are, you don't know how big that file is. If you're
|
||||||
|
writing a web server, you likely don't know the length of the HTTP requests.
|
||||||
|
With every setting a user can tweak when running your code, you sacrifice the
|
||||||
|
ability to use templated types.
|
||||||
|
|
||||||
|
Also, how do you _return_ a `std::array`? If the size of the returned array is
|
||||||
|
known in advance, you just list that size:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
std::array<int, 10> createArray();
|
||||||
|
```
|
||||||
|
|
||||||
|
If the size is not known at compile-time, you might want to do something like
|
||||||
|
the following --- using an argument `n` in the type of the returned array ---
|
||||||
|
but it would not compile:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
auto computeNNumbers(size_t n) -> std::array<int, n>; // not valid C++
|
||||||
|
```
|
||||||
|
|
||||||
|
Moreover, you actually can't use `createArray` to figure out the required
|
||||||
|
array size, and _then_ return an array that big, even if in the end you
|
||||||
|
only used compile-time-only computations in the body of `createArray`.
|
||||||
|
What you would need is to provide a "bundle" of a value and a type that is somehow
|
||||||
|
built from that value.
|
||||||
|
|
||||||
|
```C++
|
||||||
|
// magic_pair is invented syntax, will not even remotely work
|
||||||
|
auto createArray() -> magic_pair<size_t size, std::array<int, size>>;
|
||||||
|
```
|
||||||
|
|
||||||
|
This pair contains a `size` (suppose it's known at compilation time for
|
||||||
|
the purposes of appeasing C++) as well as an array that uses that `size`
|
||||||
|
as its template argument. This is not real C++ --- not even close --- but
|
||||||
|
such pairs are a well-known concept. They are known as
|
||||||
|
[dependent pairs](https://unimath.github.io/agda-unimath/foundation.dependent-pair-types.html),
|
||||||
|
or, if you're trying to impress people, \(\Sigma\)-types. In Idris, you
|
||||||
|
could write `createArray` like this:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
createArray : () -> (n : Nat ** Vec n Int)
|
||||||
|
```
|
||||||
|
|
||||||
|
There are languages out there -- that are not C++, alas -- that support
|
||||||
|
dependent pairs, and as a result make it more convenient to use types that
|
||||||
|
depend on values. Not only that, but a lot of these languages do not force
|
||||||
|
dependent types to be determined at compile-time. You could write that
|
||||||
|
coveted `readArrayFromFile` function:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
readArrayFromFile : String -> IO (n : Nat ** Vec n String)
|
||||||
|
```
|
||||||
|
|
||||||
|
Don't mind `IO`; in pure languages like Idris, this type is a necessity when
|
||||||
|
interacting with the outside world: reading data in and sending it out. The key is that
|
||||||
|
`readArrayFromFile` produces, at runtime, a pair of `n`, which is the size
|
||||||
|
of the resulting array, and a `Vec` of that many `String`s (e.g., one string
|
||||||
|
per line of the file).
|
||||||
|
|
||||||
|
Dependent pairs are cool and very general. However, the end result of
|
||||||
|
types with bounds which are not determined at compile-time is that you're
|
||||||
|
_required_ to use dependent pairs. Thus, you must always carry the array's length
|
||||||
|
together with the array itself.
|
||||||
|
|
||||||
|
The bottom line is this:
|
||||||
|
|
||||||
|
* In true dependently typed languages, a type that depends on a value (like `Vec`
|
||||||
|
in Idris) lists that value in its type. When this value is listed by
|
||||||
|
referring to an identifier --- like `n` in `Vec n String` above --- this
|
||||||
|
identifier has to be defined somewhere, too. This necessitates dependent pairs,
|
||||||
|
in which the first element is used syntactically as the "definition point"
|
||||||
|
of a type-level value. For example, in the following piece of code:
|
||||||
|
|
||||||
|
```Idris
|
||||||
|
(n : Nat ** Vec n String)
|
||||||
|
```
|
||||||
|
|
||||||
|
The `n : Nat` part of the pair serves both to say that the first element
|
||||||
|
is a natural number, and to introduce a variable `n` that refers to
|
||||||
|
this number so that the second type (`Vec n String`) can refer to it.
|
||||||
|
|
||||||
|
A lot of the time, you end up carrying this extra value (bound to `n` above)
|
||||||
|
with your type.
|
||||||
|
* In more mainstream languages, things are even more restricted: dependently
|
||||||
|
typed values are a compile-time property, and thus, cannot be used with
|
||||||
|
runtime values like data read from a file, arguments passed in to a function,
|
||||||
|
etc.
|
||||||
|
|
||||||
|
### Hiding Runtime Values from the Type
|
||||||
|
|
||||||
|
Let's try to think of ways to make things more convenient. First of all, as
|
||||||
|
we saw, in Idris, it's possible to use runtime values in types. Not only that,
|
||||||
|
but Idris is a compiled language, so presumably we can compile dependently typed programs
|
||||||
|
with runtime-enabled dependent types. The trick is to forget some information:
|
||||||
|
turn a vector `Vec n String` into two values (the size of the vector and the
|
||||||
|
vector itself), and forget -- for the purposes of generating code -- that they're
|
||||||
|
related. Whenever you pass in a `Vec n String`, you can compile that similarly
|
||||||
|
to how you'd compile passing in a `Nat` and `List String`. Since the program has
|
||||||
|
already been type checked, you can be assured that you don't encounter cases
|
||||||
|
when the size and the actual vector are mismatched, or anything else of that
|
||||||
|
nature.
|
||||||
|
|
||||||
|
Additionally, you don't always need the length of the vector at all. In a
|
||||||
|
good chunk of Idris code, the size arguments are only used to ensure type
|
||||||
|
correctness and rule out impossible cases; they are never accessed at runtime.
|
||||||
|
As a result, you can _erase_ the size of the vector altogether. In fact,
|
||||||
|
[Idris 2](https://github.com/idris-lang/Idris2/) leans on [Quantitative Type Theory](https://bentnib.org/quantitative-type-theory.html)
|
||||||
|
to make erasure easier.
|
||||||
|
|
||||||
|
At this point, one way or another, we've "entangled" the vector with a value
|
||||||
|
representing its size:
|
||||||
|
|
||||||
|
* When a vector of some (unknown, but fixed) length needs to be produced from
|
||||||
|
a function, we use dependent pairs.
|
||||||
|
* Even in other cases, when compiling, we end up treating a vector as a
|
||||||
|
length value and the vector itself.
|
||||||
|
|
||||||
|
Generally speaking, a good language design practice is to hide extraneous
|
||||||
|
complexity, and to remove as much boilerplate as possible. If the size
|
||||||
|
value of a vector is always joined at the hip with the vector, can we
|
||||||
|
avoid having to explicitly write it?
|
||||||
|
|
||||||
|
This is pretty much exactly what Chapel does. It _allows_ explicitly writing
|
||||||
|
the domain of an array as part of its type, but doesn't _require_ it. When
|
||||||
|
you do write it (re-using my original snippet above):
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
var A: [0..9] int;
|
||||||
|
```
|
||||||
|
|
||||||
|
What you are really doing is creating a value (the [range](https://chapel-lang.org/docs/primers/ranges.html) `0..9`),
|
||||||
|
and entangling it with the type of `A`. This is very similar to what a language
|
||||||
|
like Idris would do under the hood to compile a `Vec`, though it's not quite
|
||||||
|
the same.
|
||||||
|
|
||||||
|
At the same time, you can write code that omits the bounds altogether:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc processArray(A: [] int): int;
|
||||||
|
proc createArray(): [] int;
|
||||||
|
```
|
||||||
|
|
||||||
|
In all of these examples, there is an implicit runtime value (the bounds)
|
||||||
|
that is associated with the array's type. However, we are never forced to
|
||||||
|
explicitly thread through or include a size. Where reasoning about them is not
|
||||||
|
necessary, Chapel's domains are hidden away. Chapel refers to the implicitly
|
||||||
|
present value associated with an array type as its _runtime type_.
|
||||||
|
|
||||||
|
I hinted earlier that things are not quite the same in this representation
|
||||||
|
as they are in my simplified model of Idris. In Idris, as I mentioned earlier,
|
||||||
|
the values corresponding to vectors' indices can be erased if they are not used.
|
||||||
|
In Chapel, this is not the case --- a domain always exists at runtime. At the
|
||||||
|
surface level, this means that you may pay for more than what you use. However,
|
||||||
|
domains enable a number of interesting patterns of array code. We'll get
|
||||||
|
to that in a moment; first, I want to address a question that may be on
|
||||||
|
your mind:
|
||||||
|
|
||||||
|
{{< dialog >}}
|
||||||
|
{{< message "question" "reader" >}}
|
||||||
|
At this point, this looks just like keeping a <code>.length</code> field as
|
||||||
|
part of the array value. Most languages do this. What's the difference
|
||||||
|
between this and Chapel's approach?
|
||||||
|
{{< /message >}}
|
||||||
|
{{< /dialog >}}
|
||||||
|
|
||||||
|
This is a fair question. The key difference is that the length exists even if an array
|
||||||
|
does not. The following is valid Chapel code (re-using the `defaultValue`
|
||||||
|
snippet above):
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc defaultValue(type argType) {
|
||||||
|
var x: argType;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
proc doSomething() {
|
||||||
|
type MyArray = [1..10] int;
|
||||||
|
var A = defaultValue(MyArray);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Here, we created an array `A` with the right size (10 integer elements)
|
||||||
|
without having another existing array as a reference. This might seem like
|
||||||
|
a contrived example (I could've just as well written `var A: [1..10] int`),
|
||||||
|
but the distinction is incredibly helpful for generic programming. Here's
|
||||||
|
a piece of code from the Chapel standard library, which implements
|
||||||
|
a part of Chapel's [reduction](https://chapel-lang.org/docs/primers/reductions.html) support:
|
||||||
|
|
||||||
|
{{< githubsnippet "chapel-lang/chapel" "e8ff8ee9a67950408cc6d4c3220ac647817ddae3" "modules/internal/ChapelReduce.chpl" "Chapel" 146 >}}
|
||||||
|
inline proc identity {
|
||||||
|
var x: chpl__sumType(eltType); return x;
|
||||||
|
}
|
||||||
|
{{< /githubsnippet >}}
|
||||||
|
|
||||||
|
Identity elements are important when performing operations like sums and products,
|
||||||
|
for many reasons. For one, they tell you what the sum (e.g.) should be when there
|
||||||
|
are no elements at all. For another, they can be used as an initial value for
|
||||||
|
an accumulator. In Chapel, when you are performing a reduction, there is a
|
||||||
|
good chance you will need several accumulators --- one for each thread performing
|
||||||
|
a part of the reduction.
|
||||||
|
|
||||||
|
That `identity` function looks almost like `defaultValue`! Since it builds the
|
||||||
|
identity element from the type, and since the type includes the array's dimensions,
|
||||||
|
summing an array-of-arrays, even if it's empty, will produce the correct output.
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
type Coordinate = [1..3] real;
|
||||||
|
|
||||||
|
var Empty: [0..<0] Coordinate;
|
||||||
|
writeln(+ reduce Empty); // sum up an empty list of coordinates
|
||||||
|
```
|
||||||
|
|
||||||
|
As I mentioned before, having the domain be part of the type can also enable
|
||||||
|
indexing optimizations --- without any need for [interprocedural analysis](https://en.wikipedia.org/wiki/Interprocedural_optimization) ---
|
||||||
|
in functions like `sumElementwise`:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc sumElementwise(A: [?D] int, B: [D] int) {
|
||||||
|
var C: [D] int;
|
||||||
|
for idx in D do
|
||||||
|
C[idx] = A[idx] + B[idx];
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The C++ equivalent of this function --- using `vector`s to enable arbitrary-size
|
||||||
|
lists of numbers read from user input, and `.at` to enable bounds checks ---
|
||||||
|
does not include enough information for this optimization to be possible.
|
||||||
|
|
||||||
|
```C++
|
||||||
|
void sumElementwise(std::vector<int> A, std::vector<int> B) {
|
||||||
|
std::vector<int> C(A.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < A.size(); i++) {
|
||||||
|
C.at(i) = A.at(i) + B.at(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
All in all, this makes for a very interesting mix of features:
|
||||||
|
|
||||||
|
* __Chapel arrays have their bounds as part of types__, like `std::array` in C++
|
||||||
|
and `Vec` in Idris. This enables all the benefits I've described above.
|
||||||
|
* __The bounds don't have to be known at compile-time__, like all dependent
|
||||||
|
types in Idris. This means you can read arrays from files (e.g.) and still
|
||||||
|
reason about their bounds as part of the type system.
|
||||||
|
* __Domain information can be hidden when it's not used__, and does not require
|
||||||
|
explicit additional work like template parameters or dependent pairs.
|
||||||
|
|
||||||
|
Most curiously, runtime types only extend to arrays and domains. In that sense,
|
||||||
|
they are not a general purpose replacement for dependent types. Rather,
|
||||||
|
they make arrays and domains special, and single out the exact case my
|
||||||
|
professor was [talking about in the introduction](#bounds-quote). Although
|
||||||
|
at times I've [twisted Chapel's type system in unconventional ways](https://chapel-lang.org/blog/posts/linear-multistep/)
|
||||||
|
to simulate dependent types, rarely have I felt a need for them while
|
||||||
|
programming in Chapel. In that sense --- and in the "practical software engineering"
|
||||||
|
domain --- I may have been proven wrong.
|
||||||
|
|
||||||
|
### Pitfalls of Runtime Types
|
||||||
|
|
||||||
|
Should all languages do things the way Chapel does? I don't think so. Like
|
||||||
|
most features, runtime types like that in Chapel are a language design
|
||||||
|
tradeoff. Though I've covered their motivation and semantics, perhaps
|
||||||
|
I should mention the downsides.
|
||||||
|
|
||||||
|
The greatest downside is that, generally speaking, _types are not always a
|
||||||
|
compile-time property_. We saw this earlier with `MyArray`:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
type MyArray = [1..10] int;
|
||||||
|
```
|
||||||
|
|
||||||
|
Here, the domain of `MyArray` (one-dimensional with bounds `1..10`) is a runtime
|
||||||
|
value. It has an
|
||||||
|
{{< sidenote "right" "dce-note" "execution-time cost." >}}
|
||||||
|
The execution-time cost is, of course, modulo <a href="https://en.wikipedia.org/wiki/Dead-code_elimination">dead code elimination</a> etc. If
|
||||||
|
my snippet made up the entire program being compiled, the end result would
|
||||||
|
likely do nothing, since <code>MyArray</code> isn't used anywhere.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
Moreover, types that serve as arguments to functions (like `argType` for
|
||||||
|
`defaultValue`), or as their return values (like the result of `chpl__sumType`)
|
||||||
|
also have an execution-time backing. This is quite different from most
|
||||||
|
compiled languages. For instance, in C++, templates are "stamped out" when
|
||||||
|
the program is compiled. A function with a `typename T` template parameter
|
||||||
|
called with type `int`, in terms of generated code, is always the same as
|
||||||
|
a function where you search-and-replaced `T` with `int`. This is called
|
||||||
|
[monomorphization](https://en.wikipedia.org/wiki/Monomorphization), by the
|
||||||
|
way. In Chapel, however, if the function is instantiated with an array type,
|
||||||
|
it will have an additional parameter, which represents the runtime component
|
||||||
|
of the array's type.
|
||||||
|
|
||||||
|
The fact that types are runtime entities means that compile-time type checking
|
||||||
|
is insufficient. Take, for instance, the above `sendEmails` function:
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
proc sendEmails(numEmails: int, destinationAddrs: [1..numEmails] address) { /* ... */ }
|
||||||
|
```
|
||||||
|
|
||||||
|
Since `numEmails` is a runtime value (it's a regular argument!), we can't ensure
|
||||||
|
at compile-time that a given array value matches the `[1..numEmails] address`
|
||||||
|
type. As a result, Chapel defers bounds checking to when the `sendEmails`
|
||||||
|
function is invoked.
|
||||||
|
|
||||||
|
This leads to some interesting performance considerations. Take two Chapel records
|
||||||
|
(similar to `struct`s in C++) that simply wrap a value. In one of them,
|
||||||
|
we provide an explicit type for the field, and in the other, we leave the field
|
||||||
|
type generic.
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
record R1 { var field: [1..10] int; }
|
||||||
|
record R2 { var field; }
|
||||||
|
|
||||||
|
var A = [1,2,3,4,5,6,7,8,9,10];
|
||||||
|
var r1 = new R1(A);
|
||||||
|
var r2 = new R2(A);
|
||||||
|
```
|
||||||
|
|
||||||
|
In a conversation with a coworker, I learned that these are not the same.
|
||||||
|
That's because the record `R1` explicitly specifies a type
|
||||||
|
for `field`. Since the type has a runtime component, the constructor
|
||||||
|
of `R1` will actually perform a runtime check to ensure that the argument
|
||||||
|
has 10 elements. `R2` will not do this, since there isn't any other type
|
||||||
|
to check against.
|
||||||
|
|
||||||
|
Of course, the mere existence of an additional runtime component is a performance
|
||||||
|
consideration. To ensure that Chapel programs perform as well as possible,
|
||||||
|
the Chapel standard library attempts to avoid using runtime components
|
||||||
|
wherever possible. This leads to a distinction between a "static type"
|
||||||
|
(known at compile-time) and a "dynamic type" (requiring a runtime value).
|
||||||
|
The `chpl__sumType` function we saw mentioned above uses static components of
|
||||||
|
types, because we don't want each call to `+ reduce` to attempt to run a number
|
||||||
|
of extraneous runtime queries.
|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Though runtime types are not a silver bullet, I find them to be an elegant
|
||||||
|
middle-ground solution to the problem of tracking array bounds. They enable
|
||||||
|
optimizations, generic programming, and more, without the complexity of
|
||||||
|
a fully dependently-typed language. They are also quite unlike anything I've
|
||||||
|
seen in any other language.
|
||||||
|
|
||||||
|
What's more, this post only scratches the surface of what's possible using
|
||||||
|
arrays and domains. Besides encoding array bounds, domains include information
|
||||||
|
about how an array is distributed across several nodes (see the
|
||||||
|
[distributions primer](https://chapel-lang.org/docs/primers/distributions.html)),
|
||||||
|
and how it's stored in memory (see the [sparse computations](https://chapel-lang.org/blog/posts/announcing-chapel-2.3/#sparse-computations)
|
||||||
|
section of the recent 2.3 release announcement). In general, they are a very
|
||||||
|
flavorful component to Chapel's "special sauce" as a language for parallel
|
||||||
|
computing.
|
||||||
|
|
||||||
|
You can read more about arrays and domains in the [corresponding primer](https://chapel-lang.org/docs/primers/arrays.html).
|
||||||
BIN
content/blog/i_love_programming_languages/cardeli-products.png
Normal file
BIN
content/blog/i_love_programming_languages/cardeli-products.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 81 KiB |
415
content/blog/i_love_programming_languages/index.md
Normal file
415
content/blog/i_love_programming_languages/index.md
Normal file
@@ -0,0 +1,415 @@
|
|||||||
|
---
|
||||||
|
title: "Reasons to Love the Field of Programming Languages"
|
||||||
|
date: 2025-12-31
|
||||||
|
tags: ["Programming Languages", "Compilers", "Type Systems"]
|
||||||
|
---
|
||||||
|
|
||||||
|
I work at HPE on the
|
||||||
|
[Chapel Programming Language](https://chapel-lang.org). Recently, another HPE
|
||||||
|
person asked me:
|
||||||
|
|
||||||
|
> So, you work on the programming language. What's next for you?
|
||||||
|
|
||||||
|
This caught me off-guard because I hadn't even conceived of moving on.
|
||||||
|
I don't want to move on, because __I love the field of programming languages__.
|
||||||
|
In addition, I have come to think there is something in PL for everyone, from
|
||||||
|
theorists to developers to laypeople.
|
||||||
|
So, in that spirit, I am writing this list as a non-exhaustive survey that serves
|
||||||
|
the dual purpose of explaining my personal infatuation with PL, and providing
|
||||||
|
others with ways to engage with PL that align with their existing interests.
|
||||||
|
I try to provide rationale for each claim, but you can just read the reasons
|
||||||
|
themselves and skip the rest.
|
||||||
|
|
||||||
|
My general thesis goes something like this: programming languages are a unique
|
||||||
|
mix of the __inherently human and social__ and the __deeply mathematical__,
|
||||||
|
a mix that often remains deeply grounded in the practical, __low-level realities of
|
||||||
|
our hardware__.
|
||||||
|
|
||||||
|
Personally, I find all of these properties equally important, but we have to
|
||||||
|
start somewhere. Let's begin with the human aspect of programming languages.
|
||||||
|
|
||||||
|
### Human Aspects of PL
|
||||||
|
|
||||||
|
> Programs must be written for people to read, and only incidentally for machines
|
||||||
|
> to execute.
|
||||||
|
>
|
||||||
|
> --- Abelson & Sussman, _Structure and Interpretation of Computer Programs_.
|
||||||
|
|
||||||
|
As we learn more about the other creatures that inhabit our world, we discover
|
||||||
|
that they are similar to us in ways that we didn't expect. However, our
|
||||||
|
language is unique to us. It gives us the ability to go far beyond
|
||||||
|
the simple sharing of information: we communicate abstract concepts,
|
||||||
|
social dynamics, stories. In my view, storytelling is our birthright more
|
||||||
|
so than anything else.
|
||||||
|
|
||||||
|
I think this has always been reflected in the broader discipline of programming.
|
||||||
|
_Code should always tell a story_, I've heard throughout my education and career.
|
||||||
|
_It should explain itself_. In paradigms such as
|
||||||
|
[literate programming](https://en.wikipedia.org/wiki/Literate_programming),
|
||||||
|
we explicitly mix prose and code. Notebook technologies
|
||||||
|
like [Jupyter](https://jupyter.org/) intersperse computation with explanations
|
||||||
|
thereof.
|
||||||
|
|
||||||
|
* __Reason 1__: programming languages provide the foundation of expressing
|
||||||
|
human thought and stories through code.
|
||||||
|
|
||||||
|
From flowery prose to clinical report, human expression takes a wide variety
|
||||||
|
of forms. The need to vary our descriptions is also well-served by the diversity
|
||||||
|
of PL paradigms. From stateful transformations in languages like Python and C++,
|
||||||
|
through pure and immutable functions in Haskell and Lean, to fully declarative
|
||||||
|
statements-of-fact in Nix, various languages have evolved to
|
||||||
|
support the many ways in which we wish to describe our world and our needs.
|
||||||
|
|
||||||
|
* __Reason 2__: diverse programming languages enable different perspectives
|
||||||
|
and ways of storytelling, allowing us choice in how to express our thoughts
|
||||||
|
and solve our problems.
|
||||||
|
|
||||||
|
Those human thoughts of ours are not fundamentally grounded in logic,
|
||||||
|
mathematics, or anything else. They are a product of millennia of evolution
|
||||||
|
through natural selection, of adaptation to ever-changing conditions.
|
||||||
|
Our cognition is limited, rife with blind spots, and partial to the subject
|
||||||
|
matter at hand. We lean on objects, actors, contracts, and more as helpful,
|
||||||
|
mammal-compatible analogies. I find this to be beautiful; here is something
|
||||||
|
we can really call ours.
|
||||||
|
|
||||||
|
* __Reason 3__: programming languages imbue the universe's fundamental rules of
|
||||||
|
computation with humanity's identity and idiosyncrasies. They carve out
|
||||||
|
a home for us within impersonal reality.
|
||||||
|
|
||||||
|
Storytelling (and, more generally, writing) is not just about communicating
|
||||||
|
with others. Writing helps clarify one's own thoughts, and to think deeper.
|
||||||
|
In his 1979 Turing Award lecture,
|
||||||
|
[Notation as a Tool of Thought](https://www.eecg.utoronto.ca/~jzhu/csc326/readings/iverson.pdf),
|
||||||
|
Kenneth Iverson, the creator of [APL](https://tryapl.org/), highlighted ways
|
||||||
|
in which programming languages, with their notation, can help express patterns
|
||||||
|
and facilitate thinking.
|
||||||
|
|
||||||
|
Throughout computing history, programming languages built abstractions that ---
|
||||||
|
together with advances in hardware --- made it possible to create ever more
|
||||||
|
complex software. Dijkstra's
|
||||||
|
[structured programming](https://en.wikipedia.org/wiki/Structured_programming)
|
||||||
|
crystallized the familiar patterns of `if`/`else` and `while` out of
|
||||||
|
a sea of control flow. Structures and objects partitioned data and state
|
||||||
|
into bundles that could be reasoned about, or put out of mind when irrelevant.
|
||||||
|
Recently, I dare say that notions of ownership and lifetimes popularized
|
||||||
|
by Rust have clarified how we think about memory.
|
||||||
|
|
||||||
|
* __Reason 4__: programming languages combat complexity, and give us tools to
|
||||||
|
think and reason about unwieldy and difficult problems.
|
||||||
|
|
||||||
|
The fight against complexity occurs on more battlegrounds than PL design alone.
|
||||||
|
Besides its syntax and semantics, a programming language is composed of its
|
||||||
|
surrounding tooling: its interpreter or compiler, perhaps its package manager
|
||||||
|
or even its editor. Language designers and developers take great care to
|
||||||
|
[improve the quality of error messages](https://elm-lang.org/news/compiler-errors-for-humans),
|
||||||
|
to provide [convenient editor tooling](https://chapel-lang.org/blog/posts/chapel-lsp/),
|
||||||
|
and build powerful package managers
|
||||||
|
like [Yarn](https://yarnpkg.com/). Thus, in each language project, there is
|
||||||
|
room for folks who, even if they are not particularly interested in grammars or
|
||||||
|
semantics, care about the user experience.
|
||||||
|
|
||||||
|
* __Reason 5__: programming languages provide numerous opportunities for
|
||||||
|
thoughtful forays into the realms of User Experience and Human-Computer
|
||||||
|
Interaction.
|
||||||
|
|
||||||
|
I hope you agree, by this point, that programming languages are fundamentally
|
||||||
|
tethered to the human. Like any human endeavor, then, they don't exist in
|
||||||
|
isolation. To speak a language, one usually wants a partner who understands
|
||||||
|
and speaks that same language. Likely, one wants a whole community, topics
|
||||||
|
to talk about, or even a set of shared beliefs or mythologies. This desire
|
||||||
|
maps onto the realm of programming languages. When using a particular PL,
|
||||||
|
you want to talk to others about your code, implement established design patterns,
|
||||||
|
use existing libraries.
|
||||||
|
|
||||||
|
I mentioned mythologies earlier. In some ways, language
|
||||||
|
communities do more than share know-how about writing code. In many
|
||||||
|
cases, I think language communities rally around ideals embodied by their
|
||||||
|
language. The most obvious example seems to be Rust. From what I've seen,
|
||||||
|
the Rust community believes in language design that protects its users
|
||||||
|
from the pitfalls of low-level programming. The Go community
|
||||||
|
believes in radical simplicity. Julia actively incorporates contributions from
|
||||||
|
diverse research projects into an interoperable set of scientific packages.
|
||||||
|
|
||||||
|
* __Reason 6__: programming languages are complex collaborative social projects
|
||||||
|
that have the power to champion innovative ideas within the field of
|
||||||
|
computer science.
|
||||||
|
|
||||||
|
So far, I've presented interpretations of the field of PL as tools for expression and thought,
|
||||||
|
human harbor to the universe's ocean, and collaborative social projects.
|
||||||
|
These interpretations coexist and superimpose, but they are only a fraction of
|
||||||
|
the whole. What has kept me enamored with PL is that it blends these human
|
||||||
|
aspects with a mathematical ground truth, through fundamental connections to
|
||||||
|
computation and mathematics.
|
||||||
|
|
||||||
|
### The Mathematics of PL
|
||||||
|
|
||||||
|
> Like buses: you wait two thousand years for a definition of “effectively
|
||||||
|
> calculable”, and then three come along at once.
|
||||||
|
>
|
||||||
|
> --- Philip Wadler, _Propositions as Types_
|
||||||
|
|
||||||
|
There are two foundations,
|
||||||
|
[lambda calculus](https://en.wikipedia.org/wiki/Lambda_calculus) and
|
||||||
|
[Turing machines](https://en.wikipedia.org/wiki/Turing_machine), that underpin
|
||||||
|
most modern PLs. The abstract notion of Turing machines
|
||||||
|
is closely related to, and most similar among the "famous" computational models,
|
||||||
|
to the
|
||||||
|
[von Neumann Architecture](https://en.wikipedia.org/wiki/Von_Neumann_architecture).
|
||||||
|
Through bottom-up organization of "control unit instructions" into
|
||||||
|
"structured programs" into the imperative high-level languages today, we can
|
||||||
|
trace the influence of Turing machines in C++, Python, Java, and many others.
|
||||||
|
At the same time, and running on the same hardware, functional programming
|
||||||
|
languages like Haskell represent a chain of succession from the lambda calculus,
|
||||||
|
embellished today with types and numerous other niceties. These two lineages
|
||||||
|
are inseparably linked: they have been mathematically proven to be equivalent.
|
||||||
|
They are two worlds coexisting.
|
||||||
|
|
||||||
|
The two foundations have a crucial property in common: they are descriptions
|
||||||
|
of what can be computed. Both were developed initially as mathematical formalisms.
|
||||||
|
They are rooted not only in pragmatic concerns of "what can I do with
|
||||||
|
these transistors?", but in the deeper questions of "what can be done
|
||||||
|
with a computer?".
|
||||||
|
|
||||||
|
* __Reason 7__: general-purpose programming languages are built on foundations of computation,
|
||||||
|
and wield the power to compute anything we consider "effectively computable at all".
|
||||||
|
|
||||||
|
Because of these mathematical beginnings, we have long had precise and powerful
|
||||||
|
ways to talk about what code written in a particular language _means_.
|
||||||
|
This is the domain of _semantics_. Instead of reference implementations
|
||||||
|
of languages (CPython for Python, `rustc` for Rust), and instead of textual
|
||||||
|
specifications, we can explicitly map constructs in languages either to
|
||||||
|
mathematical objects ([denotational semantics](https://en.wikipedia.org/wiki/Denotational_semantics))
|
||||||
|
or to (abstractly) execute them ([operational semantics](https://en.wikipedia.org/wiki/Operational_semantics)).
|
||||||
|
|
||||||
|
To be honest, the precise and mathematical nature of these tools is, for me,
|
||||||
|
justification enough to love them. However, precise semantics for languages
|
||||||
|
have real advantages. For one, they allow us to compare programs' real
|
||||||
|
behavior with what we _expect_, giving us a "ground truth" when trying to
|
||||||
|
fix bugs or evolve the language. For another, they allow us to confidently
|
||||||
|
make optimizations: if you can _prove_ that a transformation won't affect
|
||||||
|
a program's behavior, but make it faster, you can safely use it. Finally,
|
||||||
|
the discipline of formalizing programming language semantics usually entails
|
||||||
|
boiling them down to their most essential components. Stripping the
|
||||||
|
[syntax sugar](https://en.wikipedia.org/wiki/Syntactic_sugar) helps clarify
|
||||||
|
how complex combinations of features should behave together.
|
||||||
|
|
||||||
|
Some of these techniques bear a noticeable resemblance to the study of
|
||||||
|
semantics in linguistics. Given our preceding discussion on the humanity
|
||||||
|
of programming languages, perhaps that's not too surprising.
|
||||||
|
|
||||||
|
* __Reason 8__: programming languages can be precisely formalized, giving
|
||||||
|
exact, mathematical descriptions of how they should work.
|
||||||
|
|
||||||
|
In talking about how programs behave, we run into an important limitation
|
||||||
|
of reasoning about Turing machines and lambda calculus, stated precisely in
|
||||||
|
[Rice's theorem](https://en.wikipedia.org/wiki/Rice%27s_theorem):
|
||||||
|
all non-trivial semantic properties of programs (termination, throwing errors)
|
||||||
|
are undecidable. There will always be programs that elude not only human analysis,
|
||||||
|
but algorithmic understanding.
|
||||||
|
|
||||||
|
It is in the context of this constraint that I like to think about type systems.
|
||||||
|
The beauty of type systems, to me, is in how they tame the impossible.
|
||||||
|
Depending on the design of a type system, a well-typed program may well be
|
||||||
|
guaranteed not to produce any errors, or produce only the "expected" sort of
|
||||||
|
errors. By constructing reasonable _approximations_ of program
|
||||||
|
behavior, type systems allow us to verify that programs are well-behaved in
|
||||||
|
spite of Rice's theorem. Much of the time, too, we can do so in a way that is
|
||||||
|
straightforward for humans to understand and machines to execute.
|
||||||
|
|
||||||
|
* __Reason 9__: in the face of the fundamentally impossible, type systems
|
||||||
|
pragmatically grant us confidence in our programs for surprisingly little
|
||||||
|
conceptual cost.
|
||||||
|
|
||||||
|
At first, type systems look like engineering formalisms. That
|
||||||
|
may well be the original intention, but in our invention of type systems,
|
||||||
|
we have actually completed a quadrant of a deeper connection: the
|
||||||
|
[Curry-Howard isomorphism](https://en.wikipedia.org/wiki/Curry%E2%80%93Howard_correspondence).
|
||||||
|
[Propositions](https://en.wikipedia.org/wiki/Proposition), in the logical sense,
|
||||||
|
correspond one-to-one with types of programs, and proofs of these propositions
|
||||||
|
correspond to programs that have the matching type.
|
||||||
|
|
||||||
|
This is an incredibly deep connection. In adding parametric polymorphism
|
||||||
|
to a type system (think Java generics, or C++ templates without specialization),
|
||||||
|
we augment the corresponding logic with the "for all x" (\(\forall x\)) quantifier.
|
||||||
|
Restrict the copying of values in a way similar to Rust, and you get an
|
||||||
|
[affine logic](https://en.wikipedia.org/wiki/Affine_logic), capable of reasoning about resources and their use.
|
||||||
|
In languages like Agda with [dependent types](https://en.wikipedia.org/wiki/Dependent_type),
|
||||||
|
you get a system powerful enough [to serve as a foundation for mathematics](https://en.wikipedia.org/wiki/Intuitionistic_type_theory).
|
||||||
|
Suddenly, you can write code and mathematically prove properties about that
|
||||||
|
code in the same language. I've done this in my work with
|
||||||
|
[formally-verified static program analysis]({{< relref "series/static-program-analysis-in-agda" >}}).
|
||||||
|
|
||||||
|
This connection proves appealing even from the perspective of "regular"
|
||||||
|
mathematics. We have developed established engineering practices
|
||||||
|
for writing code: review, deployment, documentation. What if we could use
|
||||||
|
the same techniques for doing mathematics? What if, through the deep
|
||||||
|
connection of programming languages to logic, we could turn mathematics
|
||||||
|
into a computer-verified, collaborative endeavor?
|
||||||
|
I therefore present:
|
||||||
|
|
||||||
|
* __Reason 10__: type systems for programming languages deeply correspond
|
||||||
|
to logic, allowing us to mathematically prove properties about code,
|
||||||
|
using code, and to advance mathematics through the practices of software engineering.
|
||||||
|
|
||||||
|
{{< details summary="Bonus meta-reason to love the mathy side of PL!" >}}
|
||||||
|
In addition to the theoretical depth, I also find great enjoyment in the way that PL is practiced.
|
||||||
|
Here more than elsewhere, creativity and artfulness come into
|
||||||
|
play. In PL, [inference rules](https://en.wikipedia.org/wiki/Rule_of_inference) are a
|
||||||
|
lingua franca through which the formalisms I've mentioned above are expressed
|
||||||
|
and shared. They are such a central tool in the field that I've
|
||||||
|
developed [a system for exploring them interactively]({{< relref "blog/bergamot" >}})
|
||||||
|
on this blog.
|
||||||
|
|
||||||
|
In me personally, inference rules spark joy. They are a concise and elegant
|
||||||
|
way to do much of the formal heavy-lifting I described in this section;
|
||||||
|
we use them for operational semantics, type systems, and sometimes more.
|
||||||
|
When navigating the variety and complexity of the many languages and type
|
||||||
|
systems out there, we can count on inference rules to take us directly to
|
||||||
|
what we need to know. This same variety naturally demands flexibility in
|
||||||
|
how rules are constructed, and what notation is used. Though this can sometimes
|
||||||
|
be troublesome (one [paper](https://labs.oracle.com/pls/apex/f?p=LABS%3A0%3A%3AAPPLICATION_PROCESS%3DGETDOC_INLINE%3A%3A%3ADOC_ID%3A959)
|
||||||
|
I've seen describes __27__ different ways of writing the simple operation of substitution in literature!),
|
||||||
|
it also creates opportunities for novel and elegant ways of formalizing
|
||||||
|
PL.
|
||||||
|
|
||||||
|
* __Bonus Reason__: the field of programming languages has a standard technique
|
||||||
|
for expressing its formalisms, which precisely highlights core concepts
|
||||||
|
and leaves room for creative expression and elegance.
|
||||||
|
{{< /details >}}
|
||||||
|
|
||||||
|
I know that mathematics is a polarizing subject. Often, I find myself
|
||||||
|
torn between wanting precision and eschewing overzealous formalism. The
|
||||||
|
cusp between the two is probably determined by my own tolerance for abstraction.
|
||||||
|
Regardless of how much abstraction you are interested in learning about,
|
||||||
|
PL has another dimension, close to the ground: more often than not, our languages
|
||||||
|
need to execute on real hardware.
|
||||||
|
|
||||||
|
### Pragmatics of PL
|
||||||
|
|
||||||
|
Your perfectly-designed language can be completely useless if there is no
|
||||||
|
way to
|
||||||
|
{{< sidenote "right" "execute-note" "execute it" >}}
|
||||||
|
Technically, there are languages that don't care if you execute them at all.
|
||||||
|
Many programs in theorem-proving languages like Agda and Rocq exist only
|
||||||
|
to be type-checked. So, you could nitpick this claim; or, you could take
|
||||||
|
it more generally: your language can be useless if there's no
|
||||||
|
way to make it efficiently do what it's been made to do.
|
||||||
|
{{< /sidenote >}} efficiently. Thus, the field of PL subsumes not only
|
||||||
|
the theoretical foundations of languages and their human-centric design; it
|
||||||
|
includes also their realization as software.
|
||||||
|
|
||||||
|
The overall point of this section is that there is much depth to the techniques
|
||||||
|
involved in bringing a programming language to life. If you are a tinkerer
|
||||||
|
or engineer at heart, you will never run out of avenues of exploration.
|
||||||
|
The reasons are all framed from this perspective.
|
||||||
|
|
||||||
|
One fascinating aspect to programming languages is the "direction" from
|
||||||
|
which they have grown. On one side, you have languages that came
|
||||||
|
together from the need to control and describe hardware. I'd say that
|
||||||
|
this is the case for C and C++, Fortran, and others. More often than not,
|
||||||
|
these languages are compiled to machine code. Still subject to human
|
||||||
|
constraints, these languages often evolve more user-facing features as time
|
||||||
|
goes on. On the other side, you have languages developed to enable
|
||||||
|
people to write software, which later faced the constraints of actually working
|
||||||
|
efficiently. These are languages like Python, Ruby, and JavaScript. These
|
||||||
|
languages are often interpreted (executed by a dedicated program), with
|
||||||
|
techniques such as [just-in-time compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation).
|
||||||
|
There is no one-size-fits-all way to execute a language, and as a result,
|
||||||
|
|
||||||
|
* __Reason 11__: the techniques of executing programming languages are varied
|
||||||
|
and rich. From compilation, to JIT, to interpretation, the field
|
||||||
|
has many sub-disciplines, each with its own know-hows and tricks.
|
||||||
|
|
||||||
|
At the same time, someone whose goal is to actually develop a compiler
|
||||||
|
likely doesn't want to develop everything from scratch. To do so would
|
||||||
|
be a daunting task, especially if you want the compiler to run beyond
|
||||||
|
the confines of a personal machine. CPU [architectures](https://en.wikipedia.org/wiki/Instruction_set_architecture)
|
||||||
|
and operating system differences are hard for any individual to keep up with.
|
||||||
|
Fortunately, we have a gargantuan ongoing effort in the field:
|
||||||
|
the [LLVM Project](https://llvm.org/). LLVM spans numerous architectures
|
||||||
|
and targets, and has become a common back-end for languages like C++
|
||||||
|
(via [Clang](https://clang.llvm.org/get_started.html)), Swift, and Rust.
|
||||||
|
LLVM helps share and distribute the load of keeping up with the ongoing
|
||||||
|
march of architectures and OSes. It also provides a shared playground upon
|
||||||
|
which to experiment with language implementations, optimizations, and more.
|
||||||
|
|
||||||
|
* __Reason 12__: large projects like LLVM enable language designers to
|
||||||
|
lean on decades of precedent to develop a compiler for their language.
|
||||||
|
|
||||||
|
Though LLVM is powerful, it does not automatically grant languages implemented
|
||||||
|
with it good performance. In fact, no other tool does. To make a language
|
||||||
|
run fast requires a deep understanding of the language itself, the hardware
|
||||||
|
upon which it runs, and the tools used to execute it. That is a big ask!
|
||||||
|
Modern computers are extraordinarily complex. Techniques such as
|
||||||
|
[out-of-order execution](https://en.wikipedia.org/wiki/Out-of-order_execution),
|
||||||
|
[caching](https://en.wikipedia.org/wiki/Cache_(computing)#Hardware),
|
||||||
|
and [speculative execution](https://en.wikipedia.org/wiki/Speculative_execution)
|
||||||
|
are constantly at play. This means that any program is subject to hard-to-predict
|
||||||
|
and often unintuitive effects. On top of that, depending on your language's
|
||||||
|
capabilities, performance work can often entail working with additional
|
||||||
|
hardware, such as GPUs and NICs, which have their own distinct performance
|
||||||
|
characteristics. This applies both to compiled and interpreted languages.
|
||||||
|
Therefore, I give you:
|
||||||
|
|
||||||
|
* __Reason 13__: improving the performance of a programming language is rife
|
||||||
|
with opportunities to engage with low-level details of the hardware
|
||||||
|
and operating system.
|
||||||
|
|
||||||
|
In the [mathematics section](#the-mathematics-of-pl), we talked about how constructing correct
|
||||||
|
optimizations requires an understanding of the language's semantics. It
|
||||||
|
was one of the practical uses for having a mathematical definition of a language.
|
||||||
|
Reason 13 is where that comes in, but the synthesis is not automatic. In fact,
|
||||||
|
a discipline sits in-between defining how a language behaves and
|
||||||
|
optimizing programs: program analysis. Algorithms that analyze
|
||||||
|
properties of programs such as [reaching definitions](https://en.wikipedia.org/wiki/Reaching_definition)
|
||||||
|
enable optimizations such as [loop-invariant code motion](https://en.wikipedia.org/wiki/Loop-invariant_code_motion),
|
||||||
|
which can have very significant performance impact. At the same time, for an
|
||||||
|
analysis to be correct, it must be grounded in the program's mathematical
|
||||||
|
semantics. There are many fascinating techniques in this discipline,
|
||||||
|
including [ones that use lattice theory](https://cs.au.dk/~amoeller/spa/spa.pdf).
|
||||||
|
|
||||||
|
* __Reason 14__: the sub-discipline of program analysis serves as a grounded
|
||||||
|
application of PL theory to PL practice, enabling numerous optimizations
|
||||||
|
and transformations.
|
||||||
|
|
||||||
|
The programs your compiler generates are software, and, as we just saw,
|
||||||
|
may need to be tweaked for performance. But the compiler and/or interpreter
|
||||||
|
is itself a piece of software, with its own performance concerns. Today's language
|
||||||
|
implementations are subject to demands that hadn't been there historically.
|
||||||
|
For instance, languages are used to provide [language servers](https://microsoft.github.io/language-server-protocol/)
|
||||||
|
to enable editors to give users deeper insights into their code. Today,
|
||||||
|
a language implementation may be called upon every keystroke to provide
|
||||||
|
a typing user live updates. This has led to the introduction of
|
||||||
|
techniques like the [query architecture](https://ollef.github.io/blog/posts/query-based-compilers.html)
|
||||||
|
(see also [salsa](https://github.com/salsa-rs/salsa)) to avoid
|
||||||
|
redundant work and re-use intermediate results. New language implementations
|
||||||
|
like that of [Carbon](https://github.com/carbon-language/carbon-lang)
|
||||||
|
are exploring alternative representations of programs in memory. In
|
||||||
|
short,
|
||||||
|
|
||||||
|
* __Reason 15__: language implementations are themselves pieces of software,
|
||||||
|
subject to unique constraints and requiring careful and innovative
|
||||||
|
engineering.
|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
I've now given a tour of ways in which I found the PL field compelling,
|
||||||
|
organized across three broad categories. There is just one more reason
|
||||||
|
I'd like to share.
|
||||||
|
|
||||||
|
I was 16 years old when I got involved with the world of programming
|
||||||
|
languages and compilers. Though I made efforts to learn about it through
|
||||||
|
literature (the _Dragon Book_, and _Modern Compiler Design_), I simply
|
||||||
|
didn't have the background to find these resources accessible. However, all
|
||||||
|
was not lost. The PL community online has been, and still is, a vibrant and
|
||||||
|
enthusiastic place. I have found it to be welcoming of folks with backgrounds
|
||||||
|
spanning complete beginners and experts alike. Back then, it gave me
|
||||||
|
accessible introductions to anything I wanted. Now, every week I see new
|
||||||
|
articles go by that challenge my intuitions, teach me new things, or take PL
|
||||||
|
ideas to absurd and humorous extremes. So, my final reason:
|
||||||
|
|
||||||
|
* __Reason 16__: the programming languages community is full of brilliant,
|
||||||
|
kind, welcoming and enthusiastic people, who dedicate much of their
|
||||||
|
time to spreading the joy of the field.
|
||||||
|
|
||||||
|
I ❤️ you.
|
||||||
@@ -17,7 +17,8 @@ spend time explaining dependent types, nor the syntax for them in Idris,
|
|||||||
which is the language I'll use in this article. Below are a few resources
|
which is the language I'll use in this article. Below are a few resources
|
||||||
that should help you get up to speed.
|
that should help you get up to speed.
|
||||||
|
|
||||||
{{< todo >}}List resources{{< /todo >}}
|
> [!TODO]
|
||||||
|
> List resources
|
||||||
|
|
||||||
We've seen that, given a function `F a -> a`, we can define a function
|
We've seen that, given a function `F a -> a`, we can define a function
|
||||||
`B -> a`, if `F` is a base functor of the type `B`. However, what if
|
`B -> a`, if `F` is a base functor of the type `B`. However, what if
|
||||||
|
|||||||
@@ -6,6 +6,6 @@ summary = """
|
|||||||
in Agda. The goal is to have a formally verified, yet executable, static
|
in Agda. The goal is to have a formally verified, yet executable, static
|
||||||
analyzer for a simple language.
|
analyzer for a simple language.
|
||||||
"""
|
"""
|
||||||
status = "ongoing"
|
status = "complete"
|
||||||
divider = ": "
|
divider = ": "
|
||||||
+++
|
+++
|
||||||
|
|||||||
19
convert.rb
19
convert.rb
@@ -25,13 +25,16 @@ class KatexRenderer
|
|||||||
end
|
end
|
||||||
|
|
||||||
def substitute(content)
|
def substitute(content)
|
||||||
|
found_any = false
|
||||||
rendered = content.gsub /\\\(((?:[^\\]|\\[^\)])*)\\\)/ do |match|
|
rendered = content.gsub /\\\(((?:[^\\]|\\[^\)])*)\\\)/ do |match|
|
||||||
|
found_any = true
|
||||||
render(false, $~[1])
|
render(false, $~[1])
|
||||||
end
|
end
|
||||||
rendered = rendered.gsub /\$\$((?:[^\$]|$[^\$])*)\$\$/ do |match|
|
rendered = rendered.gsub /\$\$((?:[^\$]|$[^\$])*)\$\$/ do |match|
|
||||||
|
found_any = true
|
||||||
render(true, $~[1])
|
render(true, $~[1])
|
||||||
end
|
end
|
||||||
return rendered
|
return rendered, found_any
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -58,8 +61,20 @@ renderer = KatexRenderer.new(katex)
|
|||||||
files.each do |file|
|
files.each do |file|
|
||||||
puts "Rendering file: #{file}"
|
puts "Rendering file: #{file}"
|
||||||
document = Nokogiri::HTML.parse(File.open(file))
|
document = Nokogiri::HTML.parse(File.open(file))
|
||||||
|
found_any = false
|
||||||
document.search('//*[not(ancestor-or-self::code or ancestor-or-self::script)]/text()').each do |t|
|
document.search('//*[not(ancestor-or-self::code or ancestor-or-self::script)]/text()').each do |t|
|
||||||
t.replace(renderer.substitute(t.content))
|
rendered, found_any_in_text = renderer.substitute(t.content)
|
||||||
|
found_any ||= found_any_in_text
|
||||||
|
t.replace(rendered)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# If we didn't find any mathematical equations, no need to include KaTeX CSS.
|
||||||
|
# Disabled here because Bergamot technically doesn't require math blocks
|
||||||
|
# on the page but does need the CSS.
|
||||||
|
#
|
||||||
|
# unless found_any
|
||||||
|
# document.css('link[href$="katex.css"], link[href$="katex.min.css"]').each(&:remove)
|
||||||
|
# end
|
||||||
|
|
||||||
File.write(file, document.to_html(encoding: 'UTF-8'))
|
File.write(file, document.to_html(encoding: 'UTF-8'))
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
{{ $style := resources.Get "scss/donate.scss" | resources.ToCSS | resources.Minify }}
|
{{ $style := resources.Get "scss/donate.scss" | css.Sass | resources.Minify }}
|
||||||
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
{{ $style := resources.Get "scss/gmachine.scss" | resources.ToCSS | resources.Minify }}
|
{{ $style := resources.Get "scss/gmachine.scss" | css.Sass | resources.Minify }}
|
||||||
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
{{ $style := resources.Get "scss/stack.scss" | resources.ToCSS | resources.Minify }}
|
{{ $style := resources.Get "scss/stack.scss" | css.Sass | resources.Minify }}
|
||||||
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
<html lang="{{ .Site.Language.Lang }}">
|
<html lang="{{ .Site.Language.Lang }}">
|
||||||
{{- partial "head.html" . -}}
|
{{- partial "head.html" . -}}
|
||||||
<body>
|
<body>
|
||||||
{{ $voidcss := resources.Get "scss/thevoid.scss" | resources.ToCSS | resources.Minify }}
|
{{ $voidcss := resources.Get "scss/thevoid.scss" | css.Sass | resources.Minify }}
|
||||||
<link rel="stylesheet" href="{{ $voidcss.Permalink }}">
|
<link rel="stylesheet" href="{{ $voidcss.Permalink }}">
|
||||||
{{- partial "header.html" . -}}
|
{{- partial "header.html" . -}}
|
||||||
<div class="container"><hr class="header-divider"></div>
|
<div class="container"><hr class="header-divider"></div>
|
||||||
|
|||||||
Submodule themes/vanilla updated: 85ea55402e...952502e690
Reference in New Issue
Block a user