From 9ddd2dd3bcba96e563c5a02599967fc496a78019 Mon Sep 17 00:00:00 2001
From: Danila Fedorin <danila.fedorin@gmail.com>
Date: Thu, 4 May 2023 01:03:58 -0700
Subject: [PATCH] Add initial draft of alloy article

---
 code/dyno-alloy/DynoAlloy.als | 163 ++++++++++++++++++++
 content/blog/dyno_alloy.md    | 277 ++++++++++++++++++++++++++++++++++
 2 files changed, 440 insertions(+)
 create mode 100644 code/dyno-alloy/DynoAlloy.als
 create mode 100644 content/blog/dyno_alloy.md

diff --git a/code/dyno-alloy/DynoAlloy.als b/code/dyno-alloy/DynoAlloy.als
new file mode 100644
index 0000000..b51beb9
--- /dev/null
+++ b/code/dyno-alloy/DynoAlloy.als
@@ -0,0 +1,163 @@
+enum Flag {Method, MethodOrField, Public}
+
+/* There is a negative version for each flag (METHOD and NOT_METHOD).
+     Model this as two sets, one of positive flags, and one of netative flags,
+     and interpret the bitfield to be a conjunction of both flags. */
+sig Bitfield {
+    , positiveFlags: set Flag
+    , negativeFlags: set Flag
+}
+
+/* A filter state has filterFlags and excludeFlags, both represented as conjunctions. */ 
+sig FilterState {
+    , include: Bitfield
+    , exclude: Bitfield
+}
+
+/* Initially, no search has happeneed for a scope, so its 'found' is not set to anything. */
+one sig NotSet {}
+
+/* Finally, there's a search state (whether or not a particular scope has already been
+    searched with a particular configuration). */
+one sig SearchState {
+    , var found: Bitfield + NotSet
+}
+
+
+pred bitfieldEmpty[b: Bitfield] {
+    #b.positiveFlags = 0 and #b.negativeFlags = 0
+}
+
+pred bitfieldEqual[b1: Bitfield, b2: Bitfield] {
+    b1.positiveFlags = b2.positiveFlags and b1.negativeFlags = b2.negativeFlags
+}
+
+pred bitfieldIntersection[b1: Bitfield, b2: Bitfield, b3: Bitfield] {
+    b3.positiveFlags = b1.positiveFlags & b2.positiveFlags
+    b3.negativeFlags = b1.negativeFlags & b2.negativeFlags
+}
+
+pred bitfieldSubset[b1: Bitfield, b2: Bitfield] {
+    b1.positiveFlags in b2.positiveFlags
+    b1.negativeFlags in b2.negativeFlags
+}
+
+pred bitfieldIncomparable[b1: Bitfield, b2: Bitfield] {
+    not bitfieldSubset[b1, b2]
+    not bitfieldSubset[b2, b1]
+}
+
+pred addBitfieldFlag[b1: Bitfield, b2: Bitfield, flag: Flag] {
+    b2.positiveFlags = b1.positiveFlags + flag
+    b2.negativeFlags = b1.negativeFlags
+}
+
+pred addBitfieldFlagNeg[b1: Bitfield, b2: Bitfield, flag: Flag] {
+    b2.negativeFlags = b1.negativeFlags + flag
+    b2.positiveFlags = b1.positiveFlags
+}
+
+enum Property { PMethod, PField, PPublic }
+
+sig Props {
+    properties: set Property
+}
+
+pred flagMatchesPropery[flag: Flag, property: Property] {
+    (flag = Method and property = PMethod) or
+    (flag = MethodOrField and (property = PMethod or property = PField)) or
+    (flag = Public and property = PPublic)
+}
+
+pred bitfieldMatchesProperties[bitfield: Bitfield, props: Props] {
+    // All positive flags must be satisifed
+    all flag: bitfield.positiveFlags | some property: props.properties | flagMatchesPropery[flag, property]
+    // All negative flags must not be satisfied
+    all flag: bitfield.negativeFlags | no property: props.properties | flagMatchesPropery[flag, property]
+}
+
+pred configureState[filterState: FilterState] {
+    some initialState: FilterState {
+        // Each lookup in scope starts with empty filter and exclude flags
+        bitfieldEmpty[initialState.include] and bitfieldEmpty[initialState.exclude]
+
+        // The intermediate states (bf1) are used for sequencing of operations.
+        some bf1 : Bitfield {
+            // Add "Public" depending on skipPrivateVisibilities
+            addBitfieldFlag[initialState.include, bf1, Public] or
+            bitfieldEqual[initialState.include, bf1]
+
+            // If it's a method receiver, add method or field restriction
+            addBitfieldFlag[bf1, filterState.include, MethodOrField] or
+            // if it's not a receiver, filter to non-methods (could be overridden)
+            addBitfieldFlagNeg[bf1, filterState.include, Method] or
+            // Maybe methods are not being included but it's not a receiver, so no change.
+            bitfieldEqual[bf1, filterState.include]
+        }
+        // Exclude filter doesn't change here
+        initialState.exclude = filterState.exclude
+    }
+}
+
+pred oldUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
+    toSet' in Bitfield and bitfieldIntersection[toSet, setTo.include, toSet']
+}
+
+pred newUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
+    (not bitfieldIncomparable[toSet, setTo.include] and oldUpdate[toSet, setTo]) or
+    (bitfieldIncomparable[toSet, setTo.include] and toSet = toSet')
+}
+
+pred updateOrSet[toSet: Bitfield + NotSet, setTo: FilterState] {
+    (toSet in NotSet and toSet' = setTo.include) or
+    (toSet not in NotSet and oldUpdate[toSet, setTo])
+}
+
+pred excludeBitfield[found: Bitfield + NotSet, exclude: Bitfield] {
+    (found != NotSet and bitfieldEqual[found, exclude]) or
+    (found = NotSet and bitfieldEmpty[exclude])
+}
+
+fact init {
+    all searchState: SearchState | searchState.found = NotSet
+}
+
+fact step {
+    always {
+        // Model that a new doLookupInScope could've occurred, with any combination of flags.
+        all searchState: SearchState {
+            some fs: FilterState {
+                // This is a possible combination of lookup flags
+                configureState[fs]
+
+                // If a search has been performed before, take the intersection; otherwise,
+                // just insert the current filter flags.
+                updateOrSet[searchState.found, fs]
+            }
+        }
+    }
+}
+
+example: run {
+    all searchState: SearchState {
+        // a way that subsequent results of searching it will miss things.
+        eventually some props: Props, fs: FilterState, fsBroken: FilterState, exclude: Bitfield {
+            // Some search (fs) will cause a transition / modification of the search state...
+            configureState[fs]
+            updateOrSet[searchState.found, fs]
+            // Such that a later, valid search... (fsBroken)
+            configureState[fsBroken]
+            excludeBitfield[searchState.found', exclude]
+
+            // Will allow for a set of properties...
+            // ... that are left out of the original search...
+            not bitfieldMatchesProperties[searchState.found, props]
+            // ... and out of the current search
+            not (bitfieldMatchesProperties[fs.include, props] and not bitfieldMatchesProperties[searchState.found, props])
+            // But would be matched by the broken search...
+            bitfieldMatchesProperties[fsBroken.include, props]
+            // ... to not be matched by a search with the new state:
+            not (bitfieldMatchesProperties[fsBroken.include, props] and not bitfieldMatchesProperties[exclude, props])
+        }
+    }
+}
diff --git a/content/blog/dyno_alloy.md b/content/blog/dyno_alloy.md
new file mode 100644
index 0000000..7f907f8
--- /dev/null
+++ b/content/blog/dyno_alloy.md
@@ -0,0 +1,277 @@
+---
+title: "Proving My Compiler Code Incorrect With Alloy"
+date: 2023-05-02T22:48:52-07:00
+tags: ["Compilers", "Alloy"]
+draft: true
+---
+
+{{< todo >}}
+Intro section and disclaimer
+{{< /todo >}}
+
+I work as a compiler developer on the [Chapel](https://chapel-lang.org) team.
+One of the things that a language like Chapel has to do is called _resolution_,
+which is the process of figuring out what each identifier, like `x`, refers to,
+and what its type is. Even the first part of that is pretty complicated, what
+with public and private variables, methods (which can be decalred outside
+of their receiver type in Chapel), and more...
+
+Scope resolution in Chapel is further complicated by the fact that the same
+scope might need to be searched multiple times, in different contexts. Let
+me start with a few examples to illustrate what I mean. Here's the first
+program:
+
+```Chapel {linenos=true}
+module M {
+    class C {}
+
+    // A regular procedure (not a method)
+    proc foo() {}
+
+    // A method on C.
+    proc C.foo() {}
+
+    // Another method on C.
+    proc C.doSomething() {
+        foo();
+    }
+}
+```
+
+If you don't know Chapel (and you probably don't!) this program already merits a fair
+bit of explanation. A _module_ in Chapel (declared via a `module` keyword)
+is just a collection of definitions. Such definitions could include variables,
+methods, classes and more. Putting them in a module helps group them.
+
+{{< todo >}}
+Write the rest of this explanation.
+{{< /todo >}}
+
+The interesting part of the snippet is the body of the `doSomething` method.
+It has a call to `foo`: but which `foo` is it referring to? There are two:
+the regular procedure (non-method) `foo`, declared on line 5, and the
+method `C.foo` declared on line 8. In Chapel, the rules dictate that when
+such a situation arises, and a fitting method is found, the method is
+preferred to the non-method. In the rewritten version of the Chapel compiler,
+titled Dyno, this disambiguation is achieved by first searching the scopes
+visible from the class `C` for methods only. In this particular example,
+the two scopes searched will be:
+
+1. The inside of class `C`. The class itself doesn't define any methods, so
+   nothing is found.
+2. The module in which `C` is defined (`M` in this case). This module does
+   have a method, the one on line 8, so that one is returned.
+
+Only if methods are not found are non-methods considered. In this situation,
+the search order will be as follows:
+
+1. The inside of `C.doSomething` will be searched. `doSomething` doesn't declare
+   anything, so the search will come up empty.
+2. The module in which `C.doSomething` is defined (`M` again) will be searched. This time,
+   both methods and non-methods will be considered. Since we're considering
+   a hypothetical situation in which the method `C.foo` isn't there (otherwise
+   it would've been found earlier), the only thing that will be found will
+   be the non-method `foo`.
+
+Notice that we've already had to search the module `M` twice, looking for
+different things each time. First, we were looking for only method, but
+later, we were looking for anything. However, this isn't as complicated as
+things can get. The simplifying aspect of this program is that both `doSomething`
+and `C` are defined inside the class `C`, and therefore have access to its
+private methods and procedures. If we extracted `C.doSomething` into its
+own separate module, the program would look like this.
+
+```Chapel {linenos=true}
+module M1 {
+    class C {}
+
+    // A regular procedure (not a method)
+    proc foo() {}
+
+    // A method on C.
+    proc C.foo() {}
+}
+module M2 {
+    use super.M1;
+
+    // Another method on C.
+    proc C.doSomething() {
+        foo();
+    }
+}
+```
+
+Since `doSomething` is now in another module, it can't just access the `foo`s from
+`M1` willy-nilly. There are a few ways to get the things that were declared in another module out
+and make use of them. I opted for a `use` statement, which, in its simplest form,
+just brings all the declarations inside the `use`d module into the current scope. Thus,
+the `use` statement on line 11 would bring all things declared in `M1` into
+the scope inside `M2`. There's a catch, though: since `M2` is not declared
+inside `M1`, a `use` statement will not be able to bring in _private_ symbols
+from `M1` (they're private for a reason!). So, this time, when searching the scope
+for `M1`, we will have to search only for public symbols. That's another,
+different way of searching `M1`. So far, we've seen three:
+
+* Search `M1` for any symbol.
+* Search `M1` for methods only.
+* Search `M1` for public symbols only.
+
+In Dyno, there are even more different ways of searching a single scope, and
+some of them are mixes of others (one might consider, for instance, searching
+for only public methods). To represent the various search configurations,
+the Dyno team came up with using a bitset of _flags_, each of which indicated
+a necessary condition for a symbol to be returned. A bitset with flags set
+for two properties (like "public" and "method") requires that both such
+properties be found on each symbol that's returned from a scope. This led to
+C++ code along the lines of:
+
+```C++
+auto allPublicSymbols = Flags::PUBLIC;
+auto allPublicMethods = Flags::PUBLIC | Flags::METHOD;
+```
+
+It also turned out convenient to add negative versions of each flag
+(`NOT_PUBLIC` for private symbols, `NOT_METHOD` for regular old procedures
+and other definitions, and so on. So, some other possible flag combinations
+include:
+
+```C++
+auto allNonMethods = Flags::NOT_METHOD;
+auto privateMethods = Flags::NOT_PUBLIC | Flags::METHOD;
+```
+
+Given these flags, there are some situations in which checking a scope a
+second time is redundant, in that it is guaranteed to find no additional
+symbols. For instance, if you search a scope for all public symbols, and
+then subsequently search for all public methods, you will only find
+duplicates -- after all, all public methods are public symbols. Most
+generally, this occurs when a second search has all the flags from a
+previous search, and maybe more. In math lingo, if the set of flags checked
+the first time is a subset of the set of flags checked the second time,
+it's guaranteed not to find anything new.
+
+In Dyno, we like to avoid additional work when we can. To do so, we track
+which scopes have already been searched, and avoid searching them again.
+Since what comes up from a search depends on the flags, we store the flags
+alongside the scopes we've checked. __If we find that the previously-checked
+bitset is a subset of the current biset, we just skip the search__.
+
+But then, what if it _isn't_ a subset? Another concern here is avoiding
+duplicate results (it's easier to check for duplicate definitions if you
+know a symbol is only returned from a search once). So, another feature of
+Dyno's scope search is an additional bitset of what to _exclude_, which we
+set to be the previous search's filter. So if the first search looked for
+symbols matching description \\(A\\), and the second search is supposed to
+look for symbols matching description \\(B\\), __then really we do a search
+for \\(A \\land \\lnot B\\) (that is, \\(A\\) and not \\(B\\))__.
+
+{{< dialog >}}
+{{< message "question" "reader" >}}
+Hold on, why do you need a whole another bitset? There are already negated
+versions of each flag available. Can't you just add those to the filter?
+{{< /message >}}
+{{< message "answer" "Daniel" >}}
+Good question. The difference is a little bit tricky. If we just negated
+each flag, we'd turn an expression like \(A \land B\) into \(\lnot A \land
+\lnot B\). However, according to De Morgan's laws, the proper negation of
+\(A \land B\) is \(\lnot A \lor \lnot B\) (notice the use of "or" instead
+of "and"). On the other hand, using an "exclude" bitset negates the whole
+conjunction, rather than the individual flags, and so gives us the result
+we need.
+{{< /message >}}
+{{< /dialog >}}
+
+One last thing: what happens if there were two previous searches? What we
+need is to to somehow combine the two filters into one. Taking a cue from
+a previous example, in which "public" was followed by "public methods", we
+can observe that since the second search has additional flags, it's more
+restrictive, and thus guaranteed to not find anything. __So we try to create
+the least restrictive bitset possible, by taking an intersection of the
+flags used.__
+
+Actually, that last point is not quite correct in every possible case
+(taking the intersection is not always the right thing to do).
+However, running the code through our test suite, we did not notice any
+cases in which it misbehaved. So, noting the potential issue in a comment,
+we moved on to other things.
+
+That is, until I decided that it was time to add another possible flag to
+the bitset. At that point, sitting and trying to reason about the
+possible cases, I realized that it would be much nicer to describe this
+mathematically, and have a model checker generate outlandish scenarios for
+me. Having at some point seen [Hillel Wayne's
+post](https://www.hillelwayne.com/post/alloy6/) about the release of
+[Alloy 6](https://alloytools.org/), I thought I'd give it a go. I'd never
+touched alloy before this, so be warned: this is what I came up with on my
+own attempt.
+
+### Modeling Flags and Bitsets in Alloy
+Flags are represented on the C++ side as an `enum` (with custom indexing so
+as to make each flag be exactly one bit). I checked, and it looked like
+Alloy had an `enum` feature, too! I started off by making an enum of the
+flags I wanted to play with.
+
+{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 1 1 >}}
+
+We haven't seen the `MethodOrField` flag, but it's an important one. It
+turns out that it's much more common to look for anything that could be
+part of a class, rather than just its methods. This flag is itself an "or"
+of two properties (something being a method and something being a class
+field). Note that this is not the same as having two flags, `Method` and
+`Field`, and always including them together (because that would be an
+"and", not an "or").
+
+Notice also that the list of flags doesn't include the negative versions.
+Since the negative versions are one-for-one with the positive ones, I
+instead chose to represent bitsets as simply two sets: one set of
+"positive" flags, in which the presence of e.g. `Method` indicates that the
+`METHOD` flag was set, and one set of "negative" flags, in which the
+presence of `Method` indicates that `NOT_METHOD` was set. This way, I'm
+guaranteed that there's a positive and negative version of each flag,
+automatically. Here's how I wrote that in Alloy.
+
+{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 6 9 >}}
+
+{{< todo >}}
+The rest of the article
+{{< /todo >}}
+
+### Scratch Work
+{{< todo >}}
+This section is temporary
+{{< /todo >}}
+
+a small-ish program to illustrate what I mean.
+
+```Chapel
+module M {
+    
+}
+```
+
+
+```Chapel {linenos=true}
+module M1 {
+    public use super.M2;
+}
+module M2 {
+    private var x = 1;
+
+    module M3 {
+        public use super;
+    }
+}
+
+use M1;
+use M2.M3;
+writeln(x)
+```
+
+
+ Moreover, a `public use` makes these definitions part
+of the module that does the `use` -- that is, `M1` would now contain the
+definitions from `M2`. However, since `M1` is not defined inside of `M2`, it
+isn't able to access its private variables (like `x` on line 5), so this
+particular use statement leaves `M1` just containing (a reference to) `M3`.
+
+The `public use` on line