Add initial draft of alloy article

This commit is contained in:
Danila Fedorin 2023-05-04 01:03:58 -07:00
parent f579641866
commit 9ddd2dd3bc
2 changed files with 440 additions and 0 deletions

View File

@ -0,0 +1,163 @@
enum Flag {Method, MethodOrField, Public}
/* There is a negative version for each flag (METHOD and NOT_METHOD).
Model this as two sets, one of positive flags, and one of negative flags,
and interpret the bitfield to be a conjunction of both sets of flags. */
sig Bitfield {
, positiveFlags: set Flag
, negativeFlags: set Flag
}
/* A filter state pairs two bitfields: `include` (the filter flags a symbol
must match) and `exclude` (the flags describing what to leave out). Each
bitfield is itself interpreted as a conjunction of its flags. */
sig FilterState {
, include: Bitfield
, exclude: Bitfield
}
/* Initially, no search has happened for a scope, so its 'found' is not set to anything.
NotSet is the single marker atom standing for that "no value yet" case. */
one sig NotSet {}
/* Finally, there's a search state (whether or not a particular scope has already been
searched with a particular configuration). There is exactly one SearchState atom;
its `found` field is mutable (`var`) and holds either a Bitfield or NotSet. */
one sig SearchState {
, var found: Bitfield + NotSet
}
/* Holds when `b` has no positive flags and no negative flags set. */
pred bitfieldEmpty[b: Bitfield] {
    no b.positiveFlags
    no b.negativeFlags
}
/* Structural equality: `b1` and `b2` carry the same positive flags and the
same negative flags (they need not be the same atom). */
pred bitfieldEqual[b1: Bitfield, b2: Bitfield] {
    b1.positiveFlags = b2.positiveFlags
    b1.negativeFlags = b2.negativeFlags
}
/* Holds when `b3` is the flag-wise intersection of `b1` and `b2`: its positive
set is the intersection of their positive sets, and likewise for negatives. */
pred bitfieldIntersection[b1: Bitfield, b2: Bitfield, b3: Bitfield] {
    (b3.positiveFlags = b1.positiveFlags & b2.positiveFlags) and
    (b3.negativeFlags = b1.negativeFlags & b2.negativeFlags)
}
/* Holds when every positive flag of `b1` is a positive flag of `b2`, and
every negative flag of `b1` is a negative flag of `b2`. */
pred bitfieldSubset[b1: Bitfield, b2: Bitfield] {
    (b1.positiveFlags in b2.positiveFlags) and
    (b1.negativeFlags in b2.negativeFlags)
}
/* Holds when neither bitfield is a subset of the other (in the sense of
bitfieldSubset). */
pred bitfieldIncomparable[b1: Bitfield, b2: Bitfield] {
    not (bitfieldSubset[b1, b2] or bitfieldSubset[b2, b1])
}
/* Relates `b1` to `b2` where `b2` is `b1` with `flag` added to the positive
flags; the negative flags are carried over unchanged. */
pred addBitfieldFlag[b1: Bitfield, b2: Bitfield, flag: Flag] {
    // Negative side is untouched...
    b2.negativeFlags = b1.negativeFlags
    // ...and the positive side gains `flag`.
    b2.positiveFlags = b1.positiveFlags + flag
}
/* Relates `b1` to `b2` where `b2` is `b1` with `flag` added to the negative
flags; the positive flags are carried over unchanged. */
pred addBitfieldFlagNeg[b1: Bitfield, b2: Bitfield, flag: Flag] {
    // Positive side is untouched...
    b2.positiveFlags = b1.positiveFlags
    // ...and the negative side gains `flag`.
    b2.negativeFlags = b1.negativeFlags + flag
}
/* Properties that flags are matched against (see flagMatchesPropery). */
enum Property { PMethod, PField, PPublic }
/* A set of properties; bitfields are checked against a Props atom in
bitfieldMatchesProperties. */
sig Props {
properties: set Property
}
/* Holds when `property` satisfies `flag`:
   - PMethod satisfies both Method and MethodOrField,
   - PField satisfies MethodOrField,
   - PPublic satisfies Public. */
pred flagMatchesPropery[flag: Flag, property: Property] {
    (property = PMethod and (flag = Method or flag = MethodOrField)) or
    (property = PField and flag = MethodOrField) or
    (property = PPublic and flag = Public)
}
/* Holds when the property set `props` passes the filter described by
`bitfield`. */
pred bitfieldMatchesProperties[bitfield: Bitfield, props: Props] {
    // Every positive flag must be satisfied by at least one property.
    all pos: bitfield.positiveFlags |
        some p: props.properties | flagMatchesPropery[pos, p]
    // No negative flag may be satisfied by any property.
    no neg: bitfield.negativeFlags, p: props.properties |
        flagMatchesPropery[neg, p]
}
/* Holds when `filterState` is a filter configuration reachable from an empty
initial filter by the optional steps below: first optionally adding Public,
then either adding MethodOrField, adding the negative Method flag, or adding
nothing. Formulas placed one after another inside a block are conjoined. */
pred configureState[filterState: FilterState] {
some initialState: FilterState {
// Each lookup in scope starts with empty filter and exclude flags
bitfieldEmpty[initialState.include] and bitfieldEmpty[initialState.exclude]
// The intermediate states (bf1) are used for sequencing of operations.
some bf1 : Bitfield {
// Add "Public" depending on skipPrivateVisibilities
// (first step: bf1 is the initial include with Public added, or unchanged)
addBitfieldFlag[initialState.include, bf1, Public] or
bitfieldEqual[initialState.include, bf1]
// Second step: exactly one formula made of the three alternatives below.
// If it's a method receiver, add method or field restriction
addBitfieldFlag[bf1, filterState.include, MethodOrField] or
// if it's not a receiver, filter to non-methods (could be overridden)
addBitfieldFlagNeg[bf1, filterState.include, Method] or
// Maybe methods are not being included but it's not a receiver, so no change.
bitfieldEqual[bf1, filterState.include]
}
// Exclude filter doesn't change here
// NOTE(review): this compares the exclude atoms by identity, whereas the
// include side uses the structural bitfieldEqual -- confirm this is intended.
initialState.exclude = filterState.exclude
}
}
/* The intersection-based update: the next-state value of `toSet` (written
toSet') is a Bitfield equal to the intersection of the current `toSet` with
the include flags of `setTo`. */
pred oldUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
    toSet' in Bitfield
    bitfieldIntersection[toSet, setTo.include, toSet']
}
/* Alternative update rule: when the current `toSet` and the new include flags
are incomparable, leave `toSet` unchanged in the next state; otherwise fall
back to the intersection-based oldUpdate. */
pred newUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
    bitfieldIncomparable[toSet, setTo.include]
        implies (toSet = toSet')
        else oldUpdate[toSet, setTo]
}
/* If nothing has been recorded yet (`toSet` is NotSet), the next-state value
becomes the include flags of `setTo`; otherwise apply oldUpdate. */
pred updateOrSet[toSet: Bitfield + NotSet, setTo: FilterState] {
    (toSet in NotSet)
        implies (toSet' = setTo.include)
        else oldUpdate[toSet, setTo]
}
/* Relates a recorded `found` value to the `exclude` bitfield derived from it:
when `found` is NotSet the exclude bitfield is empty; otherwise the exclude
bitfield has the same flags as `found`. */
pred excludeBitfield[found: Bitfield + NotSet, exclude: Bitfield] {
    (found = NotSet)
        implies bitfieldEmpty[exclude]
        else bitfieldEqual[found, exclude]
}
/* In the initial state, every search state's `found` is NotSet. */
fact init {
    all s: SearchState {
        s.found = NotSet
    }
}
/* Transition rule, required to hold in every state: each search state is
updated by some lookup whose filter is a valid configuration. */
fact step {
    always {
        // A new doLookupInScope may have occurred with any combination of flags.
        all ss: SearchState | some lookupFilter: FilterState {
            // The filter must be a possible combination of lookup flags.
            configureState[lookupFilter]
            // Intersect with the previous value if one was recorded;
            // otherwise just store the current filter flags.
            updateOrSet[ss.found, lookupFilter]
        }
    }
}
/* Ask the analyzer for a trace in which updating the search state causes a
later search to miss a set of properties it should have matched. */
example: run {
all searchState: SearchState {
// Look for a point in time at which the search state is modified in such
// a way that subsequent results of searching it will miss things.
eventually some props: Props, fs: FilterState, fsBroken: FilterState, exclude: Bitfield {
// Some search (fs) will cause a transition / modification of the search state...
configureState[fs]
updateOrSet[searchState.found, fs]
// Such that a later, valid search... (fsBroken)
configureState[fsBroken]
// `exclude` is derived from the next-state value of `found`.
excludeBitfield[searchState.found', exclude]
// Will allow for a set of properties...
// ... that are left out of the original search...
not bitfieldMatchesProperties[searchState.found, props]
// ... and out of the current search
// NOTE(review): given the previous line, this formula reduces to
// `not bitfieldMatchesProperties[fs.include, props]`.
not (bitfieldMatchesProperties[fs.include, props] and not bitfieldMatchesProperties[searchState.found, props])
// But would be matched by the broken search...
bitfieldMatchesProperties[fsBroken.include, props]
// ... to not be matched by a search with the new state:
// NOTE(review): given the previous line, this formula reduces to
// `bitfieldMatchesProperties[exclude, props]`.
not (bitfieldMatchesProperties[fsBroken.include, props] and not bitfieldMatchesProperties[exclude, props])
}
}
}

277
content/blog/dyno_alloy.md Normal file
View File

@ -0,0 +1,277 @@
---
title: "Proving My Compiler Code Incorrect With Alloy"
date: 2023-05-02T22:48:52-07:00
tags: ["Compilers", "Alloy"]
draft: true
---
{{< todo >}}
Intro section and disclaimer
{{< /todo >}}
I work as a compiler developer on the [Chapel](https://chapel-lang.org) team.
One of the things that a language like Chapel has to do is called _resolution_,
which is the process of figuring out what each identifier, like `x`, refers to,
and what its type is. Even the first part of that is pretty complicated, what
with public and private variables, methods (which can be declared outside
of their receiver type in Chapel), and more...
Scope resolution in Chapel is further complicated by the fact that the same
scope might need to be searched multiple times, in different contexts. Let
me start with a few examples to illustrate what I mean. Here's the first
program:
```Chapel {linenos=true}
module M {
class C {}
// A regular procedure (not a method)
proc foo() {}
// A method on C.
proc C.foo() {}
// Another method on C.
proc C.doSomething() {
foo();
}
}
```
If you don't know Chapel (and you probably don't!) this program already merits a fair
bit of explanation. A _module_ in Chapel (declared via a `module` keyword)
is just a collection of definitions. Such definitions could include variables,
methods, classes and more. Putting them in a module helps group them.
{{< todo >}}
Write the rest of this explanation.
{{< /todo >}}
The interesting part of the snippet is the body of the `doSomething` method.
It has a call to `foo`: but which `foo` is it referring to? There are two:
the regular procedure (non-method) `foo`, declared on line 5, and the
method `C.foo` declared on line 8. In Chapel, the rules dictate that when
such a situation arises, and a fitting method is found, the method is
preferred to the non-method. In the rewritten version of the Chapel compiler,
titled Dyno, this disambiguation is achieved by first searching the scopes
visible from the class `C` for methods only. In this particular example,
the two scopes searched will be:
1. The inside of class `C`. The class itself doesn't define any methods, so
nothing is found.
2. The module in which `C` is defined (`M` in this case). This module does
have a method, the one on line 8, so that one is returned.
Only if methods are not found are non-methods considered. In this situation,
the search order will be as follows:
1. The inside of `C.doSomething` will be searched. `doSomething` doesn't declare
anything, so the search will come up empty.
2. The module in which `C.doSomething` is defined (`M` again) will be searched. This time,
both methods and non-methods will be considered. Since we're considering
a hypothetical situation in which the method `C.foo` isn't there (otherwise
it would've been found earlier), the only thing that will be found will
be the non-method `foo`.
Notice that we've already had to search the module `M` twice, looking for
different things each time. First, we were looking for only methods, but
later, we were looking for anything. However, this isn't as complicated as
things can get. The simplifying aspect of this program is that both `doSomething`
and `C` are defined inside the module `M`, and therefore have access to its
private methods and procedures. If we extracted `C.doSomething` into its
own separate module, the program would look like this.
```Chapel {linenos=true}
module M1 {
class C {}
// A regular procedure (not a method)
proc foo() {}
// A method on C.
proc C.foo() {}
}
module M2 {
use super.M1;
// Another method on C.
proc C.doSomething() {
foo();
}
}
```
Since `doSomething` is now in another module, it can't just access the `foo`s from
`M1` willy-nilly. There are a few ways to get the things that were declared in another module out
and make use of them. I opted for a `use` statement, which, in its simplest form,
just brings all the declarations inside the `use`d module into the current scope. Thus,
the `use` statement on line 11 would bring all things declared in `M1` into
the scope inside `M2`. There's a catch, though: since `M2` is not declared
inside `M1`, a `use` statement will not be able to bring in _private_ symbols
from `M1` (they're private for a reason!). So, this time, when searching the scope
for `M1`, we will have to search only for public symbols. That's another,
different way of searching `M1`. So far, we've seen three:
* Search `M1` for any symbol.
* Search `M1` for methods only.
* Search `M1` for public symbols only.
In Dyno, there are even more different ways of searching a single scope, and
some of them are mixes of others (one might consider, for instance, searching
for only public methods). To represent the various search configurations,
the Dyno team came up with using a bitset of _flags_, each of which indicated
a necessary condition for a symbol to be returned. A bitset with flags set
for two properties (like "public" and "method") requires that both such
properties be found on each symbol that's returned from a scope. This led to
C++ code along the lines of:
```C++
auto allPublicSymbols = Flags::PUBLIC;
auto allPublicMethods = Flags::PUBLIC | Flags::METHOD;
```
It also turned out convenient to add negative versions of each flag
(`NOT_PUBLIC` for private symbols, `NOT_METHOD` for regular old procedures
and other definitions, and so on). So, some other possible flag combinations
include:
```C++
auto allNonMethods = Flags::NOT_METHOD;
auto privateMethods = Flags::NOT_PUBLIC | Flags::METHOD;
```
Given these flags, there are some situations in which checking a scope a
second time is redundant, in that it is guaranteed to find no additional
symbols. For instance, if you search a scope for all public symbols, and
then subsequently search for all public methods, you will only find
duplicates -- after all, all public methods are public symbols. Most
generally, this occurs when a second search has all the flags from a
previous search, and maybe more. In math lingo, if the set of flags checked
the first time is a subset of the set of flags checked the second time,
it's guaranteed not to find anything new.
In Dyno, we like to avoid additional work when we can. To do so, we track
which scopes have already been searched, and avoid searching them again.
Since what comes up from a search depends on the flags, we store the flags
alongside the scopes we've checked. __If we find that the previously-checked
bitset is a subset of the current bitset, we just skip the search__.
But then, what if it _isn't_ a subset? Another concern here is avoiding
duplicate results (it's easier to check for duplicate definitions if you
know a symbol is only returned from a search once). So, another feature of
Dyno's scope search is an additional bitset of what to _exclude_, which we
set to be the previous search's filter. So if the first search looked for
symbols matching description \\(A\\), and the second search is supposed to
look for symbols matching description \\(B\\), __then really we do a search
for \\(B \\land \\lnot A\\) (that is, \\(B\\) and not \\(A\\))__.
{{< dialog >}}
{{< message "question" "reader" >}}
Hold on, why do you need a whole another bitset? There are already negated
versions of each flag available. Can't you just add those to the filter?
{{< /message >}}
{{< message "answer" "Daniel" >}}
Good question. The difference is a little bit tricky. If we just negated
each flag, we'd turn an expression like \(A \land B\) into \(\lnot A \land
\lnot B\). However, according to De Morgan's laws, the proper negation of
\(A \land B\) is \(\lnot A \lor \lnot B\) (notice the use of "or" instead
of "and"). On the other hand, using an "exclude" bitset negates the whole
conjunction, rather than the individual flags, and so gives us the result
we need.
{{< /message >}}
{{< /dialog >}}
One last thing: what happens if there were two previous searches? What we
need is to somehow combine the two filters into one. Taking a cue from
a previous example, in which "public" was followed by "public methods", we
can observe that since the second search has additional flags, it's more
restrictive, and thus guaranteed to not find anything new. __So we try to create
the least restrictive bitset possible, by taking an intersection of the
flags used.__
Actually, that last point is not quite correct in every possible case
(taking the intersection is not always the right thing to do).
However, running the code through our test suite, we did not notice any
cases in which it misbehaved. So, noting the potential issue in a comment,
we moved on to other things.
That is, until I decided that it was time to add another possible flag to
the bitset. At that point, sitting and trying to reason about the
possible cases, I realized that it would be much nicer to describe this
mathematically, and have a model checker generate outlandish scenarios for
me. Having at some point seen [Hillel Wayne's
post](https://www.hillelwayne.com/post/alloy6/) about the release of
[Alloy 6](https://alloytools.org/), I thought I'd give it a go. I'd never
touched Alloy before this, so be warned: this is what I came up with on my
own attempt.
### Modeling Flags and Bitsets in Alloy
Flags are represented on the C++ side as an `enum` (with custom indexing so
as to make each flag be exactly one bit). I checked, and it looked like
Alloy had an `enum` feature, too! I started off by making an enum of the
flags I wanted to play with.
{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 1 1 >}}
We haven't seen the `MethodOrField` flag, but it's an important one. It
turns out that it's much more common to look for anything that could be
part of a class, rather than just its methods. This flag is itself an "or"
of two properties (something being a method and something being a class
field). Note that this is not the same as having two flags, `Method` and
`Field`, and always including them together (because that would be an
"and", not an "or").
Notice also that the list of flags doesn't include the negative versions.
Since the negative versions are one-for-one with the positive ones, I
instead chose to represent bitsets as simply two sets: one set of
"positive" flags, in which the presence of e.g. `Method` indicates that the
`METHOD` flag was set, and one set of "negative" flags, in which the
presence of `Method` indicates that `NOT_METHOD` was set. This way, I'm
guaranteed that there's a positive and negative version of each flag,
automatically. Here's how I wrote that in Alloy.
{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 6 9 >}}
{{< todo >}}
The rest of the article
{{< /todo >}}
### Scratch Work
{{< todo >}}
This section is temporary
{{< /todo >}}
a small-ish program to illustrate what I mean.
```Chapel
module M {
}
```
```Chapel {linenos=true}
module M1 {
public use super.M2;
}
module M2 {
private var x = 1;
module M3 {
public use super;
}
}
use M1;
use M2.M3;
writeln(x)
```
Moreover, a `public use` makes these definitions part
of the module that does the `use` -- that is, `M1` would now contain the
definitions from `M2`. However, since `M1` is not defined inside of `M2`, it
isn't able to access its private variables (like `x` on line 5), so this
particular use statement leaves `M1` just containing (a reference to) `M3`.
The `public use` on line