Add initial draft of alloy article
This commit is contained in:
parent
f579641866
commit
9ddd2dd3bc
163
code/dyno-alloy/DynoAlloy.als
Normal file
163
code/dyno-alloy/DynoAlloy.als
Normal file
|
@ -0,0 +1,163 @@
|
||||||
|
enum Flag {Method, MethodOrField, Public}
|
||||||
|
|
||||||
|
/* There is a negative version for each flag (METHOD and NOT_METHOD).
|
||||||
|
Model this as two sets, one of positive flags, and one of netative flags,
|
||||||
|
and interpret the bitfield to be a conjunction of both flags. */
|
||||||
|
sig Bitfield {
|
||||||
|
, positiveFlags: set Flag
|
||||||
|
, negativeFlags: set Flag
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A filter state has filterFlags and excludeFlags, both represented as conjunctions. */
|
||||||
|
sig FilterState {
|
||||||
|
, include: Bitfield
|
||||||
|
, exclude: Bitfield
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initially, no search has happeneed for a scope, so its 'found' is not set to anything. */
|
||||||
|
one sig NotSet {}
|
||||||
|
|
||||||
|
/* Finally, there's a search state (whether or not a particular scope has already been
|
||||||
|
searched with a particular configuration). */
|
||||||
|
one sig SearchState {
|
||||||
|
, var found: Bitfield + NotSet
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pred bitfieldEmpty[b: Bitfield] {
|
||||||
|
#b.positiveFlags = 0 and #b.negativeFlags = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
pred bitfieldEqual[b1: Bitfield, b2: Bitfield] {
|
||||||
|
b1.positiveFlags = b2.positiveFlags and b1.negativeFlags = b2.negativeFlags
|
||||||
|
}
|
||||||
|
|
||||||
|
pred bitfieldIntersection[b1: Bitfield, b2: Bitfield, b3: Bitfield] {
|
||||||
|
b3.positiveFlags = b1.positiveFlags & b2.positiveFlags
|
||||||
|
b3.negativeFlags = b1.negativeFlags & b2.negativeFlags
|
||||||
|
}
|
||||||
|
|
||||||
|
pred bitfieldSubset[b1: Bitfield, b2: Bitfield] {
|
||||||
|
b1.positiveFlags in b2.positiveFlags
|
||||||
|
b1.negativeFlags in b2.negativeFlags
|
||||||
|
}
|
||||||
|
|
||||||
|
pred bitfieldIncomparable[b1: Bitfield, b2: Bitfield] {
|
||||||
|
not bitfieldSubset[b1, b2]
|
||||||
|
not bitfieldSubset[b2, b1]
|
||||||
|
}
|
||||||
|
|
||||||
|
pred addBitfieldFlag[b1: Bitfield, b2: Bitfield, flag: Flag] {
|
||||||
|
b2.positiveFlags = b1.positiveFlags + flag
|
||||||
|
b2.negativeFlags = b1.negativeFlags
|
||||||
|
}
|
||||||
|
|
||||||
|
pred addBitfieldFlagNeg[b1: Bitfield, b2: Bitfield, flag: Flag] {
|
||||||
|
b2.negativeFlags = b1.negativeFlags + flag
|
||||||
|
b2.positiveFlags = b1.positiveFlags
|
||||||
|
}
|
||||||
|
|
||||||
|
enum Property { PMethod, PField, PPublic }
|
||||||
|
|
||||||
|
sig Props {
|
||||||
|
properties: set Property
|
||||||
|
}
|
||||||
|
|
||||||
|
pred flagMatchesPropery[flag: Flag, property: Property] {
|
||||||
|
(flag = Method and property = PMethod) or
|
||||||
|
(flag = MethodOrField and (property = PMethod or property = PField)) or
|
||||||
|
(flag = Public and property = PPublic)
|
||||||
|
}
|
||||||
|
|
||||||
|
pred bitfieldMatchesProperties[bitfield: Bitfield, props: Props] {
|
||||||
|
// All positive flags must be satisifed
|
||||||
|
all flag: bitfield.positiveFlags | some property: props.properties | flagMatchesPropery[flag, property]
|
||||||
|
// All negative flags must not be satisfied
|
||||||
|
all flag: bitfield.negativeFlags | no property: props.properties | flagMatchesPropery[flag, property]
|
||||||
|
}
|
||||||
|
|
||||||
|
pred configureState[filterState: FilterState] {
|
||||||
|
some initialState: FilterState {
|
||||||
|
// Each lookup in scope starts with empty filter and exclude flags
|
||||||
|
bitfieldEmpty[initialState.include] and bitfieldEmpty[initialState.exclude]
|
||||||
|
|
||||||
|
// The intermediate states (bf1) are used for sequencing of operations.
|
||||||
|
some bf1 : Bitfield {
|
||||||
|
// Add "Public" depending on skipPrivateVisibilities
|
||||||
|
addBitfieldFlag[initialState.include, bf1, Public] or
|
||||||
|
bitfieldEqual[initialState.include, bf1]
|
||||||
|
|
||||||
|
// If it's a method receiver, add method or field restriction
|
||||||
|
addBitfieldFlag[bf1, filterState.include, MethodOrField] or
|
||||||
|
// if it's not a receiver, filter to non-methods (could be overridden)
|
||||||
|
addBitfieldFlagNeg[bf1, filterState.include, Method] or
|
||||||
|
// Maybe methods are not being included but it's not a receiver, so no change.
|
||||||
|
bitfieldEqual[bf1, filterState.include]
|
||||||
|
}
|
||||||
|
// Exclude filter doesn't change here
|
||||||
|
initialState.exclude = filterState.exclude
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pred oldUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
|
||||||
|
toSet' in Bitfield and bitfieldIntersection[toSet, setTo.include, toSet']
|
||||||
|
}
|
||||||
|
|
||||||
|
pred newUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
|
||||||
|
(not bitfieldIncomparable[toSet, setTo.include] and oldUpdate[toSet, setTo]) or
|
||||||
|
(bitfieldIncomparable[toSet, setTo.include] and toSet = toSet')
|
||||||
|
}
|
||||||
|
|
||||||
|
pred updateOrSet[toSet: Bitfield + NotSet, setTo: FilterState] {
|
||||||
|
(toSet in NotSet and toSet' = setTo.include) or
|
||||||
|
(toSet not in NotSet and oldUpdate[toSet, setTo])
|
||||||
|
}
|
||||||
|
|
||||||
|
pred excludeBitfield[found: Bitfield + NotSet, exclude: Bitfield] {
|
||||||
|
(found != NotSet and bitfieldEqual[found, exclude]) or
|
||||||
|
(found = NotSet and bitfieldEmpty[exclude])
|
||||||
|
}
|
||||||
|
|
||||||
|
fact init {
|
||||||
|
all searchState: SearchState | searchState.found = NotSet
|
||||||
|
}
|
||||||
|
|
||||||
|
fact step {
|
||||||
|
always {
|
||||||
|
// Model that a new doLookupInScope could've occurred, with any combination of flags.
|
||||||
|
all searchState: SearchState {
|
||||||
|
some fs: FilterState {
|
||||||
|
// This is a possible combination of lookup flags
|
||||||
|
configureState[fs]
|
||||||
|
|
||||||
|
// If a search has been performed before, take the intersection; otherwise,
|
||||||
|
// just insert the current filter flags.
|
||||||
|
updateOrSet[searchState.found, fs]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
example: run {
|
||||||
|
all searchState: SearchState {
|
||||||
|
// a way that subsequent results of searching it will miss things.
|
||||||
|
eventually some props: Props, fs: FilterState, fsBroken: FilterState, exclude: Bitfield {
|
||||||
|
// Some search (fs) will cause a transition / modification of the search state...
|
||||||
|
configureState[fs]
|
||||||
|
updateOrSet[searchState.found, fs]
|
||||||
|
// Such that a later, valid search... (fsBroken)
|
||||||
|
configureState[fsBroken]
|
||||||
|
excludeBitfield[searchState.found', exclude]
|
||||||
|
|
||||||
|
// Will allow for a set of properties...
|
||||||
|
// ... that are left out of the original search...
|
||||||
|
not bitfieldMatchesProperties[searchState.found, props]
|
||||||
|
// ... and out of the current search
|
||||||
|
not (bitfieldMatchesProperties[fs.include, props] and not bitfieldMatchesProperties[searchState.found, props])
|
||||||
|
// But would be matched by the broken search...
|
||||||
|
bitfieldMatchesProperties[fsBroken.include, props]
|
||||||
|
// ... to not be matched by a search with the new state:
|
||||||
|
not (bitfieldMatchesProperties[fsBroken.include, props] and not bitfieldMatchesProperties[exclude, props])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
277
content/blog/dyno_alloy.md
Normal file
277
content/blog/dyno_alloy.md
Normal file
|
@ -0,0 +1,277 @@
|
||||||
|
---
|
||||||
|
title: "Proving My Compiler Code Incorrect With Alloy"
|
||||||
|
date: 2023-05-02T22:48:52-07:00
|
||||||
|
tags: ["Compilers", "Alloy"]
|
||||||
|
draft: true
|
||||||
|
---
|
||||||
|
|
||||||
|
{{< todo >}}
|
||||||
|
Intro section and disclaimer
|
||||||
|
{{< /todo >}}
|
||||||
|
|
||||||
|
I work as a compiler developer on the [Chapel](https://chapel-lang.org) team.
|
||||||
|
One of the things that a language like Chapel has to do is called _resolution_,
|
||||||
|
which is the process of figuring out what each identifier, like `x`, refers to,
|
||||||
|
and what its type is. Even the first part of that is pretty complicated, what
|
||||||
|
with public and private variables, methods (which can be decalred outside
|
||||||
|
of their receiver type in Chapel), and more...
|
||||||
|
|
||||||
|
Scope resolution in Chapel is further complicated by the fact that the same
|
||||||
|
scope might need to be searched multiple times, in different contexts. Let
|
||||||
|
me start with a few examples to illustrate what I mean. Here's the first
|
||||||
|
program:
|
||||||
|
|
||||||
|
```Chapel {linenos=true}
|
||||||
|
module M {
|
||||||
|
class C {}
|
||||||
|
|
||||||
|
// A regular procedure (not a method)
|
||||||
|
proc foo() {}
|
||||||
|
|
||||||
|
// A method on C.
|
||||||
|
proc C.foo() {}
|
||||||
|
|
||||||
|
// Another method on C.
|
||||||
|
proc C.doSomething() {
|
||||||
|
foo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If you don't know Chapel (and you probably don't!) this program already merits a fair
|
||||||
|
bit of explanation. A _module_ in Chapel (declared via a `module` keyword)
|
||||||
|
is just a collection of definitions. Such definitions could include variables,
|
||||||
|
methods, classes and more. Putting them in a module helps group them.
|
||||||
|
|
||||||
|
{{< todo >}}
|
||||||
|
Write the rest of this explanation.
|
||||||
|
{{< /todo >}}
|
||||||
|
|
||||||
|
The interesting part of the snippet is the body of the `doSomething` method.
|
||||||
|
It has a call to `foo`: but which `foo` is it referring to? There are two:
|
||||||
|
the regular procedure (non-method) `foo`, declared on line 5, and the
|
||||||
|
method `C.foo` declared on line 8. In Chapel, the rules dictate that when
|
||||||
|
such a situation arises, and a fitting method is found, the method is
|
||||||
|
preferred to the non-method. In the rewritten version of the Chapel compiler,
|
||||||
|
titled Dyno, this disambiguation is achieved by first searching the scopes
|
||||||
|
visible from the class `C` for methods only. In this particular example,
|
||||||
|
the two scopes searched will be:
|
||||||
|
|
||||||
|
1. The inside of class `C`. The class itself doesn't define any methods, so
|
||||||
|
nothing is found.
|
||||||
|
2. The module in which `C` is defined (`M` in this case). This module does
|
||||||
|
have a method, the one on line 8, so that one is returned.
|
||||||
|
|
||||||
|
Only if methods are not found are non-methods considered. In this situation,
|
||||||
|
the search order will be as follows:
|
||||||
|
|
||||||
|
1. The inside of `C.doSomething` will be searched. `doSomething` doesn't declare
|
||||||
|
anything, so the search will come up empty.
|
||||||
|
2. The module in which `C.doSomething` is defined (`M` again) will be searched. This time,
|
||||||
|
both methods and non-methods will be considered. Since we're considering
|
||||||
|
a hypothetical situation in which the method `C.foo` isn't there (otherwise
|
||||||
|
it would've been found earlier), the only thing that will be found will
|
||||||
|
be the non-method `foo`.
|
||||||
|
|
||||||
|
Notice that we've already had to search the module `M` twice, looking for
|
||||||
|
different things each time. First, we were looking for only method, but
|
||||||
|
later, we were looking for anything. However, this isn't as complicated as
|
||||||
|
things can get. The simplifying aspect of this program is that both `doSomething`
|
||||||
|
and `C` are defined inside the class `C`, and therefore have access to its
|
||||||
|
private methods and procedures. If we extracted `C.doSomething` into its
|
||||||
|
own separate module, the program would look like this.
|
||||||
|
|
||||||
|
```Chapel {linenos=true}
|
||||||
|
module M1 {
|
||||||
|
class C {}
|
||||||
|
|
||||||
|
// A regular procedure (not a method)
|
||||||
|
proc foo() {}
|
||||||
|
|
||||||
|
// A method on C.
|
||||||
|
proc C.foo() {}
|
||||||
|
}
|
||||||
|
module M2 {
|
||||||
|
use super.M1;
|
||||||
|
|
||||||
|
// Another method on C.
|
||||||
|
proc C.doSomething() {
|
||||||
|
foo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Since `doSomething` is now in another module, it can't just access the `foo`s from
|
||||||
|
`M1` willy-nilly. There are a few ways to get the things that were declared in another module out
|
||||||
|
and make use of them. I opted for a `use` statement, which, in its simplest form,
|
||||||
|
just brings all the declarations inside the `use`d module into the current scope. Thus,
|
||||||
|
the `use` statement on line 11 would bring all things declared in `M1` into
|
||||||
|
the scope inside `M2`. There's a catch, though: since `M2` is not declared
|
||||||
|
inside `M1`, a `use` statement will not be able to bring in _private_ symbols
|
||||||
|
from `M1` (they're private for a reason!). So, this time, when searching the scope
|
||||||
|
for `M1`, we will have to search only for public symbols. That's another,
|
||||||
|
different way of searching `M1`. So far, we've seen three:
|
||||||
|
|
||||||
|
* Search `M1` for any symbol.
|
||||||
|
* Search `M1` for methods only.
|
||||||
|
* Search `M1` for public symbols only.
|
||||||
|
|
||||||
|
In Dyno, there are even more different ways of searching a single scope, and
|
||||||
|
some of them are mixes of others (one might consider, for instance, searching
|
||||||
|
for only public methods). To represent the various search configurations,
|
||||||
|
the Dyno team came up with using a bitset of _flags_, each of which indicated
|
||||||
|
a necessary condition for a symbol to be returned. A bitset with flags set
|
||||||
|
for two properties (like "public" and "method") requires that both such
|
||||||
|
properties be found on each symbol that's returned from a scope. This led to
|
||||||
|
C++ code along the lines of:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
auto allPublicSymbols = Flags::PUBLIC;
|
||||||
|
auto allPublicMethods = Flags::PUBLIC | Flags::METHOD;
|
||||||
|
```
|
||||||
|
|
||||||
|
It also turned out convenient to add negative versions of each flag
|
||||||
|
(`NOT_PUBLIC` for private symbols, `NOT_METHOD` for regular old procedures
|
||||||
|
and other definitions, and so on. So, some other possible flag combinations
|
||||||
|
include:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
auto allNonMethods = Flags::NOT_METHOD;
|
||||||
|
auto privateMethods = Flags::NOT_PUBLIC | Flags::METHOD;
|
||||||
|
```
|
||||||
|
|
||||||
|
Given these flags, there are some situations in which checking a scope a
|
||||||
|
second time is redundant, in that it is guaranteed to find no additional
|
||||||
|
symbols. For instance, if you search a scope for all public symbols, and
|
||||||
|
then subsequently search for all public methods, you will only find
|
||||||
|
duplicates -- after all, all public methods are public symbols. Most
|
||||||
|
generally, this occurs when a second search has all the flags from a
|
||||||
|
previous search, and maybe more. In math lingo, if the set of flags checked
|
||||||
|
the first time is a subset of the set of flags checked the second time,
|
||||||
|
it's guaranteed not to find anything new.
|
||||||
|
|
||||||
|
In Dyno, we like to avoid additional work when we can. To do so, we track
|
||||||
|
which scopes have already been searched, and avoid searching them again.
|
||||||
|
Since what comes up from a search depends on the flags, we store the flags
|
||||||
|
alongside the scopes we've checked. __If we find that the previously-checked
|
||||||
|
bitset is a subset of the current biset, we just skip the search__.
|
||||||
|
|
||||||
|
But then, what if it _isn't_ a subset? Another concern here is avoiding
|
||||||
|
duplicate results (it's easier to check for duplicate definitions if you
|
||||||
|
know a symbol is only returned from a search once). So, another feature of
|
||||||
|
Dyno's scope search is an additional bitset of what to _exclude_, which we
|
||||||
|
set to be the previous search's filter. So if the first search looked for
|
||||||
|
symbols matching description \\(A\\), and the second search is supposed to
|
||||||
|
look for symbols matching description \\(B\\), __then really we do a search
|
||||||
|
for \\(A \\land \\lnot B\\) (that is, \\(A\\) and not \\(B\\))__.
|
||||||
|
|
||||||
|
{{< dialog >}}
|
||||||
|
{{< message "question" "reader" >}}
|
||||||
|
Hold on, why do you need a whole another bitset? There are already negated
|
||||||
|
versions of each flag available. Can't you just add those to the filter?
|
||||||
|
{{< /message >}}
|
||||||
|
{{< message "answer" "Daniel" >}}
|
||||||
|
Good question. The difference is a little bit tricky. If we just negated
|
||||||
|
each flag, we'd turn an expression like \(A \land B\) into \(\lnot A \land
|
||||||
|
\lnot B\). However, according to De Morgan's laws, the proper negation of
|
||||||
|
\(A \land B\) is \(\lnot A \lor \lnot B\) (notice the use of "or" instead
|
||||||
|
of "and"). On the other hand, using an "exclude" bitset negates the whole
|
||||||
|
conjunction, rather than the individual flags, and so gives us the result
|
||||||
|
we need.
|
||||||
|
{{< /message >}}
|
||||||
|
{{< /dialog >}}
|
||||||
|
|
||||||
|
One last thing: what happens if there were two previous searches? What we
|
||||||
|
need is to to somehow combine the two filters into one. Taking a cue from
|
||||||
|
a previous example, in which "public" was followed by "public methods", we
|
||||||
|
can observe that since the second search has additional flags, it's more
|
||||||
|
restrictive, and thus guaranteed to not find anything. __So we try to create
|
||||||
|
the least restrictive bitset possible, by taking an intersection of the
|
||||||
|
flags used.__
|
||||||
|
|
||||||
|
Actually, that last point is not quite correct in every possible case
|
||||||
|
(taking the intersection is not always the right thing to do).
|
||||||
|
However, running the code through our test suite, we did not notice any
|
||||||
|
cases in which it misbehaved. So, noting the potential issue in a comment,
|
||||||
|
we moved on to other things.
|
||||||
|
|
||||||
|
That is, until I decided that it was time to add another possible flag to
|
||||||
|
the bitset. At that point, sitting and trying to reason about the
|
||||||
|
possible cases, I realized that it would be much nicer to describe this
|
||||||
|
mathematically, and have a model checker generate outlandish scenarios for
|
||||||
|
me. Having at some point seen [Hillel Wayne's
|
||||||
|
post](https://www.hillelwayne.com/post/alloy6/) about the release of
|
||||||
|
[Alloy 6](https://alloytools.org/), I thought I'd give it a go. I'd never
|
||||||
|
touched alloy before this, so be warned: this is what I came up with on my
|
||||||
|
own attempt.
|
||||||
|
|
||||||
|
### Modeling Flags and Bitsets in Alloy
|
||||||
|
Flags are represented on the C++ side as an `enum` (with custom indexing so
|
||||||
|
as to make each flag be exactly one bit). I checked, and it looked like
|
||||||
|
Alloy had an `enum` feature, too! I started off by making an enum of the
|
||||||
|
flags I wanted to play with.
|
||||||
|
|
||||||
|
{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 1 1 >}}
|
||||||
|
|
||||||
|
We haven't seen the `MethodOrField` flag, but it's an important one. It
|
||||||
|
turns out that it's much more common to look for anything that could be
|
||||||
|
part of a class, rather than just its methods. This flag is itself an "or"
|
||||||
|
of two properties (something being a method and something being a class
|
||||||
|
field). Note that this is not the same as having two flags, `Method` and
|
||||||
|
`Field`, and always including them together (because that would be an
|
||||||
|
"and", not an "or").
|
||||||
|
|
||||||
|
Notice also that the list of flags doesn't include the negative versions.
|
||||||
|
Since the negative versions are one-for-one with the positive ones, I
|
||||||
|
instead chose to represent bitsets as simply two sets: one set of
|
||||||
|
"positive" flags, in which the presence of e.g. `Method` indicates that the
|
||||||
|
`METHOD` flag was set, and one set of "negative" flags, in which the
|
||||||
|
presence of `Method` indicates that `NOT_METHOD` was set. This way, I'm
|
||||||
|
guaranteed that there's a positive and negative version of each flag,
|
||||||
|
automatically. Here's how I wrote that in Alloy.
|
||||||
|
|
||||||
|
{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 6 9 >}}
|
||||||
|
|
||||||
|
{{< todo >}}
|
||||||
|
The rest of the article
|
||||||
|
{{< /todo >}}
|
||||||
|
|
||||||
|
### Scratch Work
|
||||||
|
{{< todo >}}
|
||||||
|
This section is temporary
|
||||||
|
{{< /todo >}}
|
||||||
|
|
||||||
|
a small-ish program to illustrate what I mean.
|
||||||
|
|
||||||
|
```Chapel
|
||||||
|
module M {
|
||||||
|
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
```Chapel {linenos=true}
|
||||||
|
module M1 {
|
||||||
|
public use super.M2;
|
||||||
|
}
|
||||||
|
module M2 {
|
||||||
|
private var x = 1;
|
||||||
|
|
||||||
|
module M3 {
|
||||||
|
public use super;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
use M1;
|
||||||
|
use M2.M3;
|
||||||
|
writeln(x)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Moreover, a `public use` makes these definitions part
|
||||||
|
of the module that does the `use` -- that is, `M1` would now contain the
|
||||||
|
definitions from `M2`. However, since `M1` is not defined inside of `M2`, it
|
||||||
|
isn't able to access its private variables (like `x` on line 5), so this
|
||||||
|
particular use statement leaves `M1` just containing (a reference to) `M3`.
|
||||||
|
|
||||||
|
The `public use` on line
|
Loading…
Reference in New Issue
Block a user