Add initial draft of alloy article
This commit is contained in:
parent
f579641866
commit
9ddd2dd3bc
163
code/dyno-alloy/DynoAlloy.als
Normal file
163
code/dyno-alloy/DynoAlloy.als
Normal file
@ -0,0 +1,163 @@
|
||||
enum Flag {Method, MethodOrField, Public}
|
||||
|
||||
/* There is a negative version for each flag (METHOD and NOT_METHOD).
|
||||
Model this as two sets, one of positive flags, and one of netative flags,
|
||||
and interpret the bitfield to be a conjunction of both flags. */
|
||||
sig Bitfield {
|
||||
, positiveFlags: set Flag
|
||||
, negativeFlags: set Flag
|
||||
}
|
||||
|
||||
/* A filter state has filterFlags and excludeFlags, both represented as conjunctions. */
|
||||
sig FilterState {
|
||||
, include: Bitfield
|
||||
, exclude: Bitfield
|
||||
}
|
||||
|
||||
/* Initially, no search has happeneed for a scope, so its 'found' is not set to anything. */
|
||||
one sig NotSet {}
|
||||
|
||||
/* Finally, there's a search state (whether or not a particular scope has already been
|
||||
searched with a particular configuration). */
|
||||
one sig SearchState {
|
||||
, var found: Bitfield + NotSet
|
||||
}
|
||||
|
||||
|
||||
pred bitfieldEmpty[b: Bitfield] {
|
||||
#b.positiveFlags = 0 and #b.negativeFlags = 0
|
||||
}
|
||||
|
||||
pred bitfieldEqual[b1: Bitfield, b2: Bitfield] {
|
||||
b1.positiveFlags = b2.positiveFlags and b1.negativeFlags = b2.negativeFlags
|
||||
}
|
||||
|
||||
pred bitfieldIntersection[b1: Bitfield, b2: Bitfield, b3: Bitfield] {
|
||||
b3.positiveFlags = b1.positiveFlags & b2.positiveFlags
|
||||
b3.negativeFlags = b1.negativeFlags & b2.negativeFlags
|
||||
}
|
||||
|
||||
pred bitfieldSubset[b1: Bitfield, b2: Bitfield] {
|
||||
b1.positiveFlags in b2.positiveFlags
|
||||
b1.negativeFlags in b2.negativeFlags
|
||||
}
|
||||
|
||||
pred bitfieldIncomparable[b1: Bitfield, b2: Bitfield] {
|
||||
not bitfieldSubset[b1, b2]
|
||||
not bitfieldSubset[b2, b1]
|
||||
}
|
||||
|
||||
pred addBitfieldFlag[b1: Bitfield, b2: Bitfield, flag: Flag] {
|
||||
b2.positiveFlags = b1.positiveFlags + flag
|
||||
b2.negativeFlags = b1.negativeFlags
|
||||
}
|
||||
|
||||
pred addBitfieldFlagNeg[b1: Bitfield, b2: Bitfield, flag: Flag] {
|
||||
b2.negativeFlags = b1.negativeFlags + flag
|
||||
b2.positiveFlags = b1.positiveFlags
|
||||
}
|
||||
|
||||
enum Property { PMethod, PField, PPublic }
|
||||
|
||||
sig Props {
|
||||
properties: set Property
|
||||
}
|
||||
|
||||
pred flagMatchesPropery[flag: Flag, property: Property] {
|
||||
(flag = Method and property = PMethod) or
|
||||
(flag = MethodOrField and (property = PMethod or property = PField)) or
|
||||
(flag = Public and property = PPublic)
|
||||
}
|
||||
|
||||
pred bitfieldMatchesProperties[bitfield: Bitfield, props: Props] {
|
||||
// All positive flags must be satisifed
|
||||
all flag: bitfield.positiveFlags | some property: props.properties | flagMatchesPropery[flag, property]
|
||||
// All negative flags must not be satisfied
|
||||
all flag: bitfield.negativeFlags | no property: props.properties | flagMatchesPropery[flag, property]
|
||||
}
|
||||
|
||||
pred configureState[filterState: FilterState] {
|
||||
some initialState: FilterState {
|
||||
// Each lookup in scope starts with empty filter and exclude flags
|
||||
bitfieldEmpty[initialState.include] and bitfieldEmpty[initialState.exclude]
|
||||
|
||||
// The intermediate states (bf1) are used for sequencing of operations.
|
||||
some bf1 : Bitfield {
|
||||
// Add "Public" depending on skipPrivateVisibilities
|
||||
addBitfieldFlag[initialState.include, bf1, Public] or
|
||||
bitfieldEqual[initialState.include, bf1]
|
||||
|
||||
// If it's a method receiver, add method or field restriction
|
||||
addBitfieldFlag[bf1, filterState.include, MethodOrField] or
|
||||
// if it's not a receiver, filter to non-methods (could be overridden)
|
||||
addBitfieldFlagNeg[bf1, filterState.include, Method] or
|
||||
// Maybe methods are not being included but it's not a receiver, so no change.
|
||||
bitfieldEqual[bf1, filterState.include]
|
||||
}
|
||||
// Exclude filter doesn't change here
|
||||
initialState.exclude = filterState.exclude
|
||||
}
|
||||
}
|
||||
|
||||
pred oldUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
|
||||
toSet' in Bitfield and bitfieldIntersection[toSet, setTo.include, toSet']
|
||||
}
|
||||
|
||||
pred newUpdate[toSet: Bitfield + NotSet, setTo: FilterState] {
|
||||
(not bitfieldIncomparable[toSet, setTo.include] and oldUpdate[toSet, setTo]) or
|
||||
(bitfieldIncomparable[toSet, setTo.include] and toSet = toSet')
|
||||
}
|
||||
|
||||
pred updateOrSet[toSet: Bitfield + NotSet, setTo: FilterState] {
|
||||
(toSet in NotSet and toSet' = setTo.include) or
|
||||
(toSet not in NotSet and oldUpdate[toSet, setTo])
|
||||
}
|
||||
|
||||
pred excludeBitfield[found: Bitfield + NotSet, exclude: Bitfield] {
|
||||
(found != NotSet and bitfieldEqual[found, exclude]) or
|
||||
(found = NotSet and bitfieldEmpty[exclude])
|
||||
}
|
||||
|
||||
fact init {
|
||||
all searchState: SearchState | searchState.found = NotSet
|
||||
}
|
||||
|
||||
fact step {
|
||||
always {
|
||||
// Model that a new doLookupInScope could've occurred, with any combination of flags.
|
||||
all searchState: SearchState {
|
||||
some fs: FilterState {
|
||||
// This is a possible combination of lookup flags
|
||||
configureState[fs]
|
||||
|
||||
// If a search has been performed before, take the intersection; otherwise,
|
||||
// just insert the current filter flags.
|
||||
updateOrSet[searchState.found, fs]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
example: run {
|
||||
all searchState: SearchState {
|
||||
// a way that subsequent results of searching it will miss things.
|
||||
eventually some props: Props, fs: FilterState, fsBroken: FilterState, exclude: Bitfield {
|
||||
// Some search (fs) will cause a transition / modification of the search state...
|
||||
configureState[fs]
|
||||
updateOrSet[searchState.found, fs]
|
||||
// Such that a later, valid search... (fsBroken)
|
||||
configureState[fsBroken]
|
||||
excludeBitfield[searchState.found', exclude]
|
||||
|
||||
// Will allow for a set of properties...
|
||||
// ... that are left out of the original search...
|
||||
not bitfieldMatchesProperties[searchState.found, props]
|
||||
// ... and out of the current search
|
||||
not (bitfieldMatchesProperties[fs.include, props] and not bitfieldMatchesProperties[searchState.found, props])
|
||||
// But would be matched by the broken search...
|
||||
bitfieldMatchesProperties[fsBroken.include, props]
|
||||
// ... to not be matched by a search with the new state:
|
||||
not (bitfieldMatchesProperties[fsBroken.include, props] and not bitfieldMatchesProperties[exclude, props])
|
||||
}
|
||||
}
|
||||
}
|
277
content/blog/dyno_alloy.md
Normal file
277
content/blog/dyno_alloy.md
Normal file
@ -0,0 +1,277 @@
|
||||
---
|
||||
title: "Proving My Compiler Code Incorrect With Alloy"
|
||||
date: 2023-05-02T22:48:52-07:00
|
||||
tags: ["Compilers", "Alloy"]
|
||||
draft: true
|
||||
---
|
||||
|
||||
{{< todo >}}
|
||||
Intro section and disclaimer
|
||||
{{< /todo >}}
|
||||
|
||||
I work as a compiler developer on the [Chapel](https://chapel-lang.org) team.
|
||||
One of the things that a language like Chapel has to do is called _resolution_,
|
||||
which is the process of figuring out what each identifier, like `x`, refers to,
|
||||
and what its type is. Even the first part of that is pretty complicated, what
|
||||
with public and private variables, methods (which can be decalred outside
|
||||
of their receiver type in Chapel), and more...
|
||||
|
||||
Scope resolution in Chapel is further complicated by the fact that the same
|
||||
scope might need to be searched multiple times, in different contexts. Let
|
||||
me start with a few examples to illustrate what I mean. Here's the first
|
||||
program:
|
||||
|
||||
```Chapel {linenos=true}
|
||||
module M {
|
||||
class C {}
|
||||
|
||||
// A regular procedure (not a method)
|
||||
proc foo() {}
|
||||
|
||||
// A method on C.
|
||||
proc C.foo() {}
|
||||
|
||||
// Another method on C.
|
||||
proc C.doSomething() {
|
||||
foo();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you don't know Chapel (and you probably don't!) this program already merits a fair
|
||||
bit of explanation. A _module_ in Chapel (declared via a `module` keyword)
|
||||
is just a collection of definitions. Such definitions could include variables,
|
||||
methods, classes and more. Putting them in a module helps group them.
|
||||
|
||||
{{< todo >}}
|
||||
Write the rest of this explanation.
|
||||
{{< /todo >}}
|
||||
|
||||
The interesting part of the snippet is the body of the `doSomething` method.
|
||||
It has a call to `foo`: but which `foo` is it referring to? There are two:
|
||||
the regular procedure (non-method) `foo`, declared on line 5, and the
|
||||
method `C.foo` declared on line 8. In Chapel, the rules dictate that when
|
||||
such a situation arises, and a fitting method is found, the method is
|
||||
preferred to the non-method. In the rewritten version of the Chapel compiler,
|
||||
titled Dyno, this disambiguation is achieved by first searching the scopes
|
||||
visible from the class `C` for methods only. In this particular example,
|
||||
the two scopes searched will be:
|
||||
|
||||
1. The inside of class `C`. The class itself doesn't define any methods, so
|
||||
nothing is found.
|
||||
2. The module in which `C` is defined (`M` in this case). This module does
|
||||
have a method, the one on line 8, so that one is returned.
|
||||
|
||||
Only if methods are not found are non-methods considered. In this situation,
|
||||
the search order will be as follows:
|
||||
|
||||
1. The inside of `C.doSomething` will be searched. `doSomething` doesn't declare
|
||||
anything, so the search will come up empty.
|
||||
2. The module in which `C.doSomething` is defined (`M` again) will be searched. This time,
|
||||
both methods and non-methods will be considered. Since we're considering
|
||||
a hypothetical situation in which the method `C.foo` isn't there (otherwise
|
||||
it would've been found earlier), the only thing that will be found will
|
||||
be the non-method `foo`.
|
||||
|
||||
Notice that we've already had to search the module `M` twice, looking for
|
||||
different things each time. First, we were looking for only method, but
|
||||
later, we were looking for anything. However, this isn't as complicated as
|
||||
things can get. The simplifying aspect of this program is that both `doSomething`
|
||||
and `C` are defined inside the class `C`, and therefore have access to its
|
||||
private methods and procedures. If we extracted `C.doSomething` into its
|
||||
own separate module, the program would look like this.
|
||||
|
||||
```Chapel {linenos=true}
|
||||
module M1 {
|
||||
class C {}
|
||||
|
||||
// A regular procedure (not a method)
|
||||
proc foo() {}
|
||||
|
||||
// A method on C.
|
||||
proc C.foo() {}
|
||||
}
|
||||
module M2 {
|
||||
use super.M1;
|
||||
|
||||
// Another method on C.
|
||||
proc C.doSomething() {
|
||||
foo();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Since `doSomething` is now in another module, it can't just access the `foo`s from
|
||||
`M1` willy-nilly. There are a few ways to get the things that were declared in another module out
|
||||
and make use of them. I opted for a `use` statement, which, in its simplest form,
|
||||
just brings all the declarations inside the `use`d module into the current scope. Thus,
|
||||
the `use` statement on line 11 would bring all things declared in `M1` into
|
||||
the scope inside `M2`. There's a catch, though: since `M2` is not declared
|
||||
inside `M1`, a `use` statement will not be able to bring in _private_ symbols
|
||||
from `M1` (they're private for a reason!). So, this time, when searching the scope
|
||||
for `M1`, we will have to search only for public symbols. That's another,
|
||||
different way of searching `M1`. So far, we've seen three:
|
||||
|
||||
* Search `M1` for any symbol.
|
||||
* Search `M1` for methods only.
|
||||
* Search `M1` for public symbols only.
|
||||
|
||||
In Dyno, there are even more different ways of searching a single scope, and
|
||||
some of them are mixes of others (one might consider, for instance, searching
|
||||
for only public methods). To represent the various search configurations,
|
||||
the Dyno team came up with using a bitset of _flags_, each of which indicated
|
||||
a necessary condition for a symbol to be returned. A bitset with flags set
|
||||
for two properties (like "public" and "method") requires that both such
|
||||
properties be found on each symbol that's returned from a scope. This led to
|
||||
C++ code along the lines of:
|
||||
|
||||
```C++
|
||||
auto allPublicSymbols = Flags::PUBLIC;
|
||||
auto allPublicMethods = Flags::PUBLIC | Flags::METHOD;
|
||||
```
|
||||
|
||||
It also turned out convenient to add negative versions of each flag
|
||||
(`NOT_PUBLIC` for private symbols, `NOT_METHOD` for regular old procedures
|
||||
and other definitions, and so on. So, some other possible flag combinations
|
||||
include:
|
||||
|
||||
```C++
|
||||
auto allNonMethods = Flags::NOT_METHOD;
|
||||
auto privateMethods = Flags::NOT_PUBLIC | Flags::METHOD;
|
||||
```
|
||||
|
||||
Given these flags, there are some situations in which checking a scope a
|
||||
second time is redundant, in that it is guaranteed to find no additional
|
||||
symbols. For instance, if you search a scope for all public symbols, and
|
||||
then subsequently search for all public methods, you will only find
|
||||
duplicates -- after all, all public methods are public symbols. Most
|
||||
generally, this occurs when a second search has all the flags from a
|
||||
previous search, and maybe more. In math lingo, if the set of flags checked
|
||||
the first time is a subset of the set of flags checked the second time,
|
||||
it's guaranteed not to find anything new.
|
||||
|
||||
In Dyno, we like to avoid additional work when we can. To do so, we track
|
||||
which scopes have already been searched, and avoid searching them again.
|
||||
Since what comes up from a search depends on the flags, we store the flags
|
||||
alongside the scopes we've checked. __If we find that the previously-checked
|
||||
bitset is a subset of the current biset, we just skip the search__.
|
||||
|
||||
But then, what if it _isn't_ a subset? Another concern here is avoiding
|
||||
duplicate results (it's easier to check for duplicate definitions if you
|
||||
know a symbol is only returned from a search once). So, another feature of
|
||||
Dyno's scope search is an additional bitset of what to _exclude_, which we
|
||||
set to be the previous search's filter. So if the first search looked for
|
||||
symbols matching description \\(A\\), and the second search is supposed to
|
||||
look for symbols matching description \\(B\\), __then really we do a search
|
||||
for \\(A \\land \\lnot B\\) (that is, \\(A\\) and not \\(B\\))__.
|
||||
|
||||
{{< dialog >}}
|
||||
{{< message "question" "reader" >}}
|
||||
Hold on, why do you need a whole another bitset? There are already negated
|
||||
versions of each flag available. Can't you just add those to the filter?
|
||||
{{< /message >}}
|
||||
{{< message "answer" "Daniel" >}}
|
||||
Good question. The difference is a little bit tricky. If we just negated
|
||||
each flag, we'd turn an expression like \(A \land B\) into \(\lnot A \land
|
||||
\lnot B\). However, according to De Morgan's laws, the proper negation of
|
||||
\(A \land B\) is \(\lnot A \lor \lnot B\) (notice the use of "or" instead
|
||||
of "and"). On the other hand, using an "exclude" bitset negates the whole
|
||||
conjunction, rather than the individual flags, and so gives us the result
|
||||
we need.
|
||||
{{< /message >}}
|
||||
{{< /dialog >}}
|
||||
|
||||
One last thing: what happens if there were two previous searches? What we
|
||||
need is to to somehow combine the two filters into one. Taking a cue from
|
||||
a previous example, in which "public" was followed by "public methods", we
|
||||
can observe that since the second search has additional flags, it's more
|
||||
restrictive, and thus guaranteed to not find anything. __So we try to create
|
||||
the least restrictive bitset possible, by taking an intersection of the
|
||||
flags used.__
|
||||
|
||||
Actually, that last point is not quite correct in every possible case
|
||||
(taking the intersection is not always the right thing to do).
|
||||
However, running the code through our test suite, we did not notice any
|
||||
cases in which it misbehaved. So, noting the potential issue in a comment,
|
||||
we moved on to other things.
|
||||
|
||||
That is, until I decided that it was time to add another possible flag to
|
||||
the bitset. At that point, sitting and trying to reason about the
|
||||
possible cases, I realized that it would be much nicer to describe this
|
||||
mathematically, and have a model checker generate outlandish scenarios for
|
||||
me. Having at some point seen [Hillel Wayne's
|
||||
post](https://www.hillelwayne.com/post/alloy6/) about the release of
|
||||
[Alloy 6](https://alloytools.org/), I thought I'd give it a go. I'd never
|
||||
touched alloy before this, so be warned: this is what I came up with on my
|
||||
own attempt.
|
||||
|
||||
### Modeling Flags and Bitsets in Alloy
|
||||
Flags are represented on the C++ side as an `enum` (with custom indexing so
|
||||
as to make each flag be exactly one bit). I checked, and it looked like
|
||||
Alloy had an `enum` feature, too! I started off by making an enum of the
|
||||
flags I wanted to play with.
|
||||
|
||||
{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 1 1 >}}
|
||||
|
||||
We haven't seen the `MethodOrField` flag, but it's an important one. It
|
||||
turns out that it's much more common to look for anything that could be
|
||||
part of a class, rather than just its methods. This flag is itself an "or"
|
||||
of two properties (something being a method and something being a class
|
||||
field). Note that this is not the same as having two flags, `Method` and
|
||||
`Field`, and always including them together (because that would be an
|
||||
"and", not an "or").
|
||||
|
||||
Notice also that the list of flags doesn't include the negative versions.
|
||||
Since the negative versions are one-for-one with the positive ones, I
|
||||
instead chose to represent bitsets as simply two sets: one set of
|
||||
"positive" flags, in which the presence of e.g. `Method` indicates that the
|
||||
`METHOD` flag was set, and one set of "negative" flags, in which the
|
||||
presence of `Method` indicates that `NOT_METHOD` was set. This way, I'm
|
||||
guaranteed that there's a positive and negative version of each flag,
|
||||
automatically. Here's how I wrote that in Alloy.
|
||||
|
||||
{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 6 9 >}}
|
||||
|
||||
{{< todo >}}
|
||||
The rest of the article
|
||||
{{< /todo >}}
|
||||
|
||||
### Scratch Work
|
||||
{{< todo >}}
|
||||
This section is temporary
|
||||
{{< /todo >}}
|
||||
|
||||
a small-ish program to illustrate what I mean.
|
||||
|
||||
```Chapel
|
||||
module M {
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
```Chapel {linenos=true}
|
||||
module M1 {
|
||||
public use super.M2;
|
||||
}
|
||||
module M2 {
|
||||
private var x = 1;
|
||||
|
||||
module M3 {
|
||||
public use super;
|
||||
}
|
||||
}
|
||||
|
||||
use M1;
|
||||
use M2.M3;
|
||||
writeln(x)
|
||||
```
|
||||
|
||||
|
||||
Moreover, a `public use` makes these definitions part
|
||||
of the module that does the `use` -- that is, `M1` would now contain the
|
||||
definitions from `M2`. However, since `M1` is not defined inside of `M2`, it
|
||||
isn't able to access its private variables (like `x` on line 5), so this
|
||||
particular use statement leaves `M1` just containing (a reference to) `M3`.
|
||||
|
||||
The `public use` on line
|
Loading…
Reference in New Issue
Block a user