From 9ddd2dd3bcba96e563c5a02599967fc496a78019 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Thu, 4 May 2023 01:03:58 -0700 Subject: [PATCH] Add initial draft of alloy article --- code/dyno-alloy/DynoAlloy.als | 163 ++++++++++++++++++++ content/blog/dyno_alloy.md | 277 ++++++++++++++++++++++++++++++++++ 2 files changed, 440 insertions(+) create mode 100644 code/dyno-alloy/DynoAlloy.als create mode 100644 content/blog/dyno_alloy.md diff --git a/code/dyno-alloy/DynoAlloy.als b/code/dyno-alloy/DynoAlloy.als new file mode 100644 index 0000000..b51beb9 --- /dev/null +++ b/code/dyno-alloy/DynoAlloy.als @@ -0,0 +1,163 @@ +enum Flag {Method, MethodOrField, Public} + +/* There is a negative version for each flag (METHOD and NOT_METHOD). + Model this as two sets, one of positive flags, and one of netative flags, + and interpret the bitfield to be a conjunction of both flags. */ +sig Bitfield { + , positiveFlags: set Flag + , negativeFlags: set Flag +} + +/* A filter state has filterFlags and excludeFlags, both represented as conjunctions. */ +sig FilterState { + , include: Bitfield + , exclude: Bitfield +} + +/* Initially, no search has happeneed for a scope, so its 'found' is not set to anything. */ +one sig NotSet {} + +/* Finally, there's a search state (whether or not a particular scope has already been + searched with a particular configuration). */ +one sig SearchState { + , var found: Bitfield + NotSet +} + + +pred bitfieldEmpty[b: Bitfield] { + #b.positiveFlags = 0 and #b.negativeFlags = 0 +} + +pred bitfieldEqual[b1: Bitfield, b2: Bitfield] { + b1.positiveFlags = b2.positiveFlags and b1.negativeFlags = b2.negativeFlags +} + +pred bitfieldIntersection[b1: Bitfield, b2: Bitfield, b3: Bitfield] { + b3.positiveFlags = b1.positiveFlags & b2.positiveFlags + b3.negativeFlags = b1.negativeFlags & b2.negativeFlags +} + +pred bitfieldSubset[b1: Bitfield, b2: Bitfield] { + b1.positiveFlags in b2.positiveFlags + b1.negativeFlags in b2.negativeFlags +} + +pred bitfieldIncomparable[b1: Bitfield, b2: Bitfield] { + not bitfieldSubset[b1, b2] + not bitfieldSubset[b2, b1] +} + +pred addBitfieldFlag[b1: Bitfield, b2: Bitfield, flag: Flag] { + b2.positiveFlags = b1.positiveFlags + flag + b2.negativeFlags = b1.negativeFlags +} + +pred addBitfieldFlagNeg[b1: Bitfield, b2: Bitfield, flag: Flag] { + b2.negativeFlags = b1.negativeFlags + flag + b2.positiveFlags = b1.positiveFlags +} + +enum Property { PMethod, PField, PPublic } + +sig Props { + properties: set Property +} + +pred flagMatchesPropery[flag: Flag, property: Property] { + (flag = Method and property = PMethod) or + (flag = MethodOrField and (property = PMethod or property = PField)) or + (flag = Public and property = PPublic) +} + +pred bitfieldMatchesProperties[bitfield: Bitfield, props: Props] { + // All positive flags must be satisifed + all flag: bitfield.positiveFlags | some property: props.properties | flagMatchesPropery[flag, property] + // All negative flags must not be satisfied + all flag: bitfield.negativeFlags | no property: props.properties | flagMatchesPropery[flag, property] +} + +pred configureState[filterState: FilterState] { + some initialState: FilterState { + // Each lookup in scope starts with empty filter and exclude flags + bitfieldEmpty[initialState.include] and bitfieldEmpty[initialState.exclude] + + // The intermediate states (bf1) are used for sequencing of operations. + some bf1 : Bitfield { + // Add "Public" depending on skipPrivateVisibilities + addBitfieldFlag[initialState.include, bf1, Public] or + bitfieldEqual[initialState.include, bf1] + + // If it's a method receiver, add method or field restriction + addBitfieldFlag[bf1, filterState.include, MethodOrField] or + // if it's not a receiver, filter to non-methods (could be overridden) + addBitfieldFlagNeg[bf1, filterState.include, Method] or + // Maybe methods are not being included but it's not a receiver, so no change. + bitfieldEqual[bf1, filterState.include] + } + // Exclude filter doesn't change here + initialState.exclude = filterState.exclude + } +} + +pred oldUpdate[toSet: Bitfield + NotSet, setTo: FilterState] { + toSet' in Bitfield and bitfieldIntersection[toSet, setTo.include, toSet'] +} + +pred newUpdate[toSet: Bitfield + NotSet, setTo: FilterState] { + (not bitfieldIncomparable[toSet, setTo.include] and oldUpdate[toSet, setTo]) or + (bitfieldIncomparable[toSet, setTo.include] and toSet = toSet') +} + +pred updateOrSet[toSet: Bitfield + NotSet, setTo: FilterState] { + (toSet in NotSet and toSet' = setTo.include) or + (toSet not in NotSet and oldUpdate[toSet, setTo]) +} + +pred excludeBitfield[found: Bitfield + NotSet, exclude: Bitfield] { + (found != NotSet and bitfieldEqual[found, exclude]) or + (found = NotSet and bitfieldEmpty[exclude]) +} + +fact init { + all searchState: SearchState | searchState.found = NotSet +} + +fact step { + always { + // Model that a new doLookupInScope could've occurred, with any combination of flags. + all searchState: SearchState { + some fs: FilterState { + // This is a possible combination of lookup flags + configureState[fs] + + // If a search has been performed before, take the intersection; otherwise, + // just insert the current filter flags. + updateOrSet[searchState.found, fs] + } + } + } +} + +example: run { + all searchState: SearchState { + // a way that subsequent results of searching it will miss things. + eventually some props: Props, fs: FilterState, fsBroken: FilterState, exclude: Bitfield { + // Some search (fs) will cause a transition / modification of the search state... + configureState[fs] + updateOrSet[searchState.found, fs] + // Such that a later, valid search... (fsBroken) + configureState[fsBroken] + excludeBitfield[searchState.found', exclude] + + // Will allow for a set of properties... + // ... that are left out of the original search... + not bitfieldMatchesProperties[searchState.found, props] + // ... and out of the current search + not (bitfieldMatchesProperties[fs.include, props] and not bitfieldMatchesProperties[searchState.found, props]) + // But would be matched by the broken search... + bitfieldMatchesProperties[fsBroken.include, props] + // ... to not be matched by a search with the new state: + not (bitfieldMatchesProperties[fsBroken.include, props] and not bitfieldMatchesProperties[exclude, props]) + } + } +} diff --git a/content/blog/dyno_alloy.md b/content/blog/dyno_alloy.md new file mode 100644 index 0000000..7f907f8 --- /dev/null +++ b/content/blog/dyno_alloy.md @@ -0,0 +1,277 @@ +--- +title: "Proving My Compiler Code Incorrect With Alloy" +date: 2023-05-02T22:48:52-07:00 +tags: ["Compilers", "Alloy"] +draft: true +--- + +{{< todo >}} +Intro section and disclaimer +{{< /todo >}} + +I work as a compiler developer on the [Chapel](https://chapel-lang.org) team. +One of the things that a language like Chapel has to do is called _resolution_, +which is the process of figuring out what each identifier, like `x`, refers to, +and what its type is. Even the first part of that is pretty complicated, what +with public and private variables, methods (which can be decalred outside +of their receiver type in Chapel), and more... + +Scope resolution in Chapel is further complicated by the fact that the same +scope might need to be searched multiple times, in different contexts. Let +me start with a few examples to illustrate what I mean. Here's the first +program: + +```Chapel {linenos=true} +module M { + class C {} + + // A regular procedure (not a method) + proc foo() {} + + // A method on C. + proc C.foo() {} + + // Another method on C. + proc C.doSomething() { + foo(); + } +} +``` + +If you don't know Chapel (and you probably don't!) this program already merits a fair +bit of explanation. A _module_ in Chapel (declared via a `module` keyword) +is just a collection of definitions. Such definitions could include variables, +methods, classes and more. Putting them in a module helps group them. + +{{< todo >}} +Write the rest of this explanation. +{{< /todo >}} + +The interesting part of the snippet is the body of the `doSomething` method. +It has a call to `foo`: but which `foo` is it referring to? There are two: +the regular procedure (non-method) `foo`, declared on line 5, and the +method `C.foo` declared on line 8. In Chapel, the rules dictate that when +such a situation arises, and a fitting method is found, the method is +preferred to the non-method. In the rewritten version of the Chapel compiler, +titled Dyno, this disambiguation is achieved by first searching the scopes +visible from the class `C` for methods only. In this particular example, +the two scopes searched will be: + +1. The inside of class `C`. The class itself doesn't define any methods, so + nothing is found. +2. The module in which `C` is defined (`M` in this case). This module does + have a method, the one on line 8, so that one is returned. + +Only if methods are not found are non-methods considered. In this situation, +the search order will be as follows: + +1. The inside of `C.doSomething` will be searched. `doSomething` doesn't declare + anything, so the search will come up empty. +2. The module in which `C.doSomething` is defined (`M` again) will be searched. This time, + both methods and non-methods will be considered. Since we're considering + a hypothetical situation in which the method `C.foo` isn't there (otherwise + it would've been found earlier), the only thing that will be found will + be the non-method `foo`. + +Notice that we've already had to search the module `M` twice, looking for +different things each time. First, we were looking for only method, but +later, we were looking for anything. However, this isn't as complicated as +things can get. The simplifying aspect of this program is that both `doSomething` +and `C` are defined inside the class `C`, and therefore have access to its +private methods and procedures. If we extracted `C.doSomething` into its +own separate module, the program would look like this. + +```Chapel {linenos=true} +module M1 { + class C {} + + // A regular procedure (not a method) + proc foo() {} + + // A method on C. + proc C.foo() {} +} +module M2 { + use super.M1; + + // Another method on C. + proc C.doSomething() { + foo(); + } +} +``` + +Since `doSomething` is now in another module, it can't just access the `foo`s from +`M1` willy-nilly. There are a few ways to get the things that were declared in another module out +and make use of them. I opted for a `use` statement, which, in its simplest form, +just brings all the declarations inside the `use`d module into the current scope. Thus, +the `use` statement on line 11 would bring all things declared in `M1` into +the scope inside `M2`. There's a catch, though: since `M2` is not declared +inside `M1`, a `use` statement will not be able to bring in _private_ symbols +from `M1` (they're private for a reason!). So, this time, when searching the scope +for `M1`, we will have to search only for public symbols. That's another, +different way of searching `M1`. So far, we've seen three: + +* Search `M1` for any symbol. +* Search `M1` for methods only. +* Search `M1` for public symbols only. + +In Dyno, there are even more different ways of searching a single scope, and +some of them are mixes of others (one might consider, for instance, searching +for only public methods). To represent the various search configurations, +the Dyno team came up with using a bitset of _flags_, each of which indicated +a necessary condition for a symbol to be returned. A bitset with flags set +for two properties (like "public" and "method") requires that both such +properties be found on each symbol that's returned from a scope. This led to +C++ code along the lines of: + +```C++ +auto allPublicSymbols = Flags::PUBLIC; +auto allPublicMethods = Flags::PUBLIC | Flags::METHOD; +``` + +It also turned out convenient to add negative versions of each flag +(`NOT_PUBLIC` for private symbols, `NOT_METHOD` for regular old procedures +and other definitions, and so on. So, some other possible flag combinations +include: + +```C++ +auto allNonMethods = Flags::NOT_METHOD; +auto privateMethods = Flags::NOT_PUBLIC | Flags::METHOD; +``` + +Given these flags, there are some situations in which checking a scope a +second time is redundant, in that it is guaranteed to find no additional +symbols. For instance, if you search a scope for all public symbols, and +then subsequently search for all public methods, you will only find +duplicates -- after all, all public methods are public symbols. Most +generally, this occurs when a second search has all the flags from a +previous search, and maybe more. In math lingo, if the set of flags checked +the first time is a subset of the set of flags checked the second time, +it's guaranteed not to find anything new. + +In Dyno, we like to avoid additional work when we can. To do so, we track +which scopes have already been searched, and avoid searching them again. +Since what comes up from a search depends on the flags, we store the flags +alongside the scopes we've checked. __If we find that the previously-checked +bitset is a subset of the current biset, we just skip the search__. + +But then, what if it _isn't_ a subset? Another concern here is avoiding +duplicate results (it's easier to check for duplicate definitions if you +know a symbol is only returned from a search once). So, another feature of +Dyno's scope search is an additional bitset of what to _exclude_, which we +set to be the previous search's filter. So if the first search looked for +symbols matching description \\(A\\), and the second search is supposed to +look for symbols matching description \\(B\\), __then really we do a search +for \\(A \\land \\lnot B\\) (that is, \\(A\\) and not \\(B\\))__. + +{{< dialog >}} +{{< message "question" "reader" >}} +Hold on, why do you need a whole another bitset? There are already negated +versions of each flag available. Can't you just add those to the filter? +{{< /message >}} +{{< message "answer" "Daniel" >}} +Good question. The difference is a little bit tricky. If we just negated +each flag, we'd turn an expression like \(A \land B\) into \(\lnot A \land +\lnot B\). However, according to De Morgan's laws, the proper negation of +\(A \land B\) is \(\lnot A \lor \lnot B\) (notice the use of "or" instead +of "and"). On the other hand, using an "exclude" bitset negates the whole +conjunction, rather than the individual flags, and so gives us the result +we need. +{{< /message >}} +{{< /dialog >}} + +One last thing: what happens if there were two previous searches? What we +need is to to somehow combine the two filters into one. Taking a cue from +a previous example, in which "public" was followed by "public methods", we +can observe that since the second search has additional flags, it's more +restrictive, and thus guaranteed to not find anything. __So we try to create +the least restrictive bitset possible, by taking an intersection of the +flags used.__ + +Actually, that last point is not quite correct in every possible case +(taking the intersection is not always the right thing to do). +However, running the code through our test suite, we did not notice any +cases in which it misbehaved. So, noting the potential issue in a comment, +we moved on to other things. + +That is, until I decided that it was time to add another possible flag to +the bitset. At that point, sitting and trying to reason about the +possible cases, I realized that it would be much nicer to describe this +mathematically, and have a model checker generate outlandish scenarios for +me. Having at some point seen [Hillel Wayne's +post](https://www.hillelwayne.com/post/alloy6/) about the release of +[Alloy 6](https://alloytools.org/), I thought I'd give it a go. I'd never +touched alloy before this, so be warned: this is what I came up with on my +own attempt. + +### Modeling Flags and Bitsets in Alloy +Flags are represented on the C++ side as an `enum` (with custom indexing so +as to make each flag be exactly one bit). I checked, and it looked like +Alloy had an `enum` feature, too! I started off by making an enum of the +flags I wanted to play with. + +{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 1 1 >}} + +We haven't seen the `MethodOrField` flag, but it's an important one. It +turns out that it's much more common to look for anything that could be +part of a class, rather than just its methods. This flag is itself an "or" +of two properties (something being a method and something being a class +field). Note that this is not the same as having two flags, `Method` and +`Field`, and always including them together (because that would be an +"and", not an "or"). + +Notice also that the list of flags doesn't include the negative versions. +Since the negative versions are one-for-one with the positive ones, I +instead chose to represent bitsets as simply two sets: one set of +"positive" flags, in which the presence of e.g. `Method` indicates that the +`METHOD` flag was set, and one set of "negative" flags, in which the +presence of `Method` indicates that `NOT_METHOD` was set. This way, I'm +guaranteed that there's a positive and negative version of each flag, +automatically. Here's how I wrote that in Alloy. + +{{< codelines "Alloy" "dyno-alloy/DynoAlloy.als" 6 9 >}} + +{{< todo >}} +The rest of the article +{{< /todo >}} + +### Scratch Work +{{< todo >}} +This section is temporary +{{< /todo >}} + +a small-ish program to illustrate what I mean. + +```Chapel +module M { + +} +``` + + +```Chapel {linenos=true} +module M1 { + public use super.M2; +} +module M2 { + private var x = 1; + + module M3 { + public use super; + } +} + +use M1; +use M2.M3; +writeln(x) +``` + + + Moreover, a `public use` makes these definitions part +of the module that does the `use` -- that is, `M1` would now contain the +definitions from `M2`. However, since `M1` is not defined inside of `M2`, it +isn't able to access its private variables (like `x` on line 5), so this +particular use statement leaves `M1` just containing (a reference to) `M3`. + +The `public use` on line