From 562eea4a21a275f599726a35960c68cc014a4571 Mon Sep 17 00:00:00 2001
From: Danila Fedorin
Date: Wed, 10 Aug 2022 19:22:43 -0700
Subject: [PATCH] Add draft of compiler article

---
 code/linear-multistep/lmm.chpl   |  61 ++++++
 content/blog/linear_multistep.md | 319 +++++++++++++++++++++++++++++++
 2 files changed, 380 insertions(+)
 create mode 100644 code/linear-multistep/lmm.chpl
 create mode 100644 content/blog/linear_multistep.md

diff --git a/code/linear-multistep/lmm.chpl b/code/linear-multistep/lmm.chpl
new file mode 100644
index 0000000..e040c0b
--- /dev/null
+++ b/code/linear-multistep/lmm.chpl
@@ -0,0 +1,61 @@
module LinearMultiStep {
  record empty {}
  record cons {
    param weight;
    type tail;
  }

  proc initial(type x : empty) param return 0;
  proc initial(type x : cons(?w, ?t)) param return 1 + initial(t);
  proc cff(param x : int, type ct : cons(?w, ?t)) param {
    if x == 1 {
      return w;
    } else {
      return cff(x-1, t);
    }
  }

  proc runMethod(type method, step : real, count : int, start : real,
                 n : real ... initial(method)): real {
    param coeffCount = initial(method);
    // Repeat the method as many times as requested
    for i in 1..count {
      // We're computing y_{n+s} by adding h*b_j*f(...) terms to y_{n+s-1}.
      // Set total to the most recent y value.
      var total = n(coeffCount - 1);
      for param j in 1..coeffCount do
        // For each coefficient b_j given by cff(j, method),
        // increment the total by h*b_j*f(...)
        total += step * cff(j, method) *
          f(start + step*(i-1+coeffCount-j), n(coeffCount-j));
      // Shift each y_i over by one, and set y_{n+s} to the
      // newly computed total.
      for j in 0..< coeffCount - 1 do
        n(j) = n(j+1);
      n(coeffCount - 1) = total;
    }
    // return the final y_{n+s}
    return n(coeffCount - 1);
  }
}

proc f(t: real, y: real) return y;

use LinearMultiStep;

type euler = cons(1.0, empty);
type adamsBashforth = cons(3.0/2.0, cons(-0.5, empty));
type someThirdMethod = cons(23.0/12.0, cons(-16.0/12.0, cons(5.0/12.0, empty)));

// prints 5.0625 (correct)
writeln(runMethod(euler, step=0.5, count=4, start=0, 1));

// For Adams-Bashforth, pick the second initial point from Euler's method.
// prints 6.0234 (correct)
writeln(runMethod(adamsBashforth, step=0.5, count=3, start=0, 1,
        runMethod(euler, step=0.5, count=1, start=0, 1)));

writeln(runMethod(someThirdMethod, step=0.5, count=2, start=0,
        1,
        runMethod(euler, step=0.5, count=1, start=0, 1),
        runMethod(adamsBashforth, step=0.5, count=1, start=0, 1,
                  runMethod(euler, step=0.5, count=1, start=0, 1))));
diff --git a/content/blog/linear_multistep.md b/content/blog/linear_multistep.md
new file mode 100644
index 0000000..90fb292
--- /dev/null
+++ b/content/blog/linear_multistep.md
@@ -0,0 +1,319 @@
---
title: "Generic Linear Multistep Method Evaluator using Chapel"
date: 2022-07-31T11:40:05-07:00
draft: true
tags: ["Chapel", "Mathematics"]
---

The Chapel programming language has a very pleasant mix of features for
general-purpose programming. Having helped review the recent change
adding [variadic arguments](https://chapel-lang.org/docs/1.14/primers/primers/varargs.html)
to Dyno, the work-in-progress "new compiler" for the language, I've been
excited to give them a test drive (in the "old" compiler, for the time being).
Chapel can naturally express patterns like "as many variadic arguments as the
first argument specifies". In combination with this, Chapel lets you define
functions that perform computations on _types_ at compile time. This is a lot
of power -- Haskell programmers have done much more, with less!

All that's left is a nail for this unusually large hammer. Ideally, our
application would be a family of related algorithms that differ in
predictable ways. Then, we could use type-level computation in Chapel to
pick particular algorithms from this family, without writing much extra
code. Fortunately, I know just the "family of related algorithms":
[linear multistep methods](https://en.wikipedia.org/wiki/Linear_multistep_method)
for computing approximate solutions to ordinary differential equations!
Aside from being a collection of related-but-different algorithms, just like
we want, LMMs are also right up Chapel's alley, since Chapel is a language
built for high-performance computing.

# Euler's Method
Suppose we have a (first-order) differential equation. Such a differential
equation has the form:
{{< latex >}}
y' = f(t, y)
{{< /latex >}}
That is, the value of the derivative of \\(y\\) (aka \\(y'\\)) at any point \\(t\\)
depends also on the value of \\(y\\) itself. We want to figure out what the
value of \\(y\\) is at any point \\(t\\). However, there is no general way to
solve such an equation exactly. In the general case, we'll have to settle
for approximations.

Here is a simple approximation. Suppose that we knew a starting point for our
\\(y\\). For instance, we could know that at \\(t=0\\), the value of \\(y\\) is
\\(y(0) = 1\\). With this, we have both \\(t\\) and \\(y\\), just
what we need to figure out the value of \\(y'(0)\\):
{{< latex >}}
y'(0) = f(0, y(0)) = f(0,1)
{{< /latex >}}

Okay, but how do we get \\(y(t)\\) for _any_ \\(t\\)? All we have is a point
and a derivative at that point. To get any further, we have to extrapolate.
At our point, the value of \\(y\\) is changing at a rate of \\(y'(0)\\). It's
not unreasonable to guess, then, that after some distance \\(h\\), the
value of \\(y\\) would be:
{{< latex >}}
y(0+h) \approx y(0) + hf(0,y(0))
{{< /latex >}}
Importantly, \\(h\\) shouldn't be large, since the further away you
extrapolate, the less accurate your approximation becomes. If we need
to get further than \\(0+h\\), we can use the new point we just computed,
and extrapolate again:
{{< latex >}}
y(0+2h) \approx y(0+h) + hf(0+h,y(0+h))
{{< /latex >}}
And so on and so forth. The last thing I'm going to do is clean up the above
equations. First, let's stop assuming that our initial \\(t\\) is \\(0\\); that
doesn't work in the general case. Instead, we'll use \\(t_0\\) to denote the
initial value of \\(t\\). Similarly, we can use \\(y_0\\) to denote our
(known _a priori_) value of \\(y(t_0)\\). From there, observe that we are
moving in discrete steps, estimating one point after another. We can denote
the first point we estimate as \\((t_1, y_1)\\), the second as \\((t_2, y_2)\\),
and so on. Since we're always moving our \\(t\\) by adding \\(h\\) to it, the
formula for a general \\(t_n\\) is easy enough:
{{< latex >}}
t_n = t_0 + nh
{{< /latex >}}
We can get the formula for \\(y_n\\) by looking at our approximation steps above.
{{< latex >}}
\begin{gather*}
 y_1 = y_0 + hf(t_0,y_0) \\\\
 y_2 = y_1 + hf(t_1,y_1) \\\\
 ... \\\\
 y_{n+1} = y_n + hf(t_n,y_n)
\end{gather*}
{{< /latex >}}

What we have arrived at is [Euler's method](https://mathworld.wolfram.com/EulerForwardMethod.html).
It works pretty well, especially considering how easy it is to implement.
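
To make this concrete, here's a quick worked example of my own (it matches
the first run of the final program in this post): take \\(y' = y\\) with
\\(y(0) = 1\\) and step size \\(h = 0.5\\). Since \\(f(t,y) = y\\), each step
multiplies the current estimate by \\(1.5\\):
{{< latex >}}
\begin{gather*}
 y_1 = 1 + 0.5 \cdot 1 = 1.5 \\\\
 y_2 = 1.5 + 0.5 \cdot 1.5 = 2.25 \\\\
 y_3 = 2.25 + 0.5 \cdot 2.25 = 3.375 \\\\
 y_4 = 3.375 + 0.5 \cdot 3.375 = 5.0625
\end{gather*}
{{< /latex >}}
The exact solution is \\(y = e^t\\), so the true value at \\(t = 2\\) is
\\(e^2 \approx 7.389\\); with such a coarse step size the estimate is rough,
but shrinking \\(h\\) brings it closer.
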
We could write it in Chapel like so:

```Chapel
proc runEulerMethod(step: real, count: int, t0: real, y0: real) {
  var y = y0;
  var t = t0;
  for i in 1..count {
    y += step*f(t,y);
    t += step;
  }
  return y;
}
```

Just nine lines of code!

# Linear Multistep Methods
In Euler's method, we use only the derivative at the _current_ point to drive
our subsequent approximations. Why does that have to be the case, though?
After a few steps, we've computed a whole history of points, each of which
gives us information about \\(y\\)'s derivatives. Of course, the more recent
derivatives are more useful to us (they're closer, so using them reduces
the error we get from extrapolating). Nevertheless, we can still mix
a little bit of "older" information into our estimate of the derivative,
and try to get a better result that way. Take, for example, the two-step
Adams-Bashforth method:

{{< latex >}}
y_{n+2} = y_{n+1} + h\left[\frac{3}{2}f(t_{n+1}, y_{n+1})-\frac{1}{2}f(t_n,y_n)\right]
{{< /latex >}}

In this method, the last _two_ known derivatives are used for computing the next
point. More generally, an \\(s\\)-step linear multistep method uses the last
\\(s\\) points to compute the next, and always takes a linear combination
of the derivatives and the \\(y_n\\) values themselves. The general form is:

{{< latex >}}
\sum_{j=0}^s a_jy_{n+j} = h\sum_{j=0}^s b_jf(t_{n+j}, y_{n+j})
{{< /latex >}}

Let's focus for a second only on methods where the very last \\(y\\) value
is used to compute the next. Furthermore, we won't allow our methods
to depend on the slope at the point we're trying to compute (i.e., when
computing \\(y_{n+s}\\), we do not look at \\(f(t_{n+s}, y_{n+s})\\)). The
methods we will consider will consequently have the following form:

{{< latex >}}
y_{n+s} = y_{n+s-1} + h\sum_{j=0}^{s-1} b_jf(t_{n+j}, y_{n+j})
{{< /latex >}}

Importantly, notice that a method of the above form is pretty
much uniquely determined by its list of coefficients. The
number of steps \\(s\\) is equal to the number of coefficients
\\(b_j\\), and the rest of the equation simply "zips together"
each coefficient with a fixed expression representing the derivative
at a particular point in the past.

What we would like to do now is to represent this
information entirely at compile-time. That is, we would like
to be able to fully describe a method using some Chapel code,
feed it to a function that implements the method, and pay
little-to-nothing in terms of runtime overhead. By encoding
information about a numerical method at compile-time,
we can also ensure that the function is correctly invoked. For
instance, Euler's method needs a single initial coordinate to
get started, \\(y_{0}\\), but the two-step Adams-Bashforth method
above requires both \\(y_{0}\\) and another coordinate \\(y_{1}\\).
If we were to describe methods using a runtime construct, such
as a list, it would be entirely possible for a user to
feed in too few points, or too many. We can be smarter than that.

Okay, but how do we represent a list at compile-time? Chapel
does have `param`s, which are integer, boolean, or string
values that can occur at the type level and are known when
a program is being compiled. However, `param`s only support
primitive values; a list is far from a primitive value!
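
For instance (a toy snippet of my own, just to illustrate the limitation),
individual primitive `param`s are easy to declare, but there's no analogous
way to store a whole list of coefficients in a single `param`:

```Chapel
param stepCount = 4;        // fine: a compile-time int
param methodName = "euler"; // fine: a compile-time string
// ...but there is no list- or array-valued param, so a coefficient
// list like 3.0/2.0, -0.5 cannot be stored this way.
```
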
What is necessary is a trick used quite frequently in
Haskell land, a place where I suppose I am now a permanent
resident, if not already a citizen. We can create two
completely unrelated records, just to tell the compiler
about two new types:

```Chapel
record empty {}
record cons {
  param weight;
  type tail;
}
```

The new `cons` type is generic. It needs two
things: a `real` coefficient for the `weight` field, and
a `type` for the `tail` field. Coming up
with a `real` is not that hard; we can, for instance,
write:

```Chapel
type t1 = cons(1.0, ?);
```

Alright, but what goes in the place of the question mark?
The second parameter of `cons` is a type, and a fully-instantiated
`cons` is itself a type. Thus, perhaps we could try nesting
two `cons`s:

```Chapel
type t2 = cons(1.0, cons(?, ?));
```

Once again, the first parameter to `cons` is a real number, and
coming up with one of those is easy.

```Chapel
type t3 = cons(1.0, cons(2.0, ?));
```

I hope it's obvious enough that we can keep
repeating this process to build up a longer
and longer chain of numbers. However, we need to
do something different if we are to get out of this
`cons`-loop. That's where `empty` comes in.

```Chapel
type t4 = cons(1.0, cons(2.0, empty));
```

Now, `t4` is a fully-concrete type. Note that we
haven't yet -- nor will we -- actually allocate
any records of type `cons` or `empty`; the purpose
of these two types is solely to live at the type level.
Specifically, since our special class of linear multistep
methods is described by lists of coefficients,
we can embed these lists of coefficients into Chapel
types by using `cons` and `empty`. Check out the
table below.

|Method | List | Chapel Type Expression |
|-------|------|------------------------|
| Degenerate case | \\(\\varnothing\\) | `empty` |
| Euler's method | \\(1\\) | `cons(1.0, empty)` |
| Two-step Adams-Bashforth | \\(\\frac{3}{2}, -\\frac{1}{2}\\) | `cons(3.0/2.0, cons(-0.5, empty))` |

As I mentioned earlier, the number of steps \\(s\\) a method takes
(which is also the number of initial data points required to
get the method started) is the number of coefficients. Thus,
we need a way to take a type like `t4` and count how many coefficients
it contains. Fortunately, Chapel allows us to write functions on
types, as well as to pattern match on types' arguments. An
important caveat is that matching occurs only when a function is called,
so we can't somehow write `myType == cons(?h, ?t)`. Instead, we need
to write a function that _definitely_ accepts a type `cons` with
some arguments:

{{< codelines "chapel" "linear-multistep/lmm.chpl" 9 9 >}}

The above function works roughly as follows:

```Chapel
// initial call
initial(cons(1.0, cons(2.0, empty)))
// ?w gets matched with 1.0
// ?t gets matched with cons(2.0, empty)
// the function return is evaluated
1 + initial(t)
// Since t was matched with cons(2.0, empty), this is the same as
1 + initial(cons(2.0, empty))
// This is another call to initial.
// ?w gets matched with 2.0
// ?t gets matched with empty
// evaluation proceeds like last time
1 + 1 + initial(empty)
```

Here, we get stuck; what's `initial(empty)`? We need
another overload of `initial` that handles this case.

{{< codelines "chapel" "linear-multistep/lmm.chpl" 8 8 >}}

With this, the final expression will effectively become `1 + 1 + 0 = 2`,
which is indeed the length of the given list.
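
To double-check this (a quick test of my own, assuming the definitions
above are in scope), we can print the count for a few lists:

```Chapel
writeln(initial(empty));                            // 0
writeln(initial(cons(1.0, empty)));                 // 1 (Euler)
writeln(initial(cons(3.0/2.0, cons(-0.5, empty)))); // 2 (Adams-Bashforth)
```

Since `initial` has `param` return intent, all three of these values are
computed entirely at compile time.
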
Another thing we're going to need is the ability to get a coefficient at a
particular location in the list. Such a function is pretty simple, though it
does also use pattern matching via `?w` and `?t`:

{{< codelines "chapel" "linear-multistep/lmm.chpl" 10 16 >}}

Alright, now's the time to get working on the actual implementation
of linear multistep methods. A function implementing them will
need a few parameters:

* A `type` parameter representing the linear multistep method
  being implemented. This method will be encoded using our
  `cons` lists.
* A `real` representing the step size \\(h\\).
* An `int` representing how many iterations we should run.
* A `real` representing the starting t-coordinate of the method,
  \\(t_0\\). We don't need to feed in a whole list \\(t_0, \\ldots, t_{s-1}\\)
  since each \\(t_{i}\\) is within a fixed distance \\(h\\) of
  its neighbors.
* As many `real` y-coordinates as required by the method,
  \\(y_0, \\ldots, y_{s-1}\\).

Here's the corresponding Chapel code:

{{< codelines "chapel" "linear-multistep/lmm.chpl" 18 19 >}}

That last little bit, `n : real ... initial(method)`, is an
instance of Chapel's variable arguments functionality. Specifically,
this says that `runMethod` takes multiple `real` arguments,
the exact number of which is given by the expression `initial(method)`.
We've just seen that `initial` computes the number of coefficients
in a list, so the function will expect exactly as many y-coordinates
\\(y_i\\) as there are coefficients.

The actual code is pretty unremarkable:

{{< codelines "chapel" "linear-multistep/lmm.chpl" 18 38 >}}

That completes our implementation. All that's left is to actually
use our API! First, we need to encode the various methods:

{{< codelines "chapel" "linear-multistep/lmm.chpl" 46 47 >}}

Finally, let's try this out on a concrete equation. Wikipedia runs
all the methods with the differential equation \\(y' = y\\),
for which the corresponding function `f` is:

{{< codelines "chapel" "linear-multistep/lmm.chpl" 42 42 >}}

Then, we can run the methods as follows (with initial coordinate
\\((0, 1)\\), as Wikipedia does):

{{< codelines "chapel" "linear-multistep/lmm.chpl" 50 56 >}}
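
One pleasant consequence of this design is how little it costs to add
another method. As a sketch (this type alias is my own addition, not part
of the file above; the coefficients are the standard ones for the four-step
Adams-Bashforth method), a new method is a one-liner:

```Chapel
// Four-step Adams-Bashforth:
// y_{n+4} = y_{n+3} + h*(55/24*f_{n+3} - 59/24*f_{n+2}
//                        + 37/24*f_{n+1} - 9/24*f_n)
type adamsBashforth4 =
  cons(55.0/24.0, cons(-59.0/24.0, cons(37.0/24.0, cons(-9.0/24.0, empty))));
```

Because `initial(adamsBashforth4)` evaluates to 4, `runMethod` will only
accept a call that supplies exactly four initial y-coordinates -- the
compile-time encoding gives us that check for free.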