Address missing problem and make some other improvements in CS325HW2

Extract common parsing code
2020-01-01 11:12:44 -08:00 · 2019-12-31 21:59:13 -08:00
5 changed files with 187 additions and 139 deletions
--- a/code/cs325-langs/src/CommonParsing.hs
+++ b/code/cs325-langs/src/CommonParsing.hs
@@ -0,0 +1,66 @@
 module CommonParsing where
 import Data.Char
 import Data.Functor
 import Text.Parsec
 import Text.Parsec.Char
 import Text.Parsec.Combinator
 type Parser a b = Parsec String a b
 kw :: String -> Parser a ()
 kw s = string s $> ()
 kwIf :: Parser a ()
 kwIf = kw "if"
 kwThen :: Parser a ()
 kwThen = kw "then"
 kwElse :: Parser a ()
 kwElse = kw "else"
 kwState :: Parser a ()
 kwState = kw "state"
 kwEffect :: Parser a ()
 kwEffect = kw "effect"
 kwCombine :: Parser a ()
 kwCombine = kw "combine"
 kwRand :: Parser a ()
 kwRand = kw "rand"
 op :: String -> op -> Parser a op
 op s o = string s $> o
 int :: Parser a Int
 int = read <$> (many1 digit <* spaces)
 var :: [String] -> Parser a String
 var reserved =
    do
        c <- satisfy $ \c -> isLetter c || c == '_'
        cs <- many (satisfy isLetter <|> digit) <* spaces
        let name = c:cs
        if name `elem` reserved
        then fail "Can't use reserved keyword as identifier"
        else return name
 surround :: Char -> Char -> Parser a b -> Parser a b
 surround c1 c2 pe =
    do
        char c1 >> spaces
        e <- pe
        spaces >> char c2 >> spaces
        return e
 level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e
 level c po pe =
    do
        e <- pe <* spaces
        ops <- many $ try $ (flip . c <$> (po <* spaces) <*> pe) <* spaces
        return $ foldl (flip ($)) e ops
 precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e
 precedence = foldl . flip . level
--- a/code/cs325-langs/src/LanguageOne.hs
+++ b/code/cs325-langs/src/LanguageOne.hs
@@ -1,5 +1,6 @@
 module LanguageOne where
 import qualified PythonAst as Py
 import qualified CommonParsing as P
 import Data.Bifunctor
 import Data.Char
 import Data.Functor
@@ -54,31 +55,8 @@ data Prog = Prog [Function]
 {- Parser -}
 type Parser = Parsec String (Maybe Int)
 parseInt :: Parser Int
 parseInt = read <$> (many1 digit <* spaces)
 parseVar :: Parser String
-parseVar =
+parseVar = P.var ["if", "then", "else", "var"]
    do
        c <- satisfy (\c -> (isLetter c && isLower c) || c == '_')
        cs <- many (satisfy isLetter <|> digit)
        spaces
        let var = c:cs
        if var `elem` ["if", "then", "else", "rand"]
        then fail "reserved"
        else return var
 parseKwIf :: Parser ()
 parseKwIf = string "if" $> ()
 parseKwThen :: Parser ()
 parseKwThen = string "then" $> ()
 parseKwElse :: Parser ()
 parseKwElse = string "else" $> ()
 parseKwRand :: Parser Expr
 parseKwRand = string "rand" $> Random
 parseThis :: Parser Expr
 parseThis =
@@ -127,11 +105,11 @@ parseSelector =
 parseIfElse :: Parser Expr
 parseIfElse =
    do
-        parseKwIf >> spaces
+        P.kwIf >> spaces
        ec <- parseExpr
-        spaces >> parseKwThen >> spaces
+        spaces >> P.kwThen >> spaces
        et <- parseExpr
-        spaces >> parseKwElse >> spaces
+        spaces >> P.kwElse >> spaces
        ee <- parseExpr
        spaces
        return $ IfElse ec et ee
@@ -162,7 +140,7 @@ parseParenthesized =
 parseBasicExpr :: Parser Expr
 parseBasicExpr = choice
-    [ IntLiteral <$> parseInt
+    [ IntLiteral <$> P.int
    , parseThis
    , parseList
    , parseSplit
@@ -170,7 +148,7 @@ parseBasicExpr = choice
    , parseParameter
    , parseParenthesized
    , Var <$> try parseVar
-    , parseKwRand
+    , P.kwRand $> Random
    , parseIfElse
    ] 
@@ -203,33 +181,16 @@ parsePostfixedExpr =
        ps <- many parsePostfix
        return $ foldl (flip ($)) eb ps
 parseOp :: String -> Op -> Parser Op
 parseOp s o = try (string s) >> return o
 parseLevel :: Parser Expr -> Parser Op -> Parser Expr
 parseLevel pe po =
    do
        start <- pe
        spaces
        ops <- many $ try $ do
            op <- po
            spaces
            val <- pe
            spaces
            return (op, val)
        spaces
        return $ foldl (\l (o, r) -> BinOp o l r) start ops
 parseExpr :: Parser Expr
-parseExpr = foldl parseLevel parsePostfixedExpr
+parseExpr = P.precedence BinOp parsePostfixedExpr
-    [ parseOp "*" Multiply, parseOp "/" Divide
+    [ P.op "*" Multiply, P.op "/" Divide
-    , parseOp "+" Add, parseOp "-" Subtract
+    , P.op "+" Add, P.op "-" Subtract
-    , parseOp "<<" Insert
+    , P.op "<<" Insert
-    , parseOp "++" Concat
+    , P.op "++" Concat
-    , parseOp "<=" LessThanEq <|> parseOp ">=" GreaterThanEq <|>
+    , try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|>
-        parseOp "<" LessThan <|> parseOp ">" GreaterThan <|>
+        P.op "<" LessThan <|> P.op ">" GreaterThan <|>
-        parseOp "==" Equal <|> parseOp "!=" NotEqual
+        P.op "==" Equal <|> P.op "!=" NotEqual
-    , parseOp "&&" And <|> parseOp "||" Or
+    , P.op "&&" And <|> P.op "||" Or
    ]
 parseFunction :: Parser Function
--- a/code/cs325-langs/src/LanguageTwo.hs
+++ b/code/cs325-langs/src/LanguageTwo.hs
@@ -1,5 +1,6 @@
 module LanguageTwo where
 import qualified PythonAst as Py
 import qualified CommonParsing as P
 import Data.Char
 import Data.Functor
 import Text.Parsec
@@ -33,95 +34,50 @@ data Prog = Prog Expr [Stmt] [Stmt]
 {- Parser -}
 type Parser = Parsec String ()
 parseKw :: String -> Parser ()
 parseKw s = string s $> ()
 parseKwIf :: Parser ()
 parseKwIf = parseKw "if"
 parseKwElse :: Parser ()
 parseKwElse = parseKw "else"
 parseKwState :: Parser ()
 parseKwState = parseKw "state"
 parseKwEffect :: Parser ()
 parseKwEffect = parseKw "effect"
 parseKwCombine :: Parser ()
 parseKwCombine = parseKw "combine"
 parseOp :: String -> Op -> Parser Op
 parseOp s o = string s $> o
 parseInt :: Parser Int
 parseInt = read <$> (many1 digit <* spaces)
 parseVar :: Parser String
-parseVar =
+parseVar = P.var [ "if", "else", "state", "effect", "combine" ]
    do
        c <- satisfy $ \c -> isLetter c || c == '_'
        cs <- many (satisfy isLetter <|> digit) <* spaces
        let name = c:cs
        if name `elem` ["if", "else", "state", "effect", "combine"]
        then fail "Can't use reserved keyword as identifier"
        else return name
 parseSurrounded :: Char -> Char -> Parser a -> Parser a
 parseSurrounded c1 c2 pe =
    do
        char c1 >> spaces
        e <- pe
        spaces >> char c2 >> spaces
        return e
 parseLength :: Parser Expr
-parseLength = Length <$> parseSurrounded '|' '|' parseExpr
+parseLength = Length <$> P.surround '|' '|' parseExpr
 parseParenthesized :: Parser Expr
-parseParenthesized = parseSurrounded '(' ')' parseExpr
+parseParenthesized = P.surround '(' ')' parseExpr
 parseBasic :: Parser Expr
 parseBasic = choice
-    [ IntLiteral <$> parseInt
+    [ IntLiteral <$> P.int
    , Var <$> parseVar
    , parseLength
    , parseParenthesized
    ]
 parseLevel :: Parser Op -> Parser Expr -> Parser Expr
 parseLevel po pe =
    do
        e <- pe <* spaces
        ops <- many ((flip . BinOp <$> (po <* spaces) <*> pe) <* spaces)
        return $ foldl (flip ($)) e ops
 parseExpr :: Parser Expr
-parseExpr = foldl (flip parseLevel) parseBasic
+parseExpr = P.precedence BinOp parseBasic
-    [ parseOp "*" Multiply <|> parseOp "/" Divide
+    [ P.op "*" Multiply <|> P.op "/" Divide
-    , parseOp "+" Add <|> parseOp "-" Subtract
+    , P.op "+" Add <|> P.op "-" Subtract
-    , parseOp "==" Equal <|> parseOp "!=" NotEqual
+    , P.op "==" Equal <|> P.op "!=" NotEqual
-    , parseOp "&&" And
+    , P.op "&&" And
-    , try $ parseOp "||" Or
+    , try $ P.op "||" Or
    ]
 parseIf :: Parser Stmt
 parseIf = do
-    parseKwIf >> spaces
+    P.kwIf >> spaces
    c <- parseParenthesized
    t <- parseStmt <* spaces
-    e <- (Just <$> (parseKwElse >> spaces *> parseStmt)) <|> return Nothing
+    e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing
    return $ IfElse c t e
 parseBlockStmts :: Parser [Stmt]
-parseBlockStmts = parseSurrounded '{' '}' (many parseStmt)
+parseBlockStmts = P.surround '{' '}' (many parseStmt)
 parseBlock :: Parser Stmt
 parseBlock = Block <$> parseBlockStmts
 parseAssign :: Parser Stmt
 parseAssign = Assign <$>
-    (parseVar <* spaces <* char '=' <* spaces) <*>
+    (parseVar <* char '=' <* spaces) <*>
    parseExpr <* (char ';' >> spaces)
 parseStmt :: Parser Stmt
@@ -133,9 +89,9 @@ parseStmt = choice
 parseProgram :: Parser Prog
 parseProgram = do
-    state <- parseKwState >> spaces *> parseExpr <* char ';' <* spaces
+    state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces
-    effect <- parseKwEffect >> spaces *> parseBlockStmts <* spaces
+    effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces
-    combined <- parseKwCombine >> spaces *> parseBlockStmts <* spaces
+    combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces
    return $ Prog state effect combined
 parse :: String -> String -> Either ParseError Prog
--- a/content/blog/00_cs325_languages_hw1.md
+++ b/content/blog/00_cs325_languages_hw1.md
@@ -269,18 +269,18 @@ by prepending the word "temp" to that number. We start
 with `temp0`, then `temp1`, and so on. To keep a counter,
 we can use a state monad:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 269 269 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}}
 Don't worry about the `Map.Map String [String]`, we'll get to that in a bit.
 For now, all we have to worry about is the second element of the tuple,
 the integer counting how many temporary variables we've used. We can
 get the current temporary variable as follows:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 271 274 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}}
 We can also get a fresh temporary variable like this:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 276 279 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}}
 Now, the
 {{< sidenote "left" "code-note" "code" >}}
@@ -297,7 +297,7 @@ source code for the blog (which includes this project)
 <a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>.
 {{< /sidenote >}}
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 364 369 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}}
 ##### Implementing "lazy evaluation"
 Lazy evaluation in functional programs usually arises from
@@ -344,20 +344,20 @@ and also of the dependencies of each variable (the variables that need
 to be access before the variable itself). We compute such a map for
 each selector as follows: 
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 337 337 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}}
 We update the existing map using `Map.union`:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 338 338 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}}
 And, after we're done generating expressions in the body of this selector,
 we clear it to its previous value `vs`:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 341 341 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}}
 We generate a single selector as follows:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 307 320 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}}
 This generates a function definition statement, which we will examine in
 generated Python code later on.
@@ -366,7 +366,7 @@ Solving the problem this way also introduces another gotcha: sometimes,
 a variable is produced by a function call, and other times the variable
 is just a Python variable. We write this as follows:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 322 327 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}}
 ##### Special Case Insertion
 This is a silly language for a single homework assignment. I'm not
@@ -377,7 +377,7 @@ a list, it can also return the list from its base case. Thus,
 that's all we will try to figure out. The checking code is so
 short that we can include the whole snippet at once:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 258 266 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}}
 `mergePossibleType`
 {{< sidenote "right" "bool-identity-note" "figures out" >}}
@@ -404,7 +404,7 @@ My Haskell linter actually suggested a pretty clever way of writing
 the whole "add a base case if this function returns a list" code.
 Check it out:
-{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 305 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}}
 Specifically, look at the line with `let fastReturn = ...`. It
 uses a list comprehension: we take a parameter `p` from the list of
--- a/content/blog/01_cs325_languages_hw2.md
+++ b/content/blog/01_cs325_languages_hw2.md
@@ -25,7 +25,7 @@ using a slightly-modified `mergesort`__. The trick is to maintain a counter
 of inversions in every recursive call to `mergesort`, updating
 it every time we take an element from the
 {{< sidenote "right" "right-note" "right list" >}}
-If this nomeclature is not clear to you, recall that
+If this nomenclature is not clear to you, recall that
 mergesort divides a list into two smaller lists. The
 "right list" refers to the second of the two, because
 if you visualize the original list as a rectangle, and cut
@@ -72,13 +72,19 @@ Again, let's start by visualizing what the solution will look like. How about th
 We divide the code into the same three steps that we described above. The first
 section is the initial state. Since it doesn't depend on anything, we expect
 it to be some kind of literal, like an integer. Next, we have the effect section,
-which has access to variables such as "STATE" (to access the current state)
+which has access to the variables below:
-and "LEFT" (to access the left list), or "L" to access the "name" of the left list.
+
-We use an `if`-statement to check if the origin of the element that was popped
+* `STATE`, to manipulate or check the current state.
-(held in the "SOURCE" variable) is the right list (denoted by "R"). If it is,
+* `LEFT` and `RIGHT`, to access the two lists being merged.
-we increment the counter (state) by the proper amount. In the combine step, we simply increment
+* `L` and `R`, constants that are used to compare against the `SOURCE` variable.
-the state by the counters from the left and right solutions, stored in "LSTATE" and "RSTATE".
+* `SOURCE`, to denote which list a number came from. 
-That's it!
+* `LSTATE` and `RSTATE`, to denote the final states from the two subproblems.
 We use an `if`-statement to check if the element that was popped came
 from the right list (by checking `SOURCE == R`). If it is, we increment the counter
 (state) by the proper amount. In the combine step, which has access to the
 same variables, we simply increment the state by the counters from the left
 and right solutions, stored in `LSTATE` and `RSTATE`. That's it!
 #### Implementation
 The implementation is not tricky at all. We don't need to use monads like we did last
@@ -87,14 +93,14 @@ time, and nor do we have to perform any fancy Python nested function declaration
 To keep with the Python convention of lowercase variables, we'll translate the
 uppercase "global" variables to lowercase. We'll do it like so:
-{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 211 220 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}}
-Note that we translated "L" and "R" to integer literals. We'll indicate the source of
+Note that we translated `L` and `R` to integer literals. We'll indicate the source of
 each element with an integer, since there's no real point to representing it with
 a string or a variable. We'll need to be aware of this when we implement the actual, generic
 mergesort code. Let's do that now:
-{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 145 205 >}}
+{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}}
 This is probably the ugliest part of this assignment: we handwrote a Python
 AST in Haskell that implements mergesort with our augmentations. Note that
@@ -151,3 +157,62 @@ we have to do is not specify any additional behavior. Cool, huh?
 That's the end of this post. If you liked this one (and the previous one!),
 keep an eye out for more!
 ### Appendix (Missing Homework Question)
 I should not view homework assignments on a small-screen device. There __was__ a third problem
 on homework 2:
 {{< codelines "text" "cs325-langs/hws/hw2.txt" 46 65 >}}
 This is not a mergesort variant, and adding support for it into our second language
 will prevent us from making it the neat specialized
 {{< sidenote "right" "dsl-note" "DSL" >}}
 DSL is a shortened form of "domain specific language", which was briefly
 described in another sidenote while solving homework 1.
 {{< /sidenote >}} that was just saw. We'll do something else, instead:
 we'll use the language we defined in homework 1 to solve this
 problem:
 ```
 empty() = [0, 0];
 longest(xs) =
    if |xs| != 0
    then _longest(longest(xs[0]), longest(xs[2]))
    else empty();
 _longest(l, r) = [max(l[0], r[0]) + 1, max(l[0]+r[0], max(l[1], r[1]))];
 ```
 {{< sidenote "right" "terrible-note" "This is quite terrible." >}}
 This is probably true with any program written in our first
 language.
 {{< /sidenote >}} In these 6 lines of code, there are two hacks
 to work around the peculiarities of the language.
 At each recursive call, we want to keep track of both the depth
 of the tree and the existing longest path. This is because
 the longest path could be found either somewhere down
 a subtree, or from combining the largest depths of
 two subtrees. To return two values from a function in Python,
 we'd use a tuple. Here, we use a list.
 Alarm bells should be going off here. There's no reason why we should
 ever return an empty list from the recursive call: at the very least, we
 want to return `[0,0]`. But placing such a list literal in a function
 will trigger the special case insertion. So, we have to hide this literal
 from the compiler. Fortunately, that's not too hard to do - the compiler
 is pretty halfhearted in its inference of types. Simply putting
 the literal behind a constant function (`empty`) does the trick.
 The program uses the subproblem depths multiple times in the
 final computation. We thus probably want to assign these values
 to names so we don't have to perform any repeated work. Since
 the only two mechanisms for
 {{< sidenote "right" "binding-note" "binding variables" >}}
 To bind a variable means to assign a value to it.
 {{< /sidenote >}} in this language are function calls
 and list selectors, we use a helper function `_longest`,
 which takes two subproblem solutions an combines them
 into a new solution. It's pretty obvious that `_longest`
 returns a list, so the compiler will try insert a base
 case. Fortunately, subproblem solutions are always
 lists of two numbers, so this doesn't affect us too much.
Author	SHA1	Message	Date
Danila Fedorin	765d497724	Address missing problem and make some other improvements in CS325HW2	2020-01-01 11:12:44 -08:00
Danila Fedorin	80410c9200	Extract common parsing code	2019-12-31 21:59:13 -08:00