From 80410c920038bc2d72755fb59663ef7cc7ddfbe7 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Tue, 31 Dec 2019 21:59:13 -0800 Subject: [PATCH] Extract common parsing code --- code/cs325-langs/src/CommonParsing.hs | 66 +++++++++++++++++++++ code/cs325-langs/src/LanguageOne.hs | 71 ++++++----------------- code/cs325-langs/src/LanguageTwo.hs | 80 ++++++-------------------- content/blog/00_cs325_languages_hw1.md | 22 +++---- content/blog/01_cs325_languages_hw2.md | 4 +- 5 files changed, 113 insertions(+), 130 deletions(-) create mode 100644 code/cs325-langs/src/CommonParsing.hs diff --git a/code/cs325-langs/src/CommonParsing.hs b/code/cs325-langs/src/CommonParsing.hs new file mode 100644 index 0000000..3e02f41 --- /dev/null +++ b/code/cs325-langs/src/CommonParsing.hs @@ -0,0 +1,66 @@ +module CommonParsing where +import Data.Char +import Data.Functor +import Text.Parsec +import Text.Parsec.Char +import Text.Parsec.Combinator + +type Parser a b = Parsec String a b + +kw :: String -> Parser a () +kw s = string s $> () + +kwIf :: Parser a () +kwIf = kw "if" + +kwThen :: Parser a () +kwThen = kw "then" + +kwElse :: Parser a () +kwElse = kw "else" + +kwState :: Parser a () +kwState = kw "state" + +kwEffect :: Parser a () +kwEffect = kw "effect" + +kwCombine :: Parser a () +kwCombine = kw "combine" + +kwRand :: Parser a () +kwRand = kw "rand" + +op :: String -> op -> Parser a op +op s o = string s $> o + +int :: Parser a Int +int = read <$> (many1 digit <* spaces) + +var :: [String] -> Parser a String +var reserved = + do + c <- satisfy $ \c -> isLetter c || c == '_' + cs <- many (satisfy isLetter <|> digit) <* spaces + let name = c:cs + if name `elem` reserved + then fail "Can't use reserved keyword as identifier" + else return name + +surround :: Char -> Char -> Parser a b -> Parser a b +surround c1 c2 pe = + do + char c1 >> spaces + e <- pe + spaces >> char c2 >> spaces + return e + +level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e +level c po pe = + do + e <- pe <* spaces + ops <- many $ try $ (flip . c <$> (po <* spaces) <*> pe) <* spaces + return $ foldl (flip ($)) e ops + +precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e +precedence = foldl . flip . level diff --git a/code/cs325-langs/src/LanguageOne.hs b/code/cs325-langs/src/LanguageOne.hs index 3cff31f..36514ee 100644 --- a/code/cs325-langs/src/LanguageOne.hs +++ b/code/cs325-langs/src/LanguageOne.hs @@ -1,5 +1,6 @@ module LanguageOne where import qualified PythonAst as Py +import qualified CommonParsing as P import Data.Bifunctor import Data.Char import Data.Functor @@ -54,31 +55,8 @@ data Prog = Prog [Function] {- Parser -} type Parser = Parsec String (Maybe Int) -parseInt :: Parser Int -parseInt = read <$> (many1 digit <* spaces) - parseVar :: Parser String -parseVar = - do - c <- satisfy (\c -> (isLetter c && isLower c) || c == '_') - cs <- many (satisfy isLetter <|> digit) - spaces - let var = c:cs - if var `elem` ["if", "then", "else", "rand"] - then fail "reserved" - else return var - -parseKwIf :: Parser () -parseKwIf = string "if" $> () - -parseKwThen :: Parser () -parseKwThen = string "then" $> () - -parseKwElse :: Parser () -parseKwElse = string "else" $> () - -parseKwRand :: Parser Expr -parseKwRand = string "rand" $> Random +parseVar = P.var ["if", "then", "else", "var"] parseThis :: Parser Expr parseThis = @@ -127,11 +105,11 @@ parseSelector = parseIfElse :: Parser Expr parseIfElse = do - parseKwIf >> spaces + P.kwIf >> spaces ec <- parseExpr - spaces >> parseKwThen >> spaces + spaces >> P.kwThen >> spaces et <- parseExpr - spaces >> parseKwElse >> spaces + spaces >> P.kwElse >> spaces ee <- parseExpr spaces return $ IfElse ec et ee @@ -162,7 +140,7 @@ parseParenthesized = parseBasicExpr :: Parser Expr parseBasicExpr = choice - [ IntLiteral <$> parseInt + [ IntLiteral <$> P.int , parseThis , parseList , parseSplit @@ -170,7 +148,7 @@ parseBasicExpr = choice , parseParameter , parseParenthesized , Var <$> try parseVar - , parseKwRand + , P.kwRand $> Random , parseIfElse ] @@ -203,33 +181,16 @@ parsePostfixedExpr = ps <- many parsePostfix return $ foldl (flip ($)) eb ps -parseOp :: String -> Op -> Parser Op -parseOp s o = try (string s) >> return o - -parseLevel :: Parser Expr -> Parser Op -> Parser Expr -parseLevel pe po = - do - start <- pe - spaces - ops <- many $ try $ do - op <- po - spaces - val <- pe - spaces - return (op, val) - spaces - return $ foldl (\l (o, r) -> BinOp o l r) start ops - parseExpr :: Parser Expr -parseExpr = foldl parseLevel parsePostfixedExpr - [ parseOp "*" Multiply, parseOp "/" Divide - , parseOp "+" Add, parseOp "-" Subtract - , parseOp "<<" Insert - , parseOp "++" Concat - , parseOp "<=" LessThanEq <|> parseOp ">=" GreaterThanEq <|> - parseOp "<" LessThan <|> parseOp ">" GreaterThan <|> - parseOp "==" Equal <|> parseOp "!=" NotEqual - , parseOp "&&" And <|> parseOp "||" Or +parseExpr = P.precedence BinOp parsePostfixedExpr + [ P.op "*" Multiply, P.op "/" Divide + , P.op "+" Add, P.op "-" Subtract + , P.op "<<" Insert + , P.op "++" Concat + , try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|> + P.op "<" LessThan <|> P.op ">" GreaterThan <|> + P.op "==" Equal <|> P.op "!=" NotEqual + , P.op "&&" And <|> P.op "||" Or ] parseFunction :: Parser Function diff --git a/code/cs325-langs/src/LanguageTwo.hs b/code/cs325-langs/src/LanguageTwo.hs index 05d8514..32e4b7d 100644 --- a/code/cs325-langs/src/LanguageTwo.hs +++ b/code/cs325-langs/src/LanguageTwo.hs @@ -1,5 +1,6 @@ module LanguageTwo where import qualified PythonAst as Py +import qualified CommonParsing as P import Data.Char import Data.Functor import Text.Parsec @@ -33,95 +34,50 @@ data Prog = Prog Expr [Stmt] [Stmt] {- Parser -} type Parser = Parsec String () -parseKw :: String -> Parser () -parseKw s = string s $> () - -parseKwIf :: Parser () -parseKwIf = parseKw "if" - -parseKwElse :: Parser () -parseKwElse = parseKw "else" - -parseKwState :: Parser () -parseKwState = parseKw "state" - -parseKwEffect :: Parser () -parseKwEffect = parseKw "effect" - -parseKwCombine :: Parser () -parseKwCombine = parseKw "combine" - -parseOp :: String -> Op -> Parser Op -parseOp s o = string s $> o - -parseInt :: Parser Int -parseInt = read <$> (many1 digit <* spaces) - parseVar :: Parser String -parseVar = - do - c <- satisfy $ \c -> isLetter c || c == '_' - cs <- many (satisfy isLetter <|> digit) <* spaces - let name = c:cs - if name `elem` ["if", "else", "state", "effect", "combine"] - then fail "Can't use reserved keyword as identifier" - else return name - -parseSurrounded :: Char -> Char -> Parser a -> Parser a -parseSurrounded c1 c2 pe = - do - char c1 >> spaces - e <- pe - spaces >> char c2 >> spaces - return e +parseVar = P.var [ "if", "else", "state", "effect", "combine" ] parseLength :: Parser Expr -parseLength = Length <$> parseSurrounded '|' '|' parseExpr +parseLength = Length <$> P.surround '|' '|' parseExpr parseParenthesized :: Parser Expr -parseParenthesized = parseSurrounded '(' ')' parseExpr +parseParenthesized = P.surround '(' ')' parseExpr parseBasic :: Parser Expr parseBasic = choice - [ IntLiteral <$> parseInt + [ IntLiteral <$> P.int , Var <$> parseVar , parseLength , parseParenthesized ] -parseLevel :: Parser Op -> Parser Expr -> Parser Expr -parseLevel po pe = - do - e <- pe <* spaces - ops <- many ((flip . BinOp <$> (po <* spaces) <*> pe) <* spaces) - return $ foldl (flip ($)) e ops parseExpr :: Parser Expr -parseExpr = foldl (flip parseLevel) parseBasic - [ parseOp "*" Multiply <|> parseOp "/" Divide - , parseOp "+" Add <|> parseOp "-" Subtract - , parseOp "==" Equal <|> parseOp "!=" NotEqual - , parseOp "&&" And - , try $ parseOp "||" Or +parseExpr = P.precedence BinOp parseBasic + [ P.op "*" Multiply <|> P.op "/" Divide + , P.op "+" Add <|> P.op "-" Subtract + , P.op "==" Equal <|> P.op "!=" NotEqual + , P.op "&&" And + , try $ P.op "||" Or ] parseIf :: Parser Stmt parseIf = do - parseKwIf >> spaces + P.kwIf >> spaces c <- parseParenthesized t <- parseStmt <* spaces - e <- (Just <$> (parseKwElse >> spaces *> parseStmt)) <|> return Nothing + e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing return $ IfElse c t e parseBlockStmts :: Parser [Stmt] -parseBlockStmts = parseSurrounded '{' '}' (many parseStmt) +parseBlockStmts = P.surround '{' '}' (many parseStmt) parseBlock :: Parser Stmt parseBlock = Block <$> parseBlockStmts parseAssign :: Parser Stmt parseAssign = Assign <$> - (parseVar <* spaces <* char '=' <* spaces) <*> + (parseVar <* char '=' <* spaces) <*> parseExpr <* (char ';' >> spaces) parseStmt :: Parser Stmt @@ -133,9 +89,9 @@ parseStmt = choice parseProgram :: Parser Prog parseProgram = do - state <- parseKwState >> spaces *> parseExpr <* char ';' <* spaces - effect <- parseKwEffect >> spaces *> parseBlockStmts <* spaces - combined <- parseKwCombine >> spaces *> parseBlockStmts <* spaces + state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces + effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces + combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces return $ Prog state effect combined parse :: String -> String -> Either ParseError Prog diff --git a/content/blog/00_cs325_languages_hw1.md b/content/blog/00_cs325_languages_hw1.md index 3071468..1a365a1 100644 --- a/content/blog/00_cs325_languages_hw1.md +++ b/content/blog/00_cs325_languages_hw1.md @@ -269,18 +269,18 @@ by prepending the word "temp" to that number. We start with `temp0`, then `temp1`, and so on. To keep a counter, we can use a state monad: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 269 269 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}} Don't worry about the `Map.Map String [String]`, we'll get to that in a bit. For now, all we have to worry about is the second element of the tuple, the integer counting how many temporary variables we've used. We can get the current temporary variable as follows: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 271 274 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}} We can also get a fresh temporary variable like this: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 276 279 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}} Now, the {{< sidenote "left" "code-note" "code" >}} @@ -297,7 +297,7 @@ source code for the blog (which includes this project) here. {{< /sidenote >}} -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 364 369 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}} ##### Implementing "lazy evaluation" Lazy evaluation in functional programs usually arises from @@ -344,20 +344,20 @@ and also of the dependencies of each variable (the variables that need to be access before the variable itself). We compute such a map for each selector as follows: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 337 337 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}} We update the existing map using `Map.union`: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 338 338 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}} And, after we're done generating expressions in the body of this selector, we clear it to its previous value `vs`: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 341 341 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}} We generate a single selector as follows: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 307 320 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}} This generates a function definition statement, which we will examine in generated Python code later on. @@ -366,7 +366,7 @@ Solving the problem this way also introduces another gotcha: sometimes, a variable is produced by a function call, and other times the variable is just a Python variable. We write this as follows: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 322 327 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}} ##### Special Case Insertion This is a silly language for a single homework assignment. I'm not @@ -377,7 +377,7 @@ a list, it can also return the list from its base case. Thus, that's all we will try to figure out. The checking code is so short that we can include the whole snippet at once: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 258 266 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}} `mergePossibleType` {{< sidenote "right" "bool-identity-note" "figures out" >}} @@ -404,7 +404,7 @@ My Haskell linter actually suggested a pretty clever way of writing the whole "add a base case if this function returns a list" code. Check it out: -{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 305 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}} Specifically, look at the line with `let fastReturn = ...`. It uses a list comprehension: we take a parameter `p` from the list of diff --git a/content/blog/01_cs325_languages_hw2.md b/content/blog/01_cs325_languages_hw2.md index ed36e6b..c5f7d22 100644 --- a/content/blog/01_cs325_languages_hw2.md +++ b/content/blog/01_cs325_languages_hw2.md @@ -87,14 +87,14 @@ time, and nor do we have to perform any fancy Python nested function declaration To keep with the Python convention of lowercase variables, we'll translate the uppercase "global" variables to lowercase. We'll do it like so: -{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 211 220 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}} Note that we translated "L" and "R" to integer literals. We'll indicate the source of each element with an integer, since there's no real point to representing it with a string or a variable. We'll need to be aware of this when we implement the actual, generic mergesort code. Let's do that now: -{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 145 205 >}} +{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}} This is probably the ugliest part of this assignment: we handwrote a Python AST in Haskell that implements mergesort with our augmentations. Note that