Extract common parsing code

This commit is contained in:
Danila Fedorin 2019-12-31 21:59:13 -08:00
parent 4e918db5cb
commit 80410c9200
5 changed files with 113 additions and 130 deletions

View File

@ -0,0 +1,66 @@
module CommonParsing where
import Data.Char
import Data.Functor
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.Combinator
type Parser a b = Parsec String a b
kw :: String -> Parser a ()
kw s = string s $> ()
kwIf :: Parser a ()
kwIf = kw "if"
kwThen :: Parser a ()
kwThen = kw "then"
kwElse :: Parser a ()
kwElse = kw "else"
kwState :: Parser a ()
kwState = kw "state"
kwEffect :: Parser a ()
kwEffect = kw "effect"
kwCombine :: Parser a ()
kwCombine = kw "combine"
kwRand :: Parser a ()
kwRand = kw "rand"
op :: String -> op -> Parser a op
op s o = string s $> o
int :: Parser a Int
int = read <$> (many1 digit <* spaces)
var :: [String] -> Parser a String
var reserved =
do
c <- satisfy $ \c -> isLetter c || c == '_'
cs <- many (satisfy isLetter <|> digit) <* spaces
let name = c:cs
if name `elem` reserved
then fail "Can't use reserved keyword as identifier"
else return name
surround :: Char -> Char -> Parser a b -> Parser a b
surround c1 c2 pe =
do
char c1 >> spaces
e <- pe
spaces >> char c2 >> spaces
return e
level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e
level c po pe =
do
e <- pe <* spaces
ops <- many $ try $ (flip . c <$> (po <* spaces) <*> pe) <* spaces
return $ foldl (flip ($)) e ops
precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e
precedence = foldl . flip . level

View File

@ -1,5 +1,6 @@
module LanguageOne where module LanguageOne where
import qualified PythonAst as Py import qualified PythonAst as Py
import qualified CommonParsing as P
import Data.Bifunctor import Data.Bifunctor
import Data.Char import Data.Char
import Data.Functor import Data.Functor
@ -54,31 +55,8 @@ data Prog = Prog [Function]
{- Parser -} {- Parser -}
type Parser = Parsec String (Maybe Int) type Parser = Parsec String (Maybe Int)
parseInt :: Parser Int
parseInt = read <$> (many1 digit <* spaces)
parseVar :: Parser String parseVar :: Parser String
parseVar = parseVar = P.var ["if", "then", "else", "var"]
do
c <- satisfy (\c -> (isLetter c && isLower c) || c == '_')
cs <- many (satisfy isLetter <|> digit)
spaces
let var = c:cs
if var `elem` ["if", "then", "else", "rand"]
then fail "reserved"
else return var
parseKwIf :: Parser ()
parseKwIf = string "if" $> ()
parseKwThen :: Parser ()
parseKwThen = string "then" $> ()
parseKwElse :: Parser ()
parseKwElse = string "else" $> ()
parseKwRand :: Parser Expr
parseKwRand = string "rand" $> Random
parseThis :: Parser Expr parseThis :: Parser Expr
parseThis = parseThis =
@ -127,11 +105,11 @@ parseSelector =
parseIfElse :: Parser Expr parseIfElse :: Parser Expr
parseIfElse = parseIfElse =
do do
parseKwIf >> spaces P.kwIf >> spaces
ec <- parseExpr ec <- parseExpr
spaces >> parseKwThen >> spaces spaces >> P.kwThen >> spaces
et <- parseExpr et <- parseExpr
spaces >> parseKwElse >> spaces spaces >> P.kwElse >> spaces
ee <- parseExpr ee <- parseExpr
spaces spaces
return $ IfElse ec et ee return $ IfElse ec et ee
@ -162,7 +140,7 @@ parseParenthesized =
parseBasicExpr :: Parser Expr parseBasicExpr :: Parser Expr
parseBasicExpr = choice parseBasicExpr = choice
[ IntLiteral <$> parseInt [ IntLiteral <$> P.int
, parseThis , parseThis
, parseList , parseList
, parseSplit , parseSplit
@ -170,7 +148,7 @@ parseBasicExpr = choice
, parseParameter , parseParameter
, parseParenthesized , parseParenthesized
, Var <$> try parseVar , Var <$> try parseVar
, parseKwRand , P.kwRand $> Random
, parseIfElse , parseIfElse
] ]
@ -203,33 +181,16 @@ parsePostfixedExpr =
ps <- many parsePostfix ps <- many parsePostfix
return $ foldl (flip ($)) eb ps return $ foldl (flip ($)) eb ps
parseOp :: String -> Op -> Parser Op
parseOp s o = try (string s) >> return o
parseLevel :: Parser Expr -> Parser Op -> Parser Expr
parseLevel pe po =
do
start <- pe
spaces
ops <- many $ try $ do
op <- po
spaces
val <- pe
spaces
return (op, val)
spaces
return $ foldl (\l (o, r) -> BinOp o l r) start ops
parseExpr :: Parser Expr parseExpr :: Parser Expr
parseExpr = foldl parseLevel parsePostfixedExpr parseExpr = P.precedence BinOp parsePostfixedExpr
[ parseOp "*" Multiply, parseOp "/" Divide [ P.op "*" Multiply, P.op "/" Divide
, parseOp "+" Add, parseOp "-" Subtract , P.op "+" Add, P.op "-" Subtract
, parseOp "<<" Insert , P.op "<<" Insert
, parseOp "++" Concat , P.op "++" Concat
, parseOp "<=" LessThanEq <|> parseOp ">=" GreaterThanEq <|> , try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|>
parseOp "<" LessThan <|> parseOp ">" GreaterThan <|> P.op "<" LessThan <|> P.op ">" GreaterThan <|>
parseOp "==" Equal <|> parseOp "!=" NotEqual P.op "==" Equal <|> P.op "!=" NotEqual
, parseOp "&&" And <|> parseOp "||" Or , P.op "&&" And <|> P.op "||" Or
] ]
parseFunction :: Parser Function parseFunction :: Parser Function

View File

@ -1,5 +1,6 @@
module LanguageTwo where module LanguageTwo where
import qualified PythonAst as Py import qualified PythonAst as Py
import qualified CommonParsing as P
import Data.Char import Data.Char
import Data.Functor import Data.Functor
import Text.Parsec import Text.Parsec
@ -33,95 +34,50 @@ data Prog = Prog Expr [Stmt] [Stmt]
{- Parser -} {- Parser -}
type Parser = Parsec String () type Parser = Parsec String ()
parseKw :: String -> Parser ()
parseKw s = string s $> ()
parseKwIf :: Parser ()
parseKwIf = parseKw "if"
parseKwElse :: Parser ()
parseKwElse = parseKw "else"
parseKwState :: Parser ()
parseKwState = parseKw "state"
parseKwEffect :: Parser ()
parseKwEffect = parseKw "effect"
parseKwCombine :: Parser ()
parseKwCombine = parseKw "combine"
parseOp :: String -> Op -> Parser Op
parseOp s o = string s $> o
parseInt :: Parser Int
parseInt = read <$> (many1 digit <* spaces)
parseVar :: Parser String parseVar :: Parser String
parseVar = parseVar = P.var [ "if", "else", "state", "effect", "combine" ]
do
c <- satisfy $ \c -> isLetter c || c == '_'
cs <- many (satisfy isLetter <|> digit) <* spaces
let name = c:cs
if name `elem` ["if", "else", "state", "effect", "combine"]
then fail "Can't use reserved keyword as identifier"
else return name
parseSurrounded :: Char -> Char -> Parser a -> Parser a
parseSurrounded c1 c2 pe =
do
char c1 >> spaces
e <- pe
spaces >> char c2 >> spaces
return e
parseLength :: Parser Expr parseLength :: Parser Expr
parseLength = Length <$> parseSurrounded '|' '|' parseExpr parseLength = Length <$> P.surround '|' '|' parseExpr
parseParenthesized :: Parser Expr parseParenthesized :: Parser Expr
parseParenthesized = parseSurrounded '(' ')' parseExpr parseParenthesized = P.surround '(' ')' parseExpr
parseBasic :: Parser Expr parseBasic :: Parser Expr
parseBasic = choice parseBasic = choice
[ IntLiteral <$> parseInt [ IntLiteral <$> P.int
, Var <$> parseVar , Var <$> parseVar
, parseLength , parseLength
, parseParenthesized , parseParenthesized
] ]
parseLevel :: Parser Op -> Parser Expr -> Parser Expr
parseLevel po pe =
do
e <- pe <* spaces
ops <- many ((flip . BinOp <$> (po <* spaces) <*> pe) <* spaces)
return $ foldl (flip ($)) e ops
parseExpr :: Parser Expr parseExpr :: Parser Expr
parseExpr = foldl (flip parseLevel) parseBasic parseExpr = P.precedence BinOp parseBasic
[ parseOp "*" Multiply <|> parseOp "/" Divide [ P.op "*" Multiply <|> P.op "/" Divide
, parseOp "+" Add <|> parseOp "-" Subtract , P.op "+" Add <|> P.op "-" Subtract
, parseOp "==" Equal <|> parseOp "!=" NotEqual , P.op "==" Equal <|> P.op "!=" NotEqual
, parseOp "&&" And , P.op "&&" And
, try $ parseOp "||" Or , try $ P.op "||" Or
] ]
parseIf :: Parser Stmt parseIf :: Parser Stmt
parseIf = do parseIf = do
parseKwIf >> spaces P.kwIf >> spaces
c <- parseParenthesized c <- parseParenthesized
t <- parseStmt <* spaces t <- parseStmt <* spaces
e <- (Just <$> (parseKwElse >> spaces *> parseStmt)) <|> return Nothing e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing
return $ IfElse c t e return $ IfElse c t e
parseBlockStmts :: Parser [Stmt] parseBlockStmts :: Parser [Stmt]
parseBlockStmts = parseSurrounded '{' '}' (many parseStmt) parseBlockStmts = P.surround '{' '}' (many parseStmt)
parseBlock :: Parser Stmt parseBlock :: Parser Stmt
parseBlock = Block <$> parseBlockStmts parseBlock = Block <$> parseBlockStmts
parseAssign :: Parser Stmt parseAssign :: Parser Stmt
parseAssign = Assign <$> parseAssign = Assign <$>
(parseVar <* spaces <* char '=' <* spaces) <*> (parseVar <* char '=' <* spaces) <*>
parseExpr <* (char ';' >> spaces) parseExpr <* (char ';' >> spaces)
parseStmt :: Parser Stmt parseStmt :: Parser Stmt
@ -133,9 +89,9 @@ parseStmt = choice
parseProgram :: Parser Prog parseProgram :: Parser Prog
parseProgram = do parseProgram = do
state <- parseKwState >> spaces *> parseExpr <* char ';' <* spaces state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces
effect <- parseKwEffect >> spaces *> parseBlockStmts <* spaces effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces
combined <- parseKwCombine >> spaces *> parseBlockStmts <* spaces combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces
return $ Prog state effect combined return $ Prog state effect combined
parse :: String -> String -> Either ParseError Prog parse :: String -> String -> Either ParseError Prog

View File

@ -269,18 +269,18 @@ by prepending the word "temp" to that number. We start
with `temp0`, then `temp1`, and so on. To keep a counter, with `temp0`, then `temp1`, and so on. To keep a counter,
we can use a state monad: we can use a state monad:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 269 269 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}}
Don't worry about the `Map.Map String [String]`, we'll get to that in a bit. Don't worry about the `Map.Map String [String]`, we'll get to that in a bit.
For now, all we have to worry about is the second element of the tuple, For now, all we have to worry about is the second element of the tuple,
the integer counting how many temporary variables we've used. We can the integer counting how many temporary variables we've used. We can
get the current temporary variable as follows: get the current temporary variable as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 271 274 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}}
We can also get a fresh temporary variable like this: We can also get a fresh temporary variable like this:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 276 279 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}}
Now, the Now, the
{{< sidenote "left" "code-note" "code" >}} {{< sidenote "left" "code-note" "code" >}}
@ -297,7 +297,7 @@ source code for the blog (which includes this project)
<a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>. <a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>.
{{< /sidenote >}} {{< /sidenote >}}
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 364 369 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}}
##### Implementing "lazy evaluation" ##### Implementing "lazy evaluation"
Lazy evaluation in functional programs usually arises from Lazy evaluation in functional programs usually arises from
@ -344,20 +344,20 @@ and also of the dependencies of each variable (the variables that need
to be access before the variable itself). We compute such a map for to be access before the variable itself). We compute such a map for
each selector as follows: each selector as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 337 337 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}}
We update the existing map using `Map.union`: We update the existing map using `Map.union`:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 338 338 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}}
And, after we're done generating expressions in the body of this selector, And, after we're done generating expressions in the body of this selector,
we clear it to its previous value `vs`: we clear it to its previous value `vs`:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 341 341 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}}
We generate a single selector as follows: We generate a single selector as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 307 320 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}}
This generates a function definition statement, which we will examine in This generates a function definition statement, which we will examine in
generated Python code later on. generated Python code later on.
@ -366,7 +366,7 @@ Solving the problem this way also introduces another gotcha: sometimes,
a variable is produced by a function call, and other times the variable a variable is produced by a function call, and other times the variable
is just a Python variable. We write this as follows: is just a Python variable. We write this as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 322 327 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}}
##### Special Case Insertion ##### Special Case Insertion
This is a silly language for a single homework assignment. I'm not This is a silly language for a single homework assignment. I'm not
@ -377,7 +377,7 @@ a list, it can also return the list from its base case. Thus,
that's all we will try to figure out. The checking code is so that's all we will try to figure out. The checking code is so
short that we can include the whole snippet at once: short that we can include the whole snippet at once:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 258 266 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}}
`mergePossibleType` `mergePossibleType`
{{< sidenote "right" "bool-identity-note" "figures out" >}} {{< sidenote "right" "bool-identity-note" "figures out" >}}
@ -404,7 +404,7 @@ My Haskell linter actually suggested a pretty clever way of writing
the whole "add a base case if this function returns a list" code. the whole "add a base case if this function returns a list" code.
Check it out: Check it out:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 305 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}}
Specifically, look at the line with `let fastReturn = ...`. It Specifically, look at the line with `let fastReturn = ...`. It
uses a list comprehension: we take a parameter `p` from the list of uses a list comprehension: we take a parameter `p` from the list of

View File

@ -87,14 +87,14 @@ time, and nor do we have to perform any fancy Python nested function declaration
To keep with the Python convention of lowercase variables, we'll translate the To keep with the Python convention of lowercase variables, we'll translate the
uppercase "global" variables to lowercase. We'll do it like so: uppercase "global" variables to lowercase. We'll do it like so:
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 211 220 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}}
Note that we translated "L" and "R" to integer literals. We'll indicate the source of Note that we translated "L" and "R" to integer literals. We'll indicate the source of
each element with an integer, since there's no real point to representing it with each element with an integer, since there's no real point to representing it with
a string or a variable. We'll need to be aware of this when we implement the actual, generic a string or a variable. We'll need to be aware of this when we implement the actual, generic
mergesort code. Let's do that now: mergesort code. Let's do that now:
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 145 205 >}} {{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}}
This is probably the ugliest part of this assignment: we handwrote a Python This is probably the ugliest part of this assignment: we handwrote a Python
AST in Haskell that implements mergesort with our augmentations. Note that AST in Haskell that implements mergesort with our augmentations. Note that