Extract common parsing code
This commit is contained in:
parent
4e918db5cb
commit
80410c9200
66
code/cs325-langs/src/CommonParsing.hs
Normal file
66
code/cs325-langs/src/CommonParsing.hs
Normal file
@ -0,0 +1,66 @@
|
||||
module CommonParsing where
|
||||
import Data.Char
|
||||
import Data.Functor
|
||||
import Text.Parsec
|
||||
import Text.Parsec.Char
|
||||
import Text.Parsec.Combinator
|
||||
|
||||
type Parser a b = Parsec String a b
|
||||
|
||||
kw :: String -> Parser a ()
|
||||
kw s = string s $> ()
|
||||
|
||||
kwIf :: Parser a ()
|
||||
kwIf = kw "if"
|
||||
|
||||
kwThen :: Parser a ()
|
||||
kwThen = kw "then"
|
||||
|
||||
kwElse :: Parser a ()
|
||||
kwElse = kw "else"
|
||||
|
||||
kwState :: Parser a ()
|
||||
kwState = kw "state"
|
||||
|
||||
kwEffect :: Parser a ()
|
||||
kwEffect = kw "effect"
|
||||
|
||||
kwCombine :: Parser a ()
|
||||
kwCombine = kw "combine"
|
||||
|
||||
kwRand :: Parser a ()
|
||||
kwRand = kw "rand"
|
||||
|
||||
op :: String -> op -> Parser a op
|
||||
op s o = string s $> o
|
||||
|
||||
int :: Parser a Int
|
||||
int = read <$> (many1 digit <* spaces)
|
||||
|
||||
var :: [String] -> Parser a String
|
||||
var reserved =
|
||||
do
|
||||
c <- satisfy $ \c -> isLetter c || c == '_'
|
||||
cs <- many (satisfy isLetter <|> digit) <* spaces
|
||||
let name = c:cs
|
||||
if name `elem` reserved
|
||||
then fail "Can't use reserved keyword as identifier"
|
||||
else return name
|
||||
|
||||
surround :: Char -> Char -> Parser a b -> Parser a b
|
||||
surround c1 c2 pe =
|
||||
do
|
||||
char c1 >> spaces
|
||||
e <- pe
|
||||
spaces >> char c2 >> spaces
|
||||
return e
|
||||
|
||||
level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e
|
||||
level c po pe =
|
||||
do
|
||||
e <- pe <* spaces
|
||||
ops <- many $ try $ (flip . c <$> (po <* spaces) <*> pe) <* spaces
|
||||
return $ foldl (flip ($)) e ops
|
||||
|
||||
precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e
|
||||
precedence = foldl . flip . level
|
@ -1,5 +1,6 @@
|
||||
module LanguageOne where
|
||||
import qualified PythonAst as Py
|
||||
import qualified CommonParsing as P
|
||||
import Data.Bifunctor
|
||||
import Data.Char
|
||||
import Data.Functor
|
||||
@ -54,31 +55,8 @@ data Prog = Prog [Function]
|
||||
{- Parser -}
|
||||
type Parser = Parsec String (Maybe Int)
|
||||
|
||||
parseInt :: Parser Int
|
||||
parseInt = read <$> (many1 digit <* spaces)
|
||||
|
||||
parseVar :: Parser String
|
||||
parseVar =
|
||||
do
|
||||
c <- satisfy (\c -> (isLetter c && isLower c) || c == '_')
|
||||
cs <- many (satisfy isLetter <|> digit)
|
||||
spaces
|
||||
let var = c:cs
|
||||
if var `elem` ["if", "then", "else", "rand"]
|
||||
then fail "reserved"
|
||||
else return var
|
||||
|
||||
parseKwIf :: Parser ()
|
||||
parseKwIf = string "if" $> ()
|
||||
|
||||
parseKwThen :: Parser ()
|
||||
parseKwThen = string "then" $> ()
|
||||
|
||||
parseKwElse :: Parser ()
|
||||
parseKwElse = string "else" $> ()
|
||||
|
||||
parseKwRand :: Parser Expr
|
||||
parseKwRand = string "rand" $> Random
|
||||
parseVar = P.var ["if", "then", "else", "var"]
|
||||
|
||||
parseThis :: Parser Expr
|
||||
parseThis =
|
||||
@ -127,11 +105,11 @@ parseSelector =
|
||||
parseIfElse :: Parser Expr
|
||||
parseIfElse =
|
||||
do
|
||||
parseKwIf >> spaces
|
||||
P.kwIf >> spaces
|
||||
ec <- parseExpr
|
||||
spaces >> parseKwThen >> spaces
|
||||
spaces >> P.kwThen >> spaces
|
||||
et <- parseExpr
|
||||
spaces >> parseKwElse >> spaces
|
||||
spaces >> P.kwElse >> spaces
|
||||
ee <- parseExpr
|
||||
spaces
|
||||
return $ IfElse ec et ee
|
||||
@ -162,7 +140,7 @@ parseParenthesized =
|
||||
|
||||
parseBasicExpr :: Parser Expr
|
||||
parseBasicExpr = choice
|
||||
[ IntLiteral <$> parseInt
|
||||
[ IntLiteral <$> P.int
|
||||
, parseThis
|
||||
, parseList
|
||||
, parseSplit
|
||||
@ -170,7 +148,7 @@ parseBasicExpr = choice
|
||||
, parseParameter
|
||||
, parseParenthesized
|
||||
, Var <$> try parseVar
|
||||
, parseKwRand
|
||||
, P.kwRand $> Random
|
||||
, parseIfElse
|
||||
]
|
||||
|
||||
@ -203,33 +181,16 @@ parsePostfixedExpr =
|
||||
ps <- many parsePostfix
|
||||
return $ foldl (flip ($)) eb ps
|
||||
|
||||
parseOp :: String -> Op -> Parser Op
|
||||
parseOp s o = try (string s) >> return o
|
||||
|
||||
parseLevel :: Parser Expr -> Parser Op -> Parser Expr
|
||||
parseLevel pe po =
|
||||
do
|
||||
start <- pe
|
||||
spaces
|
||||
ops <- many $ try $ do
|
||||
op <- po
|
||||
spaces
|
||||
val <- pe
|
||||
spaces
|
||||
return (op, val)
|
||||
spaces
|
||||
return $ foldl (\l (o, r) -> BinOp o l r) start ops
|
||||
|
||||
parseExpr :: Parser Expr
|
||||
parseExpr = foldl parseLevel parsePostfixedExpr
|
||||
[ parseOp "*" Multiply, parseOp "/" Divide
|
||||
, parseOp "+" Add, parseOp "-" Subtract
|
||||
, parseOp "<<" Insert
|
||||
, parseOp "++" Concat
|
||||
, parseOp "<=" LessThanEq <|> parseOp ">=" GreaterThanEq <|>
|
||||
parseOp "<" LessThan <|> parseOp ">" GreaterThan <|>
|
||||
parseOp "==" Equal <|> parseOp "!=" NotEqual
|
||||
, parseOp "&&" And <|> parseOp "||" Or
|
||||
parseExpr = P.precedence BinOp parsePostfixedExpr
|
||||
[ P.op "*" Multiply, P.op "/" Divide
|
||||
, P.op "+" Add, P.op "-" Subtract
|
||||
, P.op "<<" Insert
|
||||
, P.op "++" Concat
|
||||
, try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|>
|
||||
P.op "<" LessThan <|> P.op ">" GreaterThan <|>
|
||||
P.op "==" Equal <|> P.op "!=" NotEqual
|
||||
, P.op "&&" And <|> P.op "||" Or
|
||||
]
|
||||
|
||||
parseFunction :: Parser Function
|
||||
|
@ -1,5 +1,6 @@
|
||||
module LanguageTwo where
|
||||
import qualified PythonAst as Py
|
||||
import qualified CommonParsing as P
|
||||
import Data.Char
|
||||
import Data.Functor
|
||||
import Text.Parsec
|
||||
@ -33,95 +34,50 @@ data Prog = Prog Expr [Stmt] [Stmt]
|
||||
{- Parser -}
|
||||
type Parser = Parsec String ()
|
||||
|
||||
parseKw :: String -> Parser ()
|
||||
parseKw s = string s $> ()
|
||||
|
||||
parseKwIf :: Parser ()
|
||||
parseKwIf = parseKw "if"
|
||||
|
||||
parseKwElse :: Parser ()
|
||||
parseKwElse = parseKw "else"
|
||||
|
||||
parseKwState :: Parser ()
|
||||
parseKwState = parseKw "state"
|
||||
|
||||
parseKwEffect :: Parser ()
|
||||
parseKwEffect = parseKw "effect"
|
||||
|
||||
parseKwCombine :: Parser ()
|
||||
parseKwCombine = parseKw "combine"
|
||||
|
||||
parseOp :: String -> Op -> Parser Op
|
||||
parseOp s o = string s $> o
|
||||
|
||||
parseInt :: Parser Int
|
||||
parseInt = read <$> (many1 digit <* spaces)
|
||||
|
||||
parseVar :: Parser String
|
||||
parseVar =
|
||||
do
|
||||
c <- satisfy $ \c -> isLetter c || c == '_'
|
||||
cs <- many (satisfy isLetter <|> digit) <* spaces
|
||||
let name = c:cs
|
||||
if name `elem` ["if", "else", "state", "effect", "combine"]
|
||||
then fail "Can't use reserved keyword as identifier"
|
||||
else return name
|
||||
|
||||
parseSurrounded :: Char -> Char -> Parser a -> Parser a
|
||||
parseSurrounded c1 c2 pe =
|
||||
do
|
||||
char c1 >> spaces
|
||||
e <- pe
|
||||
spaces >> char c2 >> spaces
|
||||
return e
|
||||
parseVar = P.var [ "if", "else", "state", "effect", "combine" ]
|
||||
|
||||
parseLength :: Parser Expr
|
||||
parseLength = Length <$> parseSurrounded '|' '|' parseExpr
|
||||
parseLength = Length <$> P.surround '|' '|' parseExpr
|
||||
|
||||
parseParenthesized :: Parser Expr
|
||||
parseParenthesized = parseSurrounded '(' ')' parseExpr
|
||||
parseParenthesized = P.surround '(' ')' parseExpr
|
||||
|
||||
parseBasic :: Parser Expr
|
||||
parseBasic = choice
|
||||
[ IntLiteral <$> parseInt
|
||||
[ IntLiteral <$> P.int
|
||||
, Var <$> parseVar
|
||||
, parseLength
|
||||
, parseParenthesized
|
||||
]
|
||||
|
||||
parseLevel :: Parser Op -> Parser Expr -> Parser Expr
|
||||
parseLevel po pe =
|
||||
do
|
||||
e <- pe <* spaces
|
||||
ops <- many ((flip . BinOp <$> (po <* spaces) <*> pe) <* spaces)
|
||||
return $ foldl (flip ($)) e ops
|
||||
|
||||
parseExpr :: Parser Expr
|
||||
parseExpr = foldl (flip parseLevel) parseBasic
|
||||
[ parseOp "*" Multiply <|> parseOp "/" Divide
|
||||
, parseOp "+" Add <|> parseOp "-" Subtract
|
||||
, parseOp "==" Equal <|> parseOp "!=" NotEqual
|
||||
, parseOp "&&" And
|
||||
, try $ parseOp "||" Or
|
||||
parseExpr = P.precedence BinOp parseBasic
|
||||
[ P.op "*" Multiply <|> P.op "/" Divide
|
||||
, P.op "+" Add <|> P.op "-" Subtract
|
||||
, P.op "==" Equal <|> P.op "!=" NotEqual
|
||||
, P.op "&&" And
|
||||
, try $ P.op "||" Or
|
||||
]
|
||||
|
||||
parseIf :: Parser Stmt
|
||||
parseIf = do
|
||||
parseKwIf >> spaces
|
||||
P.kwIf >> spaces
|
||||
c <- parseParenthesized
|
||||
t <- parseStmt <* spaces
|
||||
e <- (Just <$> (parseKwElse >> spaces *> parseStmt)) <|> return Nothing
|
||||
e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing
|
||||
return $ IfElse c t e
|
||||
|
||||
parseBlockStmts :: Parser [Stmt]
|
||||
parseBlockStmts = parseSurrounded '{' '}' (many parseStmt)
|
||||
parseBlockStmts = P.surround '{' '}' (many parseStmt)
|
||||
|
||||
parseBlock :: Parser Stmt
|
||||
parseBlock = Block <$> parseBlockStmts
|
||||
|
||||
parseAssign :: Parser Stmt
|
||||
parseAssign = Assign <$>
|
||||
(parseVar <* spaces <* char '=' <* spaces) <*>
|
||||
(parseVar <* char '=' <* spaces) <*>
|
||||
parseExpr <* (char ';' >> spaces)
|
||||
|
||||
parseStmt :: Parser Stmt
|
||||
@ -133,9 +89,9 @@ parseStmt = choice
|
||||
|
||||
parseProgram :: Parser Prog
|
||||
parseProgram = do
|
||||
state <- parseKwState >> spaces *> parseExpr <* char ';' <* spaces
|
||||
effect <- parseKwEffect >> spaces *> parseBlockStmts <* spaces
|
||||
combined <- parseKwCombine >> spaces *> parseBlockStmts <* spaces
|
||||
state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces
|
||||
effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces
|
||||
combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces
|
||||
return $ Prog state effect combined
|
||||
|
||||
parse :: String -> String -> Either ParseError Prog
|
||||
|
@ -269,18 +269,18 @@ by prepending the word "temp" to that number. We start
|
||||
with `temp0`, then `temp1`, and so on. To keep a counter,
|
||||
we can use a state monad:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 269 269 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}}
|
||||
|
||||
Don't worry about the `Map.Map String [String]`, we'll get to that in a bit.
|
||||
For now, all we have to worry about is the second element of the tuple,
|
||||
the integer counting how many temporary variables we've used. We can
|
||||
get the current temporary variable as follows:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 271 274 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}}
|
||||
|
||||
We can also get a fresh temporary variable like this:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 276 279 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}}
|
||||
|
||||
Now, the
|
||||
{{< sidenote "left" "code-note" "code" >}}
|
||||
@ -297,7 +297,7 @@ source code for the blog (which includes this project)
|
||||
<a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>.
|
||||
{{< /sidenote >}}
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 364 369 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}}
|
||||
|
||||
##### Implementing "lazy evaluation"
|
||||
Lazy evaluation in functional programs usually arises from
|
||||
@ -344,20 +344,20 @@ and also of the dependencies of each variable (the variables that need
|
||||
to be access before the variable itself). We compute such a map for
|
||||
each selector as follows:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 337 337 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}}
|
||||
|
||||
We update the existing map using `Map.union`:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 338 338 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}}
|
||||
|
||||
And, after we're done generating expressions in the body of this selector,
|
||||
we clear it to its previous value `vs`:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 341 341 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}}
|
||||
|
||||
We generate a single selector as follows:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 307 320 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}}
|
||||
|
||||
This generates a function definition statement, which we will examine in
|
||||
generated Python code later on.
|
||||
@ -366,7 +366,7 @@ Solving the problem this way also introduces another gotcha: sometimes,
|
||||
a variable is produced by a function call, and other times the variable
|
||||
is just a Python variable. We write this as follows:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 322 327 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}}
|
||||
|
||||
##### Special Case Insertion
|
||||
This is a silly language for a single homework assignment. I'm not
|
||||
@ -377,7 +377,7 @@ a list, it can also return the list from its base case. Thus,
|
||||
that's all we will try to figure out. The checking code is so
|
||||
short that we can include the whole snippet at once:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 258 266 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}}
|
||||
|
||||
`mergePossibleType`
|
||||
{{< sidenote "right" "bool-identity-note" "figures out" >}}
|
||||
@ -404,7 +404,7 @@ My Haskell linter actually suggested a pretty clever way of writing
|
||||
the whole "add a base case if this function returns a list" code.
|
||||
Check it out:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 305 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}}
|
||||
|
||||
Specifically, look at the line with `let fastReturn = ...`. It
|
||||
uses a list comprehension: we take a parameter `p` from the list of
|
||||
|
@ -87,14 +87,14 @@ time, and nor do we have to perform any fancy Python nested function declaration
|
||||
To keep with the Python convention of lowercase variables, we'll translate the
|
||||
uppercase "global" variables to lowercase. We'll do it like so:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 211 220 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}}
|
||||
|
||||
Note that we translated "L" and "R" to integer literals. We'll indicate the source of
|
||||
each element with an integer, since there's no real point to representing it with
|
||||
a string or a variable. We'll need to be aware of this when we implement the actual, generic
|
||||
mergesort code. Let's do that now:
|
||||
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 145 205 >}}
|
||||
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}}
|
||||
|
||||
This is probably the ugliest part of this assignment: we handwrote a Python
|
||||
AST in Haskell that implements mergesort with our augmentations. Note that
|
||||
|
Loading…
Reference in New Issue
Block a user