178 lines
4.5 KiB
Haskell
178 lines
4.5 KiB
Haskell
{-# LANGUAGE LambdaCase #-}
|
|
module Hasklet2 where
|
|
import Control.Applicative (liftA2)
|
|
import qualified Control.Applicative as CA
|
|
import Data.Bifunctor
|
|
|
|
--
|
|
-- * Parser type
|
|
--
|
|
|
|
-- | Given a string, a parser either fails or returns a parsed value and
|
|
-- the rest of the string to be parsed.
|
|
newtype Parser a = Parser { runParser :: String -> Maybe (a, String) }
|
|
|
|
instance Functor Parser where
|
|
fmap f (Parser nf) = Parser $ (first f<$>) <$> nf
|
|
|
|
instance Applicative Parser where
|
|
pure v = Parser $ Just . (,) v
|
|
pf <*> pa = Parser $ \s -> do
|
|
(f, s') <- runParser pf s
|
|
(v, s'') <- runParser pa s'
|
|
return (f v, s'')
|
|
|
|
--
|
|
-- * Single character parsers
|
|
--
|
|
|
|
-- | Match the end of the input string.
|
|
end :: Parser ()
|
|
end = Parser $ \case
|
|
"" -> Just ((), "")
|
|
_ -> Nothing
|
|
|
|
-- | Return the next character if it satisfies the given predicate.
|
|
nextIf :: (Char -> Bool) -> Parser Char
|
|
nextIf f = Parser $ \case
|
|
(c:s') | f c -> Just (c,s')
|
|
_ -> Nothing
|
|
|
|
-- | Parse the given character.
|
|
char :: Char -> Parser Char
|
|
char c = nextIf (c ==)
|
|
|
|
-- | Parse one of the given characters.
|
|
oneOf :: [Char] -> Parser Char
|
|
oneOf cs = nextIf (`elem` cs)
|
|
|
|
-- | Parse a particular class of character.
|
|
lower, upper, digit, space :: Parser Char
|
|
lower = oneOf ['a'..'z']
|
|
upper = oneOf ['A'..'Z']
|
|
digit = oneOf ['0'..'9']
|
|
space = oneOf " \t\n\r"
|
|
|
|
-- | Parse a digit as an integer.
|
|
digitInt :: Parser Int
|
|
digitInt = flip (-) (fromEnum '0') . fromEnum <$> digit
|
|
|
|
--
|
|
-- * Alternative and repeating parsers
|
|
--
|
|
|
|
-- | Run the first parser. If it succeeds, return the result. Otherwise run
|
|
-- the second parser.
|
|
--
|
|
-- >>> runParser (upper <|> digit) "Hi"
|
|
-- Just ('H',"i")
|
|
--
|
|
-- >>> runParser (upper <|> digit) "42"
|
|
-- Just ('4',"2")
|
|
--
|
|
-- >>> runParser (upper <|> digit) "w00t"
|
|
-- Nothing
|
|
--
|
|
(<|>) :: Parser a -> Parser a -> Parser a
|
|
p1 <|> p2 = Parser $ \s -> runParser p1 s CA.<|> runParser p2 s
|
|
|
|
|
|
-- | Parse a sequence of one or more items, returning the results as a list.
|
|
-- Parses the longest possible sequence (i.e. until the given parser fails).
|
|
--
|
|
-- >>> runParser (many1 lower) "abcDEF123"
|
|
-- Just ("abc","DEF123")
|
|
--
|
|
-- >>> runParser (many1 lower) "ABCdef123"
|
|
-- Nothing
|
|
--
|
|
-- >>> runParser (many1 (lower <|> upper)) "ABCdef123"
|
|
-- Just ("ABCdef","123")
|
|
--
|
|
-- >>> runParser (many1 digitInt) "123abc"
|
|
-- Just ([1,2,3],"abc")
|
|
--
|
|
many1 :: Parser a -> Parser [a]
|
|
many1 p = liftA2 (:) p (many p)
|
|
|
|
|
|
-- | Parse a sequence of zero or more items, returning the results as a list.
|
|
--
|
|
-- >>> runParser (many lower) "abcDEF123"
|
|
-- Just ("abc","DEF123")
|
|
--
|
|
-- >>> runParser (many lower) "ABCdef123"
|
|
-- Just ("","ABCdef123")
|
|
--
|
|
-- >>> runParser (many (lower <|> upper)) "abcDEF123"
|
|
-- Just ("abcDEF","123")
|
|
--
|
|
-- >>> runParser (many digitInt) "123abc"
|
|
-- Just ([1,2,3],"abc")
|
|
--
|
|
-- >>> runParser (many digitInt) "abc123"
|
|
-- Just ([],"abc123")
|
|
--
|
|
many :: Parser a -> Parser [a]
|
|
many p = liftA2 (:) p (many p) <|> pure []
|
|
|
|
|
|
-- | Parse a natural number into a Haskell integer.
|
|
--
|
|
-- >>> runParser nat "123abc"
|
|
-- Just (123,"abc")
|
|
--
|
|
-- >>> runParser nat "abc"
|
|
-- Nothing
|
|
--
|
|
nat :: Parser Int
|
|
nat = foldl ((+).(*10)) 0 <$> many1 digitInt
|
|
|
|
parenth :: Parser a -> Parser b -> Parser (a, b)
|
|
parenth p1 p2 = liftA2 (,) (char '(' *> p1 <* char ',') (p2 <* char ')')
|
|
|
|
--
|
|
-- * Parsing structured data
|
|
--
|
|
|
|
-- | Parse a pair of natural numbers into a Haskell pair of integers. You can
|
|
-- assume there are no spaces within the substring encoding the pair,
|
|
-- although you're welcome to try to generalize it to handle whitespace too,
|
|
-- e.g. before/after parentheses and the comma.
|
|
--
|
|
-- This may get a little bit hairy, but the ugliness here will motivate some
|
|
-- key abstractions later. :-)
|
|
--
|
|
-- >>> runParser natPair "(123,45) 678"
|
|
-- Just ((123,45)," 678")
|
|
--
|
|
-- >>> runParser natPair "(123,45"
|
|
-- Nothing
|
|
--
|
|
-- >>> runParser natPair "(123,x) 678"
|
|
-- Nothing
|
|
--
|
|
natPair = parenth nat nat
|
|
|
|
|
|
-- | A simple tree data structure, isomorphic to arbitrarily nested pairs with
|
|
-- integers at the leaves.
|
|
data Tree
|
|
= Leaf Int
|
|
| Node Tree Tree
|
|
deriving (Eq,Show)
|
|
|
|
|
|
-- | Parse a tree encoded as arbitrarily nested pairs. This is basically just
|
|
-- the 'natPair' parser, now with recursion.
|
|
--
|
|
-- >>> runParser natTree "((1,2),3) abc"
|
|
-- Just (Node (Node (Leaf 1) (Leaf 2)) (Leaf 3)," abc")
|
|
--
|
|
-- >>> runParser natTree "(1,((100,101),10))"
|
|
-- Just (Node (Leaf 1) (Node (Node (Leaf 100) (Leaf 101)) (Leaf 10)),"")
|
|
--
|
|
natTree :: Parser Tree
|
|
natTree = (uncurry Node <$> parenth natTree natTree) <|> (Leaf <$> nat)
|
|
|