From bfee4ec322d4351ef33df64ea2bad28d2a4b8997 Mon Sep 17 00:00:00 2001
From: Danila Fedorin
Date: Sat, 29 Jul 2017 21:37:32 -0700
Subject: [PATCH] Implement a LexerTokenizer and a ShuntingYard parser.

These are basically two pieces of the old TreeBuilder, but decoupled and
reimplemented conventionally.
---
 .../nwapw/abacus/parsing/LexerTokenizer.java  |  67 +++++++
 .../abacus/parsing/ShuntingYardParser.java    | 179 ++++++++++++++++++
 2 files changed, 246 insertions(+)
 create mode 100644 src/org/nwapw/abacus/parsing/LexerTokenizer.java
 create mode 100644 src/org/nwapw/abacus/parsing/ShuntingYardParser.java

diff --git a/src/org/nwapw/abacus/parsing/LexerTokenizer.java b/src/org/nwapw/abacus/parsing/LexerTokenizer.java
new file mode 100644
index 0000000..104541e
--- /dev/null
+++ b/src/org/nwapw/abacus/parsing/LexerTokenizer.java
@@ -0,0 +1,67 @@
+package org.nwapw.abacus.parsing;
+
+import org.nwapw.abacus.lexing.Lexer;
+import org.nwapw.abacus.lexing.pattern.Match;
+import org.nwapw.abacus.lexing.pattern.Pattern;
+import org.nwapw.abacus.plugin.PluginListener;
+import org.nwapw.abacus.plugin.PluginManager;
+import org.nwapw.abacus.tree.TokenType;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * A tokenizer that uses the lexer class and registered function and operator
+ * names to turn input into tokens in O(n) time.
+ */
+public class LexerTokenizer implements Tokenizer<Match<TokenType>>, PluginListener {
+
+    /**
+     * Comparator used to sort the tokens produced by the lexer.
+     */
+    protected static final Comparator<TokenType> TOKEN_SORTER = Comparator.comparingInt(e -> e.priority);
+
+    /**
+     * The lexer instance used to turn strings into matches.
+     */
+    private Lexer<TokenType> lexer;
+
+    /**
+     * Creates a new lexer tokenizer.
+     */
+    public LexerTokenizer(){
+        lexer = new Lexer<TokenType>() {{
+            register(" ", TokenType.WHITESPACE);
+            register(",", TokenType.COMMA);
+            register("[0-9]*(\\.[0-9]+)?", TokenType.NUM);
+            register("\\(", TokenType.OPEN_PARENTH);
+            register("\\)", TokenType.CLOSE_PARENTH);
+        }};
+    }
+
+    @Override
+    public List<Match<TokenType>> tokenizeString(String string) {
+        return lexer.lexAll(string, 0, TOKEN_SORTER);
+    }
+
+    @Override
+    public void onLoad(PluginManager manager) {
+        for(String operator : manager.getAllOperators()){
+            lexer.register(Pattern.sanitize(operator), TokenType.OP);
+        }
+        for(String function : manager.getAllFunctions()){
+            lexer.register(Pattern.sanitize(function), TokenType.FUNCTION);
+        }
+    }
+
+    @Override
+    public void onUnload(PluginManager manager) {
+        for(String operator : manager.getAllOperators()){
+            lexer.unregister(Pattern.sanitize(operator), TokenType.OP);
+        }
+        for(String function : manager.getAllFunctions()){
+            lexer.unregister(Pattern.sanitize(function), TokenType.FUNCTION);
+        }
+    }
+
+}
diff --git a/src/org/nwapw/abacus/parsing/ShuntingYardParser.java b/src/org/nwapw/abacus/parsing/ShuntingYardParser.java
new file mode 100644
index 0000000..9934006
--- /dev/null
+++ b/src/org/nwapw/abacus/parsing/ShuntingYardParser.java
@@ -0,0 +1,179 @@
+package org.nwapw.abacus.parsing;
+
+import org.nwapw.abacus.Abacus;
+import org.nwapw.abacus.function.Operator;
+import org.nwapw.abacus.function.OperatorAssociativity;
+import org.nwapw.abacus.function.OperatorType;
+import org.nwapw.abacus.lexing.pattern.Match;
+import org.nwapw.abacus.plugin.PluginListener;
+import org.nwapw.abacus.plugin.PluginManager;
+import org.nwapw.abacus.tree.*;
+
+import java.util.*;
+
+/**
+ * A parser that uses shunting yard to rearrange matches into postfix
+ * and then convert them into a parse tree.
+ */
+public class ShuntingYardParser implements Parser<Match<TokenType>>, PluginListener {
+
+    /**
+     * The Abacus instance used to create number instances.
+     */
+    private Abacus abacus;
+    /**
+     * Map of operator precedences, loaded from the plugin operators.
+     */
+    private Map<String, Integer> precedenceMap;
+    /**
+     * Map of operator associativity, loaded from the plugin operators.
+     */
+    private Map<String, OperatorAssociativity> associativityMap;
+    /**
+     * Map of operator types, loaded from plugin operators.
+     */
+    private Map<String, OperatorType> typeMap;
+
+    /**
+     * Creates a new Shunting Yard parser with the given Abacus instance.
+     * @param abacus the abacus instance.
+     */
+    public ShuntingYardParser(Abacus abacus){
+        this.abacus = abacus;
+        precedenceMap = new HashMap<>();
+        associativityMap = new HashMap<>();
+        typeMap = new HashMap<>();
+    }
+
+    /**
+     * Rearranges tokens into a postfix list, using Shunting Yard.
+     * Tokens are removed from the given list as they are consumed.
+     * @param from the tokens to be rearranged.
+     * @return the resulting list of rearranged tokens, or null if
+     * parentheses (or a comma) in the input are unbalanced.
+     */
+    public List<Match<TokenType>> intoPostfix(List<Match<TokenType>> from){
+        ArrayList<Match<TokenType>> output = new ArrayList<>();
+        Stack<Match<TokenType>> tokenStack = new Stack<>();
+        while(!from.isEmpty()){
+            Match<TokenType> match = from.remove(0);
+            TokenType matchType = match.getType();
+            if(matchType == TokenType.NUM) {
+                output.add(match);
+            } else if(matchType == TokenType.FUNCTION) {
+                // Sentinel that tells constructRecursive where this function's arguments stop.
+                output.add(new Match<>("" , TokenType.INTERNAL_FUNCTION_END));
+                tokenStack.push(match);
+            } else if(matchType == TokenType.OP){
+                String tokenString = match.getContent();
+                OperatorType type = typeMap.get(tokenString);
+                int precedence = precedenceMap.get(tokenString);
+                OperatorAssociativity associativity = associativityMap.get(tokenString);
+
+                if(type == OperatorType.UNARY_POSTFIX){
+                    output.add(match);
+                    continue;
+                }
+
+                while(!tokenStack.empty()) {
+                    Match<TokenType> otherMatch = tokenStack.peek();
+                    TokenType otherMatchType = otherMatch.getType();
+                    if(!(otherMatchType == TokenType.OP || otherMatchType == TokenType.FUNCTION)) break;
+
+                    if(otherMatchType == TokenType.OP){
+                        // Look up the operator on top of the stack, not the incoming one.
+                        int otherPrecedence = precedenceMap.get(otherMatch.getContent());
+                        if(otherPrecedence < precedence ||
+                                (associativity == OperatorAssociativity.RIGHT && otherPrecedence == precedence)) {
+                            break;
+                        }
+                    }
+                    output.add(tokenStack.pop());
+                }
+                tokenStack.push(match);
+            } else if(matchType == TokenType.OPEN_PARENTH){
+                tokenStack.push(match);
+            } else if(matchType == TokenType.CLOSE_PARENTH || matchType == TokenType.COMMA){
+                while(!tokenStack.empty() && tokenStack.peek().getType() != TokenType.OPEN_PARENTH){
+                    output.add(tokenStack.pop());
+                }
+                if(tokenStack.empty()) return null;
+                if(matchType == TokenType.CLOSE_PARENTH){
+                    tokenStack.pop();
+                }
+            }
+        }
+        while(!tokenStack.empty()){
+            Match<TokenType> match = tokenStack.peek();
+            TokenType matchType = match.getType();
+            if(!(matchType == TokenType.OP || matchType == TokenType.FUNCTION)) return null;
+            output.add(tokenStack.pop());
+        }
+        return output;
+    }
+
+    /**
+     * Constructs a tree recursively from a list of tokens.
+     * @param matches the list of tokens from the source string.
+     * @return the constructed tree expression, or null if no tree could be built.
+     */
+    public TreeNode constructRecursive(List<Match<TokenType>> matches){
+        if(matches.size() == 0) return null;
+        Match<TokenType> match = matches.remove(0);
+        TokenType matchType = match.getType();
+        if(matchType == TokenType.OP){
+            String operator = match.getContent();
+            OperatorType type = typeMap.get(operator);
+            if(type == OperatorType.BINARY_INFIX){
+                TreeNode right = constructRecursive(matches);
+                TreeNode left = constructRecursive(matches);
+                if(left == null || right == null) return null;
+                else return new BinaryInfixNode(operator, left, right);
+            } else {
+                TreeNode applyTo = constructRecursive(matches);
+                if(applyTo == null) return null;
+                else return new UnaryPrefixNode(operator, applyTo);
+            }
+        } else if(matchType == TokenType.NUM){
+            return new NumberNode(abacus.numberFromString(match.getContent()));
+        } else if(matchType == TokenType.FUNCTION){
+            String functionName = match.getContent();
+            FunctionNode node = new FunctionNode(functionName);
+            while(!matches.isEmpty() && matches.get(0).getType() != TokenType.INTERNAL_FUNCTION_END){
+                TreeNode argument = constructRecursive(matches);
+                if(argument == null) return null;
+                node.prependChild(argument);
+            }
+            if(matches.isEmpty()) return null;
+            matches.remove(0);
+            return node;
+        }
+        return null;
+    }
+
+    @Override
+    public TreeNode constructTree(List<Match<TokenType>> tokens) {
+        tokens = intoPostfix(new ArrayList<>(tokens));
+        // intoPostfix returns null on unbalanced input; don't hand that to reverse().
+        if(tokens == null) return null;
+        Collections.reverse(tokens);
+        return constructRecursive(tokens);
+    }
+
+    @Override
+    public void onLoad(PluginManager manager) {
+        for(String operator : manager.getAllOperators()){
+            Operator operatorInstance = manager.operatorFor(operator);
+            precedenceMap.put(operator, operatorInstance.getPrecedence());
+            associativityMap.put(operator, operatorInstance.getAssociativity());
+            typeMap.put(operator, operatorInstance.getType());
+        }
+    }
+
+    @Override
+    public void onUnload(PluginManager manager) {
+        precedenceMap.clear();
+        associativityMap.clear();
+        typeMap.clear();
+    }
+}