1
0
mirror of https://github.com/DanilaFe/abacus synced 2024-12-24 00:10:09 -08:00

Implement a LexerTokenizer and a ShuntingYard parser.

These are basically two pieces of the old TreeBuilder, but decoupled
and reimplemented conventionally.
This commit is contained in:
Danila Fedorin 2017-07-29 21:37:32 -07:00
parent dcbda5b255
commit e61e2b07b2
2 changed files with 240 additions and 0 deletions

View File

@ -0,0 +1,67 @@
package org.nwapw.abacus.parsing;
import org.nwapw.abacus.lexing.Lexer;
import org.nwapw.abacus.lexing.pattern.Match;
import org.nwapw.abacus.lexing.pattern.Pattern;
import org.nwapw.abacus.plugin.PluginListener;
import org.nwapw.abacus.plugin.PluginManager;
import org.nwapw.abacus.tree.TokenType;
import java.util.Comparator;
import java.util.List;
/**
* A tokenzier that uses the lexer class and registered function and operator
* names to turn input into tokens in O(n) time.
*/
public class LexerTokenizer implements Tokenizer<Match<TokenType>>, PluginListener {
/**
* Comparator used to sort the tokens produced by the lexer.
*/
protected static final Comparator<TokenType> TOKEN_SORTER = Comparator.comparingInt(e -> e.priority);
/**
* The lexer instance used to turn strings into matches.
*/
private Lexer<TokenType> lexer;
/**
* Creates a new lexer tokenizer.
*/
public LexerTokenizer(){
lexer = new Lexer<TokenType>() {{
register(" ", TokenType.WHITESPACE);
register(",", TokenType.COMMA);
register("[0-9]*(\\.[0-9]+)?", TokenType.NUM);
register("\\(", TokenType.OPEN_PARENTH);
register("\\)", TokenType.CLOSE_PARENTH);
}};
}
@Override
public List<Match<TokenType>> tokenizeString(String string) {
return lexer.lexAll(string, 0, TOKEN_SORTER);
}
@Override
public void onLoad(PluginManager manager) {
for(String operator : manager.getAllOperators()){
lexer.register(Pattern.sanitize(operator), TokenType.OP);
}
for(String function : manager.getAllFunctions()){
lexer.register(Pattern.sanitize(function), TokenType.FUNCTION);
}
}
@Override
public void onUnload(PluginManager manager) {
for(String operator : manager.getAllOperators()){
lexer.unregister(Pattern.sanitize(operator), TokenType.OP);
}
for(String function : manager.getAllFunctions()){
lexer.unregister(Pattern.sanitize(function), TokenType.FUNCTION);
}
}
}

View File

@ -0,0 +1,173 @@
package org.nwapw.abacus.parsing;
import org.nwapw.abacus.Abacus;
import org.nwapw.abacus.function.Operator;
import org.nwapw.abacus.function.OperatorAssociativity;
import org.nwapw.abacus.function.OperatorType;
import org.nwapw.abacus.lexing.pattern.Match;
import org.nwapw.abacus.plugin.PluginListener;
import org.nwapw.abacus.plugin.PluginManager;
import org.nwapw.abacus.tree.*;
import java.util.*;
/**
* A parser that uses shunting yard to rearranged matches into postfix
* and then convert them into a parse tree.
*/
public class ShuntingYardParser implements Parser<Match<TokenType>>, PluginListener {
/**
* The Abacus instance used to create number instances.
*/
private Abacus abacus;
/**
* Map of operator precedences, loaded from the plugin operators.
*/
private Map<String, Integer> precedenceMap;
/**
* Map of operator associativity, loaded from the plugin operators.
*/
private Map<String, OperatorAssociativity> associativityMap;
/**
* Map of operator types, loaded from plugin operators.
*/
private Map<String, OperatorType> typeMap;
/**
* Creates a new Shunting Yard parser with the given Abacus instance.
* @param abacus the abacus instance.
*/
public ShuntingYardParser(Abacus abacus){
this.abacus = abacus;
precedenceMap = new HashMap<>();
associativityMap = new HashMap<>();
typeMap = new HashMap<>();
}
/**
* Rearranges tokens into a postfix list, using Shunting Yard.
* @param from the tokens to be rearranged.
* @return the resulting list of rearranged tokens.
*/
public List<Match<TokenType>> intoPostfix(List<Match<TokenType>> from){
ArrayList<Match<TokenType>> output = new ArrayList<>();
Stack<Match<TokenType>> tokenStack = new Stack<>();
while(!from.isEmpty()){
Match<TokenType> match = from.remove(0);
TokenType matchType = match.getType();
if(matchType == TokenType.NUM) {
output.add(match);
} else if(matchType == TokenType.FUNCTION) {
output.add(new Match<>("" , TokenType.INTERNAL_FUNCTION_END));
tokenStack.push(match);
} else if(matchType == TokenType.OP){
String tokenString = match.getContent();
OperatorType type = typeMap.get(tokenString);
int precedence = precedenceMap.get(tokenString);
OperatorAssociativity associativity = associativityMap.get(tokenString);
if(type == OperatorType.UNARY_POSTFIX){
output.add(match);
continue;
}
while(!tokenStack.empty()) {
Match<TokenType> otherMatch = tokenStack.peek();
TokenType otherMatchType = otherMatch.getType();
if(!(otherMatchType == TokenType.OP || otherMatchType == TokenType.FUNCTION)) break;
if(otherMatchType == TokenType.OP){
int otherPrecedence = precedenceMap.get(match.getContent());
if(otherPrecedence < precedence ||
(associativity == OperatorAssociativity.RIGHT && otherPrecedence == precedence)) {
break;
}
}
output.add(tokenStack.pop());
}
tokenStack.push(match);
} else if(matchType == TokenType.OPEN_PARENTH){
tokenStack.push(match);
} else if(matchType == TokenType.CLOSE_PARENTH || matchType == TokenType.COMMA){
while(!tokenStack.empty() && tokenStack.peek().getType() != TokenType.OPEN_PARENTH){
output.add(tokenStack.pop());
}
if(tokenStack.empty()) return null;
if(matchType == TokenType.CLOSE_PARENTH){
tokenStack.pop();
}
}
}
while(!tokenStack.empty()){
Match<TokenType> match = tokenStack.peek();
TokenType matchType = match.getType();
if(!(matchType == TokenType.OP || matchType == TokenType.FUNCTION)) return null;
output.add(tokenStack.pop());
}
return output;
}
/**
* Constructs a tree recursively from a list of tokens.
* @param matches the list of tokens from the source string.
* @return the construct tree expression.
*/
public TreeNode constructRecursive(List<Match<TokenType>> matches){
if(matches.size() == 0) return null;
Match<TokenType> match = matches.remove(0);
TokenType matchType = match.getType();
if(matchType == TokenType.OP){
String operator = match.getContent();
OperatorType type = typeMap.get(operator);
if(type == OperatorType.BINARY_INFIX){
TreeNode right = constructRecursive(matches);
TreeNode left = constructRecursive(matches);
if(left == null || right == null) return null;
else return new BinaryInfixNode(operator, left, right);
} else {
TreeNode applyTo = constructRecursive(matches);
if(applyTo == null) return null;
else return new UnaryPrefixNode(operator, applyTo);
}
} else if(matchType == TokenType.NUM){
return new NumberNode(abacus.numberFromString(match.getContent()));
} else if(matchType == TokenType.FUNCTION){
String functionName = match.getContent();
FunctionNode node = new FunctionNode(functionName);
while(!matches.isEmpty() && matches.get(0).getType() != TokenType.INTERNAL_FUNCTION_END){
TreeNode argument = constructRecursive(matches);
if(argument == null) return null;
node.prependChild(argument);
}
if(matches.isEmpty()) return null;
matches.remove(0);
return node;
}
return null;
}
@Override
public TreeNode constructTree(List<Match<TokenType>> tokens) {
tokens = intoPostfix(new ArrayList<>(tokens));
Collections.reverse(tokens);
return constructRecursive(tokens);
}
@Override
public void onLoad(PluginManager manager) {
for(String operator : manager.getAllOperators()){
Operator operatorInstance = manager.operatorFor(operator);
precedenceMap.put(operator, operatorInstance.getPrecedence());
associativityMap.put(operator, operatorInstance.getAssociativity());
typeMap.put(operator, operatorInstance.getType());
}
}
@Override
public void onUnload(PluginManager manager) {
precedenceMap.clear();
associativityMap.clear();
typeMap.clear();
}
}