diff --git a/src/org/nwapw/abacus/lexing/Lexer.java b/src/org/nwapw/abacus/lexing/Lexer.java
new file mode 100644
index 0000000..94457a1
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/Lexer.java
@@ -0,0 +1,111 @@
+package org.nwapw.abacus.lexing;
+
+import org.nwapw.abacus.lexing.pattern.EndNode;
+import org.nwapw.abacus.lexing.pattern.Match;
+import org.nwapw.abacus.lexing.pattern.Pattern;
+import org.nwapw.abacus.lexing.pattern.PatternNode;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashSet;
+
+/**
+ * Runs every registered pattern over an input string in lockstep and reports
+ * which pattern produced each token.
+ *
+ * @param <T> the type used to identify registered patterns.
+ */
+public class Lexer<T> {
+
+    /** Patterns that compiled successfully, in registration order. */
+    private final ArrayList<Pattern<T>> patterns;
+
+    /** Creates a lexer with no registered patterns. */
+    public Lexer(){
+        patterns = new ArrayList<>();
+    }
+
+    /**
+     * Compiles a pattern and registers it under the given identifier.
+     * A pattern whose compilation yields no head node is silently skipped.
+     *
+     * @param pattern the pattern source text.
+     * @param id the identifier reported for matches of this pattern.
+     */
+    public void register(String pattern, T id){
+        Pattern<T> compiledPattern = new Pattern<>(pattern, id);
+        if(compiledPattern.getHead() != null) patterns.add(compiledPattern);
+    }
+
+    /**
+     * Finds the best single match beginning at the given index. The longest
+     * match wins; among matches of equal length, the one whose type sorts
+     * last under {@code compare} wins.
+     *
+     * @param from the string to read from.
+     * @param startAt the index at which matching begins.
+     * @param compare orders pattern identifiers to break ties between
+     *                equally long matches; may be null, in which case ties
+     *                are left in the order the matches were found.
+     * @return the best match, or null if no registered pattern matched.
+     */
+    public Match<T> lexOne(String from, int startAt, Comparator<T> compare){
+        ArrayList<Match<T>> matches = new ArrayList<>();
+        HashSet<PatternNode<T>> currentSet = new HashSet<>();
+        HashSet<PatternNode<T>> futureSet = new HashSet<>();
+        int index = startAt;
+        for(Pattern<T> pattern : patterns){
+            pattern.getHead().addInto(currentSet);
+        }
+        while(!currentSet.isEmpty()){
+            for(PatternNode<T> node : currentSet){
+                if(index < from.length() && node.matches(from.charAt(index))) {
+                    node.addOutputsInto(futureSet);
+                } else if(node instanceof EndNode){
+                    // Reaching an end node means its pattern matched [startAt, index).
+                    matches.add(new Match<>(startAt, index, ((EndNode<T>) node).getPatternId()));
+                }
+            }
+
+            // Swap the two sets so the emptied one can be reused for the next step.
+            HashSet<PatternNode<T>> tmp = currentSet;
+            currentSet = futureSet;
+            futureSet = tmp;
+            futureSet.clear();
+
+            index++;
+        }
+        // Only the tie-breaking sort needs the comparator, so only it is
+        // guarded; the length sort must always run. List.sort is stable, so
+        // the type order survives among matches of equal length.
+        if(compare != null) {
+            matches.sort((a, b) -> compare.compare(a.getType(), b.getType()));
+        }
+        matches.sort(Comparator.comparingInt(a -> a.getTo() - a.getFrom()));
+        return matches.isEmpty() ? null : matches.get(matches.size() - 1);
+    }
+
+    /**
+     * Repeatedly calls {@link #lexOne} to split the string into matches,
+     * stopping at the end of the string or at the first index where no
+     * pattern matches.
+     *
+     * @param from the string to read from.
+     * @param startAt the index at which lexing begins.
+     * @param compare the tie-breaking comparator handed to {@link #lexOne}.
+     * @return the matches found, in order, or null if a zero-length match
+     *         was produced (which would otherwise never advance).
+     */
+    public ArrayList<Match<T>> lexAll(String from, int startAt, Comparator<T> compare){
+        int index = startAt;
+        ArrayList<Match<T>> matches = new ArrayList<>();
+        Match<T> lastMatch;
+        while((lastMatch = lexOne(from, index, compare)) != null && index < from.length()){
+            if(lastMatch.getTo() == lastMatch.getFrom()) return null;
+            matches.add(lastMatch);
+            index += lastMatch.getTo() - lastMatch.getFrom();
+        }
+        return matches;
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/AnyNode.java b/src/org/nwapw/abacus/lexing/pattern/AnyNode.java
new file mode 100644
index 0000000..1cfa635
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/AnyNode.java
@@ -0,0 +1,15 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A pattern node that matches any character.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class AnyNode<T> extends PatternNode<T> {
+
+    @Override
+    public boolean matches(char other) {
+        return true;
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/EndNode.java b/src/org/nwapw/abacus/lexing/pattern/EndNode.java
new file mode 100644
index 0000000..d8e621d
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/EndNode.java
@@ -0,0 +1,30 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A node marking the end of a pattern. It matches no character (it inherits
+ * the default {@code matches}) and carries the identifier of its pattern.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class EndNode<T> extends PatternNode<T> {
+
+    /** The identifier of the pattern this node terminates. */
+    private final T patternId;
+
+    /**
+     * Creates an end node for the given pattern.
+     *
+     * @param patternId the identifier of the pattern this node terminates.
+     */
+    public EndNode(T patternId){
+        this.patternId = patternId;
+    }
+
+    /**
+     * @return the identifier of the pattern this node terminates.
+     */
+    public T getPatternId(){
+        return patternId;
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/LinkNode.java b/src/org/nwapw/abacus/lexing/pattern/LinkNode.java
new file mode 100644
index 0000000..460c418
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/LinkNode.java
@@ -0,0 +1,32 @@
+package org.nwapw.abacus.lexing.pattern;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * A node that consumes no input: adding it to a collection also adds
+ * everything reachable through its outputs.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class LinkNode<T> extends PatternNode<T> {
+
+    /**
+     * Adds this node and, transitively, its outputs to the collection.
+     * The node records itself in the collection so that a second visit
+     * returns immediately. Link nodes may form cycles (the star transform
+     * in Pattern wires its new tail back to its new head), and
+     * following such a cycle without this check never terminates.
+     * The node itself matches no character, so its presence in the
+     * collection does not affect matching.
+     *
+     * @param into the collection being filled.
+     */
+    @Override
+    public void addInto(Collection<PatternNode<T>> into) {
+        if(into.contains(this)) return;
+        into.add(this);
+        addOutputsInto(into);
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/Match.java b/src/org/nwapw/abacus/lexing/pattern/Match.java
new file mode 100644
index 0000000..06e0b27
--- 
/dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/Match.java
@@ -0,0 +1,50 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A section of a string matched by a pattern.
+ *
+ * @param <T> the type used to identify the pattern that matched.
+ */
+public class Match<T> {
+
+    /** The index at which the match starts. */
+    private final int from;
+    /** The index at which the match ends. */
+    private final int to;
+    /** The identifier of the pattern that produced the match. */
+    private final T type;
+
+    /**
+     * Creates a match covering the given range.
+     *
+     * @param from the index at which the match starts.
+     * @param to the index at which the match ends.
+     * @param type the identifier of the pattern that matched.
+     */
+    public Match(int from, int to, T type){
+        this.from = from;
+        this.to = to;
+        this.type = type;
+    }
+
+    /**
+     * @return the index at which the match starts.
+     */
+    public int getFrom() {
+        return from;
+    }
+
+    /**
+     * @return the index at which the match ends.
+     */
+    public int getTo() {
+        return to;
+    }
+
+    /**
+     * @return the identifier of the pattern that matched.
+     */
+    public T getType() {
+        return type;
+    }
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/Pattern.java b/src/org/nwapw/abacus/lexing/pattern/Pattern.java
new file mode 100644
index 0000000..4d40d77
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/Pattern.java
@@ -0,0 +1,255 @@
+package org.nwapw.abacus.lexing.pattern;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Stack;
+import java.util.function.Function;
+
+/**
+ * A pattern compiled from its textual form into a graph of pattern nodes.
+ * The source may contain literal characters, backslash escapes, {@code .},
+ * character classes in {@code [...]}, groups in {@code (...)}, alternation
+ * with {@code |}, and the postfix operators {@code +}, {@code *} and {@code ?}.
+ *
+ * @param <T> the type used to identify the pattern.
+ */
+public class Pattern<T> {
+
+    /** The identifier given to this pattern's end node. */
+    private T id;
+    /** The first node of the compiled graph, or null if compilation failed. */
+    private PatternNode<T> head;
+    /** The pattern text being compiled. */
+    private String source;
+    /** The parser's current position within the source text. */
+    private int index;
+
+    /** Maps each postfix operator character to the transformation it applies. */
+    private final HashMap<Character, Function<PatternChain<T>, PatternChain<T>>> operations =
+            new HashMap<>();
+
+    /**
+     * Makes the chain repeatable by letting its tail lead back to its head.
+     *
+     * @param chain the chain to transform.
+     * @return the same chain, modified in place.
+     */
+    private PatternChain<T> transformPlus(PatternChain<T> chain){
+        chain.tail.outputStates.add(chain.head);
+        return chain;
+    }
+
+    /**
+     * Makes the chain both skippable and repeatable by wrapping it in a new
+     * pair of link nodes: the new head leads to the old head and to the new
+     * tail, and the new tail leads back to the new head.
+     *
+     * @param chain the chain to transform.
+     * @return the same chain, modified in place.
+     */
+    private PatternChain<T> transformStar(PatternChain<T> chain){
+        LinkNode<T> newTail = new LinkNode<>();
+        LinkNode<T> newHead = new LinkNode<>();
+        newHead.outputStates.add(chain.head);
+        newHead.outputStates.add(newTail);
+        chain.tail.outputStates.add(newTail);
+        newTail.outputStates.add(newHead);
+        chain.head = newHead;
+        chain.tail = newTail;
+        return chain;
+    }
+
+    /**
+     * Makes the chain skippable by wrapping it in a new pair of link nodes,
+     * with the new head leading both to the old head and to the new tail.
+     *
+     * @param chain the chain to transform.
+     * @return the same chain, modified in place.
+     */
+    private PatternChain<T> transformQuestion(PatternChain<T> chain){
+        LinkNode<T> newTail = new LinkNode<>();
+        LinkNode<T> newHead = new LinkNode<>();
+        newHead.outputStates.add(chain.head);
+        newHead.outputStates.add(newTail);
+        chain.tail.outputStates.add(newTail);
+        chain.head = newHead;
+        chain.tail = newTail;
+        return chain;
+    }
+
+    /**
+     * Joins the given chains as alternatives between a shared head and tail.
+     *
+     * @param collection the chains to combine.
+     * @return a new chain whose head leads to every given chain and whose
+     *         tail is reached from every given chain.
+     */
+    private PatternChain<T> combineChains(Collection<PatternChain<T>> collection){
+        LinkNode<T> head = new LinkNode<>();
+        LinkNode<T> tail = new LinkNode<>();
+        PatternChain<T> newChain = new PatternChain<>(head, tail);
+        for(PatternChain<T> chain : collection){
+            head.outputStates.add(chain.head);
+            chain.tail.outputStates.add(tail);
+        }
+        return newChain;
+    }
+
+    /**
+     * Parses a single literal character, honouring a backslash escape.
+     *
+     * @return a chain holding one value node, or null if the source ends
+     *         before a character could be read.
+     */
+    private PatternChain<T> parseValue(){
+        if(index >= source.length()) return null;
+        if(source.charAt(index) == '\\'){
+            if(++index >= source.length()) return null;
+        }
+        return new PatternChain<>(new ValueNode<>(source.charAt(index++)));
+    }
+
+    /**
+     * Parses a character class that starts at the current {@code [}.
+     * Entries are single characters or ranges written {@code a-z}.
+     *
+     * @return the parsed chain, or null if the class is malformed or
+     *         is never closed.
+     */
+    private PatternChain<T> parseOr(){
+        Stack<PatternChain<T>> orStack = new Stack<>();
+        index++;
+        while(index < source.length() && source.charAt(index) != ']'){
+            if(source.charAt(index) == '-'){
+                index++;
+                // A range needs a plain value on each side; range() yields
+                // '\0' for anything that is not a value node.
+                if(orStack.empty() || orStack.peek().tail.range() == '\0') return null;
+                PatternChain<T> bottomRange = orStack.pop();
+                PatternChain<T> topRange = parseValue();
+                if(topRange == null || topRange.tail.range() == '\0') return null;
+
+                orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range())));
+            } else {
+                PatternChain<T> newChain = parseValue();
+                if(newChain == null) return null;
+                orStack.push(newChain);
+            }
+        }
+        if(index++ >= source.length()) return null;
+        return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack);
+    }
+
+    /**
+     * Parses a sequence of pattern elements, either the whole source or a
+     * parenthesised group.
+     *
+     * @param isSubsegment true if the segment is a group, which must begin
+     *                     with {@code (} and end with {@code )}.
+     * @return the parsed chain, or null if the segment is malformed.
+     */
+    private PatternChain<T> parseSegment(boolean isSubsegment){
+        if(index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null;
+        if(isSubsegment) index++;
+
+        Stack<PatternChain<T>> orChain = new Stack<>();
+        PatternChain<T> fullChain = new PatternChain<>();
+        PatternChain<T> currentChain = null;
+        while (index < source.length() && source.charAt(index) != ')'){
+            char currentChar = source.charAt(index);
+            if(operations.containsKey(currentChar)){
+                // An operator needs an operand with nodes to rewire; an
+                // empty group such as "()" has none.
+                if(currentChain == null || currentChain.head == null) return null;
+
+                currentChain = operations.get(currentChar).apply(currentChain);
+                fullChain.append(currentChain);
+                currentChain = null;
+                index++;
+            } else if(currentChar == '|'){
+                if(currentChain == null) return null;
+
+                fullChain.append(currentChain);
+                // combineChains needs every alternative to have nodes.
+                if(fullChain.head == null) return null;
+                orChain.push(fullChain);
+                currentChain = null;
+                fullChain = new PatternChain<>();
+                if(++index >= source.length()) return null;
+            } else if(currentChar == '('){
+                if(currentChain != null) {
+                    fullChain.append(currentChain);
+                }
+
+                currentChain = parseSegment(true);
+                if(currentChain == null) return null;
+            } else if(currentChar == '['){
+                if(currentChain != null){
+                    fullChain.append(currentChain);
+                }
+                currentChain = parseOr();
+                if(currentChain == null) return null;
+            } else if(currentChar == '.'){
+                if(currentChain != null){
+                    fullChain.append(currentChain);
+                }
+                currentChain = new PatternChain<>(new AnyNode<>());
+                index++;
+            } else {
+                if(currentChain != null){
+                    fullChain.append(currentChain);
+                }
+                currentChain = parseValue();
+                if(currentChain == null) return null;
+            }
+        }
+
+        // The loop ends either at the end of the source or on a ')'. A group
+        // must end on its ')'; the top-level segment must not meet one, or
+        // the rest of the source would be silently dropped.
+        boolean atClosingParenthesis = index < source.length();
+        if(isSubsegment != atClosingParenthesis) return null;
+        if(isSubsegment) index++;
+
+        if(currentChain != null) fullChain.append(currentChain);
+        if(!orChain.empty()){
+            // A trailing empty alternative, as in "a|)", has no nodes to connect.
+            if(fullChain.head == null) return null;
+            orChain.push(fullChain);
+            fullChain = combineChains(orChain);
+        }
+
+        return fullChain;
+    }
+
+    /**
+     * Compiles the given pattern text.
+     *
+     * @param from the pattern text.
+     * @param id the identifier attached to the compiled pattern's end node.
+     */
+    public Pattern(String from, T id){
+        this.id = id;
+        index = 0;
+        source = from;
+        operations.put('+', this::transformPlus);
+        operations.put('*', this::transformStar);
+        operations.put('?', this::transformQuestion);
+
+        PatternChain<T> chain = parseSegment(false);
+        if(chain == null) {
+            head = null;
+        } else {
+            chain.append(new EndNode<>(id));
+            head = chain.head;
+        }
+    }
+
+    /**
+     * @return the first node of the compiled pattern, or null if the
+     *         pattern text could not be compiled.
+     */
+    public PatternNode<T> getHead() {
+        return head;
+    }
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternChain.java b/src/org/nwapw/abacus/lexing/pattern/PatternChain.java
new file mode 100644
index 0000000..aad9be5
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/PatternChain.java
@@ -0,0 +1,74 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A run of connected pattern nodes, tracked by its first and last node.
+ * An empty chain has a null head and a null tail.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class PatternChain<T> {
+
+    /** The first node of the chain, or null if the chain is empty. */
+    public PatternNode<T> head;
+    /** The last node of the chain, or null if the chain is empty. */
+    public PatternNode<T> tail;
+
+    /**
+     * Creates a chain running from the given head to the given tail.
+     *
+     * @param head the first node of the chain.
+     * @param tail the last node of the chain.
+     */
+    public PatternChain(PatternNode<T> head, PatternNode<T> tail){
+        this.head = head;
+        this.tail = tail;
+    }
+
+    /**
+     * Creates a chain made of a single node.
+     *
+     * @param node the node that is both head and tail.
+     */
+    public PatternChain(PatternNode<T> node){
+        this(node, node);
+    }
+
+    /** Creates an empty chain. */
+    public PatternChain(){
+        this(null, null);
+    }
+
+    /**
+     * Appends another chain to the end of this one.
+     *
+     * @param other the chain to append; appending an empty chain
+     *              leaves this chain unchanged.
+     */
+    public void append(PatternChain<T> other){
+        // Without this check an empty chain would overwrite a non-empty
+        // head and tail with null, discarding everything appended so far.
+        if(other.head == null) return;
+        if(tail == null) {
+            this.head = other.head;
+            this.tail = other.tail;
+        } else {
+            tail.outputStates.add(other.head);
+            tail = other.tail;
+        }
+    }
+
+    /**
+     * Appends a single node to the end of this chain.
+     *
+     * @param node the node to append.
+     */
+    public void append(PatternNode<T> node){
+        if(tail == null){
+            head = tail = node;
+        } else {
+            tail.outputStates.add(node);
+            tail = node;
+        }
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternNode.java b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java
new file mode 100644
index 0000000..4c0908a
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java
@@ -0,0 +1,59 @@
+package org.nwapw.abacus.lexing.pattern;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+
+/**
+ * The base node of a compiled pattern. On its own it matches no character;
+ * subclasses override {@link #matches(char)} to accept input.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class PatternNode<T> {
+
+    /** The nodes that may follow this one. */
+    protected HashSet<PatternNode<T>> outputStates;
+
+    /** Creates a node with no outputs. */
+    public PatternNode(){
+        outputStates = new HashSet<>();
+    }
+
+    /**
+     * Tests whether this node accepts the given character.
+     *
+     * @param other the character to test.
+     * @return true if the character is accepted; always false here.
+     */
+    public boolean matches(char other){
+        return false;
+    }
+
+    /**
+     * @return the character this node contributes as an end point of a
+     *         range, or '\0' if it cannot be one.
+     */
+    public char range(){
+        return '\0';
+    }
+
+    /**
+     * Adds this node to the given collection.
+     *
+     * @param into the collection to add to.
+     */
+    public void addInto(Collection<PatternNode<T>> into){
+        into.add(this);
+    }
+
+    /**
+     * Asks every output of this node to add itself to the given collection.
+     *
+     * @param into the collection to add to.
+     */
+    public void addOutputsInto(Collection<PatternNode<T>> into){
+        outputStates.forEach(e -> e.addInto(into));
+    }
+
+}
diff --git 
a/src/org/nwapw/abacus/lexing/pattern/RangeNode.java b/src/org/nwapw/abacus/lexing/pattern/RangeNode.java
new file mode 100644
index 0000000..4ff9e5b
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/RangeNode.java
@@ -0,0 +1,32 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A pattern node that matches any character within an inclusive range.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class RangeNode<T> extends PatternNode<T> {
+
+    /** The lowest character this node matches. */
+    private final char from;
+    /** The highest character this node matches. */
+    private final char to;
+
+    /**
+     * Creates a node matching the characters from {@code from} to {@code to},
+     * both included.
+     *
+     * @param from the lowest matching character.
+     * @param to the highest matching character.
+     */
+    public RangeNode(char from, char to){
+        this.from = from;
+        this.to = to;
+    }
+
+    @Override
+    public boolean matches(char other) {
+        return other >= from && other <= to;
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/ValueNode.java b/src/org/nwapw/abacus/lexing/pattern/ValueNode.java
new file mode 100644
index 0000000..855f805
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/ValueNode.java
@@ -0,0 +1,35 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A pattern node that matches exactly one character.
+ *
+ * @param <T> the type used to identify patterns.
+ */
+public class ValueNode<T> extends PatternNode<T> {
+
+    /** The character this node matches. */
+    private final char value;
+
+    /**
+     * Creates a node matching the given character.
+     *
+     * @param value the character to match.
+     */
+    public ValueNode(char value){
+        this.value = value;
+    }
+
+    @Override
+    public boolean matches(char other) {
+        return other == value;
+    }
+
+    /**
+     * @return the character this node matches, so that it can serve as
+     *         an end point of a range.
+     */
+    @Override
+    public char range() {
+        return value;
+    }
+}
diff --git a/src/org/nwapw/abacus/tree/NumberNode.java b/src/org/nwapw/abacus/tree/NumberNode.java
new file mode 100644
index 0000000..1ad6da5
--- /dev/null
+++ b/src/org/nwapw/abacus/tree/NumberNode.java
@@ -0,0 +1,43 @@
+package org.nwapw.abacus.tree;
+
+import org.nwapw.abacus.number.NaiveNumber;
+import org.nwapw.abacus.number.NumberInterface;
+
+/**
+ * A tree node that holds a number.
+ */
+public class NumberNode extends TreeNode {
+
+    /** The number held by this node; null when created without a value. */
+    private final NumberInterface number;
+
+    /** Creates a node that holds no number. */
+    public NumberNode(){
+        number = null;
+    }
+
+    /**
+     * Creates a node holding the given value.
+     *
+     * @param value the value to hold.
+     */
+    public NumberNode(double value){
+        number = new NaiveNumber(value);
+    }
+
+    /**
+     * Creates a node holding the value parsed from the given string.
+     *
+     * @param value the text to parse with {@link Double#parseDouble(String)}.
+     */
+    public NumberNode(String value){
+        this(Double.parseDouble(value));
+    }
+
+    /**
+     * @return the number held by this node, or null if it holds none.
+     */
+    public NumberInterface getNumber() {
+        return number;
+    }
+}
diff --git a/src/org/nwapw/abacus/tree/OpNode.java b/src/org/nwapw/abacus/tree/OpNode.java
new file mode 100644
index 0000000..0fcdaed
--- /dev/null
+++ 
b/src/org/nwapw/abacus/tree/OpNode.java
@@ -0,0 +1,71 @@
+package org.nwapw.abacus.tree;
+
+/**
+ * A tree node representing an operation applied to two child nodes.
+ */
+public class OpNode extends TreeNode {
+
+    /** The text of the operation, such as "+". */
+    private final String operation;
+    /** The left operand; may be null. */
+    private TreeNode left;
+    /** The right operand; may be null. */
+    private TreeNode right;
+
+    /**
+     * Creates an operation node with no operands.
+     *
+     * @param operation the text of the operation.
+     */
+    public OpNode(String operation){
+        this(operation, null, null);
+    }
+
+    /**
+     * Creates an operation node with the given operands.
+     *
+     * @param operation the text of the operation.
+     * @param left the left operand.
+     * @param right the right operand.
+     */
+    public OpNode(String operation, TreeNode left, TreeNode right){
+        this.operation = operation;
+        this.left = left;
+        this.right = right;
+    }
+
+    /**
+     * @return the text of the operation.
+     */
+    public String getOperation() {
+        return operation;
+    }
+
+    /**
+     * @return the left operand.
+     */
+    public TreeNode getLeft() {
+        return left;
+    }
+
+    /**
+     * @param left the new left operand.
+     */
+    public void setLeft(TreeNode left) {
+        this.left = left;
+    }
+
+    /**
+     * @return the right operand.
+     */
+    public TreeNode getRight() {
+        return right;
+    }
+
+    /**
+     * @param right the new right operand.
+     */
+    public void setRight(TreeNode right) {
+        this.right = right;
+    }
+}
diff --git a/src/org/nwapw/abacus/tree/OperatorAssociativity.java b/src/org/nwapw/abacus/tree/OperatorAssociativity.java
new file mode 100644
index 0000000..78b633b
--- /dev/null
+++ b/src/org/nwapw/abacus/tree/OperatorAssociativity.java
@@ -0,0 +1,8 @@
+package org.nwapw.abacus.tree;
+
+/**
+ * The side on which operators of equal precedence group.
+ */
+public enum OperatorAssociativity {
+    LEFT, RIGHT
+}
diff --git a/src/org/nwapw/abacus/tree/TokenType.java b/src/org/nwapw/abacus/tree/TokenType.java
new file mode 100644
index 0000000..e0d7d97
--- /dev/null
+++ b/src/org/nwapw/abacus/tree/TokenType.java
@@ -0,0 +1,18 @@
+package org.nwapw.abacus.tree;
+
+/**
+ * The kinds of token produced when an expression is lexed. When several
+ * kinds match the same text, the one with the highest priority is chosen.
+ */
+public enum TokenType {
+
+    ANY(0), OP(1), NUM(2), WORD(3), OPEN_PARENTH(4), CLOSE_PARENTH(5);
+
+    /** The priority used to choose between kinds that match the same text. */
+    public final int priority;
+
+    TokenType(int priority){
+        this.priority = priority;
+    }
+
+}
diff --git a/src/org/nwapw/abacus/tree/TreeNode.java b/src/org/nwapw/abacus/tree/TreeNode.java
new file mode 100644
index 0000000..6581826
--- /dev/null
+++ b/src/org/nwapw/abacus/tree/TreeNode.java
@@ -0,0 +1,160 @@
+package org.nwapw.abacus.tree;
+
+import org.nwapw.abacus.lexing.Lexer;
+import org.nwapw.abacus.lexing.pattern.Match;
+
+import java.util.*;
+
+/**
+ * The base class of expression tree nodes, along with the static routines
+ * that turn an expression string into a tree.
+ */
+public abstract class TreeNode {
+
+    /** The lexer used to split an expression into tokens. */
+    private static final Lexer<TokenType> lexer = new Lexer<>();
+    /** The precedence of each operator; a larger number binds more tightly. */
+    private static final HashMap<String, Integer> precedenceMap = new HashMap<>();
+    /** The associativity of each operator. */
+    private static final HashMap<String, OperatorAssociativity> associativityMap = new HashMap<>();
+    /** Orders token types by priority so the lexer can break ties. */
+    private static final Comparator<TokenType> tokenSorter = Comparator.comparingInt(e -> e.priority);
+
+    // Populated in a static block rather than through anonymous subclasses
+    // with instance initializers, which would create an extra class per table.
+    static {
+        lexer.register(".", TokenType.ANY);
+        lexer.register("\\+|-|\\*|/|^", TokenType.OP);
+        lexer.register("[0-9]+(\\.[0-9]+)?", TokenType.NUM);
+        lexer.register("[a-zA-Z]+", TokenType.WORD);
+        lexer.register("\\(", TokenType.OPEN_PARENTH);
+        lexer.register("\\)", TokenType.CLOSE_PARENTH);
+
+        precedenceMap.put("+", 0);
+        precedenceMap.put("-", 0);
+        precedenceMap.put("*", 1);
+        precedenceMap.put("/", 1);
+        precedenceMap.put("^", 2);
+
+        associativityMap.put("+", OperatorAssociativity.LEFT);
+        associativityMap.put("-", OperatorAssociativity.LEFT);
+        associativityMap.put("*", OperatorAssociativity.LEFT);
+        associativityMap.put("/", OperatorAssociativity.LEFT);
+        associativityMap.put("^", OperatorAssociativity.RIGHT);
+    }
+
+    /**
+     * Splits the given string into tokens.
+     *
+     * @param string the expression to tokenize.
+     * @return the tokens in order, or null if the lexer reports a
+     *         zero-length match.
+     */
+    public static ArrayList<Match<TokenType>> tokenize(String string){
+        return lexer.lexAll(string, 0, tokenSorter);
+    }
+
+    /**
+     * Rearranges infix tokens into postfix order. Numbers go straight to
+     * the output while operators and parentheses wait on a stack; tokens
+     * of any other type are dropped. The input list is emptied as it is
+     * read.
+     *
+     * @param source the string the tokens were read from.
+     * @param from the tokens in infix order.
+     * @return the tokens in postfix order, or null if the parentheses do
+     *         not balance.
+     */
+    public static ArrayList<Match<TokenType>> intoPostfix(String source, ArrayList<Match<TokenType>> from){
+        ArrayList<Match<TokenType>> output = new ArrayList<>();
+        Stack<Match<TokenType>> tokenStack = new Stack<>();
+        while(!from.isEmpty()){
+            Match<TokenType> match = from.remove(0);
+            if(match.getType() == TokenType.NUM) {
+                output.add(match);
+            } else if(match.getType() == TokenType.OP){
+                String tokenString = source.substring(match.getFrom(), match.getTo());
+                int precedence = precedenceMap.get(tokenString);
+                OperatorAssociativity associativity = associativityMap.get(tokenString);
+
+                while(!tokenStack.empty()) {
+                    Match<TokenType> otherMatch = tokenStack.peek();
+                    if(otherMatch.getType() != TokenType.OP) break;
+
+                    int otherPrecedence = precedenceMap.get(source.substring(otherMatch.getFrom(), otherMatch.getTo()));
+                    // Stacked operators stay put if they bind more loosely, or
+                    // equally when the new operator is right-associative.
+                    if(otherPrecedence < precedence ||
+                            (associativity == OperatorAssociativity.RIGHT && otherPrecedence == precedence)) {
+                        break;
+                    }
+                    output.add(tokenStack.pop());
+                }
+                tokenStack.push(match);
+            } else if(match.getType() == TokenType.OPEN_PARENTH){
+                tokenStack.push(match);
+            } else if(match.getType() == TokenType.CLOSE_PARENTH){
+                while(!tokenStack.empty() && tokenStack.peek().getType() != TokenType.OPEN_PARENTH){
+                    output.add(tokenStack.pop());
+                }
+                // No opening parenthesis is left to pair with this one.
+                if(tokenStack.empty()) return null;
+                tokenStack.pop();
+            }
+        }
+        while(!tokenStack.empty()){
+            // Anything other than an operator here is an unclosed parenthesis.
+            if(!(tokenStack.peek().getType() == TokenType.OP)) return null;
+            output.add(tokenStack.pop());
+        }
+        return output;
+    }
+
+    /**
+     * Builds a tree from tokens in reversed postfix order, removing each
+     * token from the list as it is used.
+     *
+     * @param source the string the tokens were read from.
+     * @param matches the tokens in reversed postfix order.
+     * @return the tree, or null if the tokens run out or a token is
+     *         neither an operator nor a number.
+     */
+    public static TreeNode fromStringRecursive(String source, ArrayList<Match<TokenType>> matches){
+        if(matches.size() == 0) return null;
+        Match<TokenType> match = matches.remove(0);
+        if(match.getType() == TokenType.OP){
+            // In reversed postfix the right operand comes first.
+            TreeNode right = fromStringRecursive(source, matches);
+            TreeNode left = fromStringRecursive(source, matches);
+            if(left == null || right == null) return null;
+            else return new OpNode(source.substring(match.getFrom(), match.getTo()), left, right);
+        } else if(match.getType() == TokenType.NUM){
+            return new NumberNode(Double.parseDouble(source.substring(match.getFrom(), match.getTo())));
+        }
+        return null;
+    }
+
+    /**
+     * Parses an expression string into a tree.
+     *
+     * @param string the expression to parse.
+     * @return the tree, or null if the string is not a well-formed
+     *         expression.
+     */
+    public static TreeNode fromString(String string){
+        ArrayList<Match<TokenType>> tokens = tokenize(string);
+        // tokenize yields null rather than a list in one case; passing that
+        // on would fail with a NullPointerException.
+        if(tokens == null) return null;
+
+        ArrayList<Match<TokenType>> matches = intoPostfix(string, tokens);
+        if(matches == null) return null;
+
+        Collections.reverse(matches);
+        TreeNode tree = fromStringRecursive(string, matches);
+        // Leftover tokens are operands no operator consumed, as in "1 2".
+        if(!matches.isEmpty()) return null;
+        return tree;
+    }
+
+}