From 2374c167a44f35ceae95e038c00762ee018d1af4 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 17:42:25 -0700 Subject: [PATCH 01/11] Implement a few nodes for the pattern matching. --- .../nwapw/abacus/lexing/pattern/AnyNode.java | 10 ++++++++ .../nwapw/abacus/lexing/pattern/EndNode.java | 15 +++++++++++ .../nwapw/abacus/lexing/pattern/LinkNode.java | 14 +++++++++++ .../abacus/lexing/pattern/PatternNode.java | 25 +++++++++++++++++++ .../abacus/lexing/pattern/RangeNode.java | 18 +++++++++++++ .../abacus/lexing/pattern/ValueNode.java | 20 +++++++++++++++ 6 files changed, 102 insertions(+) create mode 100644 src/org/nwapw/abacus/lexing/pattern/AnyNode.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/EndNode.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/LinkNode.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/PatternNode.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/RangeNode.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/ValueNode.java diff --git a/src/org/nwapw/abacus/lexing/pattern/AnyNode.java b/src/org/nwapw/abacus/lexing/pattern/AnyNode.java new file mode 100644 index 0000000..1cfa635 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/AnyNode.java @@ -0,0 +1,10 @@ +package org.nwapw.abacus.lexing.pattern; + +public class AnyNode extends PatternNode { + + @Override + public boolean matches(char other) { + return true; + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/EndNode.java b/src/org/nwapw/abacus/lexing/pattern/EndNode.java new file mode 100644 index 0000000..d8e621d --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/EndNode.java @@ -0,0 +1,15 @@ +package org.nwapw.abacus.lexing.pattern; + +public class EndNode extends PatternNode { + + private T patternId; + + public EndNode(T patternId){ + this.patternId = patternId; + } + + public T getPatternId(){ + return patternId; + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/LinkNode.java 
b/src/org/nwapw/abacus/lexing/pattern/LinkNode.java new file mode 100644 index 0000000..69b16ed --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/LinkNode.java @@ -0,0 +1,14 @@ +package org.nwapw.abacus.lexing.pattern; + +import java.util.ArrayList; + +public class LinkNode extends PatternNode { + + @Override + public void addInto(ArrayList> into) { + for(PatternNode node : outputStates){ + node.addInto(into); + } + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternNode.java b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java new file mode 100644 index 0000000..adf7bf5 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java @@ -0,0 +1,25 @@ +package org.nwapw.abacus.lexing.pattern; + +import java.util.ArrayList; + +public class PatternNode { + + protected ArrayList> outputStates; + + public PatternNode(){ + outputStates = new ArrayList<>(); + } + + public boolean matches(char other){ + return false; + } + + public char range(){ + return '\0'; + } + + public void addInto(ArrayList> into){ + into.add(this); + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/RangeNode.java b/src/org/nwapw/abacus/lexing/pattern/RangeNode.java new file mode 100644 index 0000000..4ff9e5b --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/RangeNode.java @@ -0,0 +1,18 @@ +package org.nwapw.abacus.lexing.pattern; + +public class RangeNode extends PatternNode { + + private char from; + private char to; + + public RangeNode(char from, char to){ + this.from = from; + this.to = to; + } + + @Override + public boolean matches(char other) { + return other >= from && other <= to; + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/ValueNode.java b/src/org/nwapw/abacus/lexing/pattern/ValueNode.java new file mode 100644 index 0000000..855f805 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/ValueNode.java @@ -0,0 +1,20 @@ +package org.nwapw.abacus.lexing.pattern; + +public class ValueNode extends PatternNode { + + private char value; + + 
public ValueNode(char value){ + this.value = value; + } + + @Override + public boolean matches(char other) { + return other == value; + } + + @Override + public char range() { + return value; + } +} From 62003810169a52a266fbcebd8b6e25f8ce9f9056 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 17:42:52 -0700 Subject: [PATCH 02/11] Implement a tentative pattern class that can be compiled from a string. --- .../nwapw/abacus/lexing/pattern/Pattern.java | 162 ++++++++++++++++++ .../abacus/lexing/pattern/PatternChain.java | 40 +++++ 2 files changed, 202 insertions(+) create mode 100644 src/org/nwapw/abacus/lexing/pattern/Pattern.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/PatternChain.java diff --git a/src/org/nwapw/abacus/lexing/pattern/Pattern.java b/src/org/nwapw/abacus/lexing/pattern/Pattern.java new file mode 100644 index 0000000..364152b --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/Pattern.java @@ -0,0 +1,162 @@ +package org.nwapw.abacus.lexing.pattern; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Stack; +import java.util.function.Function; + +public class Pattern { + + private T id; + private PatternNode head; + private String source; + private int index; + + private HashMap, PatternChain>> operations = + new HashMap, PatternChain>>() {{ + put('+', Pattern.this::transformPlus); + put('*', Pattern.this::transformStar); + put('?', Pattern.this::transformQuestion); + }}; + + private PatternChain transformPlus(PatternChain chain){ + chain.tail.outputStates.add(chain.head); + return chain; + } + + private PatternChain transformStar(PatternChain chain){ + LinkNode newTail = new LinkNode<>(); + LinkNode newHead = new LinkNode<>(); + newHead.outputStates.add(chain.head); + newHead.outputStates.add(newTail); + chain.tail.outputStates.add(newTail); + newTail.outputStates.add(newHead); + chain.head = newHead; + chain.tail = newTail; + return chain; + } + + private PatternChain 
transformQuestion(PatternChain chain){ + LinkNode newTail = new LinkNode<>(); + LinkNode newHead = new LinkNode<>(); + newHead.outputStates.add(chain.head); + newHead.outputStates.add(newTail); + chain.tail.outputStates.add(newTail); + chain.head = newHead; + chain.tail = newTail; + return chain; + } + + private PatternChain combineChains(Collection> collection){ + LinkNode head = new LinkNode<>(); + LinkNode tail = new LinkNode<>(); + PatternChain newChain = new PatternChain<>(head, tail); + for(PatternChain chain : collection){ + head.outputStates.add(chain.head); + chain.tail.outputStates.add(tail); + } + return newChain; + } + + private PatternChain parseValue(){ + if(index >= source.length()) return null; + if(source.charAt(index) == '\\'){ + if(++index >= source.length()) return null; + } + return new PatternChain<>(new ValueNode<>(source.charAt(index++))); + } + + private PatternChain parseOr(){ + Stack> orStack = new Stack<>(); + while(index < source.length() && source.charAt(index) != ']'){ + if(source.charAt(index) == '-'){ + index++; + if(orStack.empty() || orStack.peek().tail.range() == '\0') return null; + PatternChain bottomRange = orStack.pop(); + PatternChain topRange = parseValue(); + if(topRange == null || topRange.tail.range() == '\0') return null; + + orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range()))); + } else { + PatternChain newChain = parseValue(); + if(newChain == null) return null; + orStack.push(newChain); + } + index++; + } + if(index >= source.length()) return null; + return combineChains(orStack); + } + + private PatternChain parseSegment(boolean isSubsegment){ + if(index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null; + if(isSubsegment) index++; + + Stack> orChain = new Stack<>(); + PatternChain fullChain = new PatternChain<>(); + PatternChain currentChain = null; + while (index < source.length() && source.charAt(index) != ')'){ + char currentChar = 
source.charAt(index); + if(operations.containsKey(currentChar)){ + if(currentChain == null) return null; + + currentChain = operations.get(currentChar).apply(currentChain); + fullChain.append(currentChain); + currentChain = null; + index++; + } else if(currentChar == '|'){ + if(currentChain == null) return null; + + fullChain.append(currentChain); + orChain.push(fullChain); + currentChain = null; + fullChain = new PatternChain<>(); + } else if(currentChar == '('){ + if(currentChain != null) { + fullChain.append(currentChain); + } + + currentChain = parseSegment(true); + if(currentChain == null) return null; + } else if(currentChar == '['){ + if(currentChain != null){ + fullChain.append(currentChain); + } + currentChain = parseOr(); + if(currentChain == null) return null; + } else { + if(currentChain != null){ + fullChain.append(currentChain); + } + currentChain = parseValue(); + if(currentChain == null) return null; + } + } + + if(!(!isSubsegment || (index < source.length() && source.charAt(index) == ')'))) return null; + if(isSubsegment) index++; + + if(currentChain != null) fullChain.append(currentChain); + if(!orChain.empty()){ + orChain.push(fullChain); + fullChain = combineChains(orChain); + } + + return fullChain; + } + + public Pattern(String from, T id){ + this.id = id; + index = 0; + source = from; + + PatternChain chain = parseSegment(false); + if(chain == null) { + head = null; + } else { + chain.append(new EndNode<>(id)); + head = chain.head; + } + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternChain.java b/src/org/nwapw/abacus/lexing/pattern/PatternChain.java new file mode 100644 index 0000000..aad9be5 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/PatternChain.java @@ -0,0 +1,40 @@ +package org.nwapw.abacus.lexing.pattern; + +public class PatternChain { + + public PatternNode head; + public PatternNode tail; + + public PatternChain(PatternNode head, PatternNode tail){ + this.head = head; + this.tail = tail; + } + + public 
PatternChain(PatternNode node){ + this(node, node); + } + + public PatternChain(){ + this(null); + } + + public void append(PatternChain other){ + if(other.head == null || tail == null) { + this.head = other.head; + this.tail = other.tail; + } else { + tail.outputStates.add(other.head); + tail = other.tail; + } + } + + public void append(PatternNode node){ + if(tail == null){ + head = tail = node; + } else { + tail.outputStates.add(node); + tail = node; + } + } + +} From 02141d8df02091e89cb02144417753032337525b Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 19:46:22 -0700 Subject: [PATCH 03/11] Switch underlying implementation to Set from List. --- src/org/nwapw/abacus/lexing/pattern/PatternNode.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternNode.java b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java index adf7bf5..3ef36d7 100644 --- a/src/org/nwapw/abacus/lexing/pattern/PatternNode.java +++ b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java @@ -1,13 +1,14 @@ package org.nwapw.abacus.lexing.pattern; import java.util.ArrayList; +import java.util.HashSet; public class PatternNode { - protected ArrayList> outputStates; + protected HashSet> outputStates; public PatternNode(){ - outputStates = new ArrayList<>(); + outputStates = new HashSet<>(); } public boolean matches(char other){ From ac3087fc3f0362a66c0c8fac3ea8b6ec547c6a14 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 20:45:56 -0700 Subject: [PATCH 04/11] Add a new function to add children into collection. 
--- src/org/nwapw/abacus/lexing/pattern/LinkNode.java | 7 +++---- src/org/nwapw/abacus/lexing/pattern/PatternNode.java | 7 ++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/org/nwapw/abacus/lexing/pattern/LinkNode.java b/src/org/nwapw/abacus/lexing/pattern/LinkNode.java index 69b16ed..460c418 100644 --- a/src/org/nwapw/abacus/lexing/pattern/LinkNode.java +++ b/src/org/nwapw/abacus/lexing/pattern/LinkNode.java @@ -1,14 +1,13 @@ package org.nwapw.abacus.lexing.pattern; import java.util.ArrayList; +import java.util.Collection; public class LinkNode extends PatternNode { @Override - public void addInto(ArrayList> into) { - for(PatternNode node : outputStates){ - node.addInto(into); - } + public void addInto(Collection> into) { + addOutputsInto(into); } } diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternNode.java b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java index 3ef36d7..4c0908a 100644 --- a/src/org/nwapw/abacus/lexing/pattern/PatternNode.java +++ b/src/org/nwapw/abacus/lexing/pattern/PatternNode.java @@ -1,6 +1,7 @@ package org.nwapw.abacus.lexing.pattern; import java.util.ArrayList; +import java.util.Collection; import java.util.HashSet; public class PatternNode { @@ -19,8 +20,12 @@ public class PatternNode { return '\0'; } - public void addInto(ArrayList> into){ + public void addInto(Collection> into){ into.add(this); } + public void addOutputsInto(Collection> into){ + outputStates.forEach(e -> e.addInto(into)); + } + } From 43c3d5f7548f5075b1e0d24ab3e1a1eac116efd9 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 20:47:13 -0700 Subject: [PATCH 05/11] Fix a few bugs in the pattern compilation code. 
--- src/org/nwapw/abacus/lexing/pattern/Pattern.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/org/nwapw/abacus/lexing/pattern/Pattern.java b/src/org/nwapw/abacus/lexing/pattern/Pattern.java index 364152b..860e036 100644 --- a/src/org/nwapw/abacus/lexing/pattern/Pattern.java +++ b/src/org/nwapw/abacus/lexing/pattern/Pattern.java @@ -68,6 +68,7 @@ public class Pattern { private PatternChain parseOr(){ Stack> orStack = new Stack<>(); + index++; while(index < source.length() && source.charAt(index) != ']'){ if(source.charAt(index) == '-'){ index++; @@ -82,10 +83,9 @@ public class Pattern { if(newChain == null) return null; orStack.push(newChain); } - index++; } - if(index >= source.length()) return null; - return combineChains(orStack); + if(index++ >= source.length()) return null; + return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack); } private PatternChain parseSegment(boolean isSubsegment){ @@ -124,6 +124,12 @@ public class Pattern { } currentChain = parseOr(); if(currentChain == null) return null; + } else if(currentChar == '.'){ + if(currentChain != null){ + fullChain.append(currentChain); + } + currentChain = new PatternChain<>(new AnyNode<>()); + index++; } else { if(currentChain != null){ fullChain.append(currentChain); @@ -159,4 +165,7 @@ public class Pattern { } } + public PatternNode getHead() { + return head; + } } From c86e192d2e3ed6d5c6359a707cfee18f44501155 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 20:47:25 -0700 Subject: [PATCH 06/11] Implement a lexer. 
--- src/org/nwapw/abacus/lexing/Lexer.java | 70 +++++++++++++++++++ .../nwapw/abacus/lexing/pattern/Match.java | 26 +++++++ 2 files changed, 96 insertions(+) create mode 100644 src/org/nwapw/abacus/lexing/Lexer.java create mode 100644 src/org/nwapw/abacus/lexing/pattern/Match.java diff --git a/src/org/nwapw/abacus/lexing/Lexer.java b/src/org/nwapw/abacus/lexing/Lexer.java new file mode 100644 index 0000000..22ed298 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/Lexer.java @@ -0,0 +1,70 @@ +package org.nwapw.abacus.lexing; + +import org.nwapw.abacus.lexing.pattern.EndNode; +import org.nwapw.abacus.lexing.pattern.Match; +import org.nwapw.abacus.lexing.pattern.Pattern; +import org.nwapw.abacus.lexing.pattern.PatternNode; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; + +public class Lexer { + + private ArrayList> patterns; + + public Lexer(){ + patterns = new ArrayList<>(); + } + + public void register(String pattern, T id){ + Pattern compiledPattern = new Pattern<>(pattern, id); + if(compiledPattern.getHead() != null) patterns.add(compiledPattern); + } + + public Match lexOne(String from, int startAt, Comparator compare){ + ArrayList> matches = new ArrayList<>(); + HashSet> currentSet = new HashSet<>(); + HashSet> futureSet = new HashSet<>(); + int index = startAt; + for(Pattern pattern : patterns){ + pattern.getHead().addInto(currentSet); + } + while(!currentSet.isEmpty() && index < from.length()){ + char currentChar = from.charAt(index); + for(PatternNode node : currentSet){ + if(node.matches(currentChar)) { + node.addOutputsInto(futureSet); + } else if(node instanceof EndNode){ + matches.add(new Match<>(startAt, index, ((EndNode) node).getPatternId())); + } + } + + HashSet> tmp = currentSet; + currentSet = futureSet; + futureSet = tmp; + futureSet.clear(); + + index++; + } + matches.sort((a, b) -> compare.compare(a.getType(), b.getType())); + if(compare != null) { + 
matches.sort(Comparator.comparingInt(a -> a.getTo() - a.getFrom())); + } + return matches.isEmpty() ? null : matches.get(matches.size() - 1); + } + + public ArrayList> lexAll(String from, int startAt, Comparator compare){ + int index = startAt; + ArrayList> matches = new ArrayList<>(); + Match lastMatch = null; + while((lastMatch = lexOne(from, index, compare)) != null && index < from.length()){ + if(lastMatch.getTo() == lastMatch.getFrom()) return null; + matches.add(lastMatch); + index += lastMatch.getTo() - lastMatch.getFrom(); + } + return matches; + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/Match.java b/src/org/nwapw/abacus/lexing/pattern/Match.java new file mode 100644 index 0000000..06e0b27 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/Match.java @@ -0,0 +1,26 @@ +package org.nwapw.abacus.lexing.pattern; + +public class Match { + + private int from; + private int to; + private T type; + + public Match(int from, int to, T type){ + this.from = from; + this.to = to; + this.type = type; + } + + public int getFrom() { + return from; + } + + public int getTo() { + return to; + } + + public T getType() { + return type; + } +} From 42db6b3c2fd3d93945f01e239c788114bc569192 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 24 Jul 2017 21:00:35 -0700 Subject: [PATCH 07/11] Fix bug causing the last character not to be matched. 
--- src/org/nwapw/abacus/lexing/Lexer.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/org/nwapw/abacus/lexing/Lexer.java b/src/org/nwapw/abacus/lexing/Lexer.java index 22ed298..94457a1 100644 --- a/src/org/nwapw/abacus/lexing/Lexer.java +++ b/src/org/nwapw/abacus/lexing/Lexer.java @@ -6,7 +6,6 @@ import org.nwapw.abacus.lexing.pattern.Pattern; import org.nwapw.abacus.lexing.pattern.PatternNode; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.HashSet; @@ -31,10 +30,9 @@ public class Lexer { for(Pattern pattern : patterns){ pattern.getHead().addInto(currentSet); } - while(!currentSet.isEmpty() && index < from.length()){ - char currentChar = from.charAt(index); + while(!currentSet.isEmpty()){ for(PatternNode node : currentSet){ - if(node.matches(currentChar)) { + if(index < from.length() && node.matches(from.charAt(index))) { node.addOutputsInto(futureSet); } else if(node instanceof EndNode){ matches.add(new Match<>(startAt, index, ((EndNode) node).getPatternId())); From 1dcd6beb1ccc34c01f90bcc0e35d115020c7f891 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Tue, 25 Jul 2017 11:08:03 -0700 Subject: [PATCH 08/11] Implement two enums for conversion between strings and tokens. 
--- .../nwapw/abacus/tree/OperatorAssociativity.java | 5 +++++ src/org/nwapw/abacus/tree/TokenType.java | 13 +++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 src/org/nwapw/abacus/tree/OperatorAssociativity.java create mode 100644 src/org/nwapw/abacus/tree/TokenType.java diff --git a/src/org/nwapw/abacus/tree/OperatorAssociativity.java b/src/org/nwapw/abacus/tree/OperatorAssociativity.java new file mode 100644 index 0000000..78b633b --- /dev/null +++ b/src/org/nwapw/abacus/tree/OperatorAssociativity.java @@ -0,0 +1,5 @@ +package org.nwapw.abacus.tree; + +public enum OperatorAssociativity { + LEFT, RIGHT +} diff --git a/src/org/nwapw/abacus/tree/TokenType.java b/src/org/nwapw/abacus/tree/TokenType.java new file mode 100644 index 0000000..e0d7d97 --- /dev/null +++ b/src/org/nwapw/abacus/tree/TokenType.java @@ -0,0 +1,13 @@ +package org.nwapw.abacus.tree; + +public enum TokenType { + + ANY(0), OP(1), NUM(2), WORD(3), OPEN_PARENTH(4), CLOSE_PARENTH(5); + + public final int priority; + + TokenType(int priority){ + this.priority = priority; + } + +} From 043d02ead2c1ae6dc73339fd2674e33aa7ff54ba Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Tue, 25 Jul 2017 13:53:19 -0700 Subject: [PATCH 09/11] Fix a bug in the pattern class. 
--- src/org/nwapw/abacus/lexing/pattern/Pattern.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/org/nwapw/abacus/lexing/pattern/Pattern.java b/src/org/nwapw/abacus/lexing/pattern/Pattern.java index 860e036..4d40d77 100644 --- a/src/org/nwapw/abacus/lexing/pattern/Pattern.java +++ b/src/org/nwapw/abacus/lexing/pattern/Pattern.java @@ -111,6 +111,7 @@ public class Pattern { orChain.push(fullChain); currentChain = null; fullChain = new PatternChain<>(); + if(++index >= source.length()) return null; } else if(currentChar == '('){ if(currentChain != null) { fullChain.append(currentChain); From 36846733622caa4294ee75a9f4c85b442608826b Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Tue, 25 Jul 2017 13:53:38 -0700 Subject: [PATCH 10/11] Implement Shunting Yard. --- src/org/nwapw/abacus/tree/TreeNode.java | 85 +++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/org/nwapw/abacus/tree/TreeNode.java diff --git a/src/org/nwapw/abacus/tree/TreeNode.java b/src/org/nwapw/abacus/tree/TreeNode.java new file mode 100644 index 0000000..f34d48d --- /dev/null +++ b/src/org/nwapw/abacus/tree/TreeNode.java @@ -0,0 +1,85 @@ +package org.nwapw.abacus.tree; + +import org.nwapw.abacus.lexing.Lexer; +import org.nwapw.abacus.lexing.pattern.Match; + +import java.util.*; + +public abstract class TreeNode { + + private static Lexer lexer = new Lexer(){{ + register(".", TokenType.ANY); + register("\\+|-|\\*|/|^", TokenType.OP); + register("[0-9]+(\\.[0-9]+)?", TokenType.NUM); + register("[a-zA-Z]+", TokenType.WORD); + register("\\(", TokenType.OPEN_PARENTH); + register("\\)", TokenType.CLOSE_PARENTH); + }}; + private static HashMap precedenceMap = new HashMap(){{ + put("+", 0); + put("-", 0); + put("*", 1); + put("/", 1); + put("^", 2); + }}; + private static HashMap associativityMap = + new HashMap() {{ + put("+", OperatorAssociativity.LEFT); + put("-", OperatorAssociativity.LEFT); + put("*", OperatorAssociativity.LEFT); + put("/", 
OperatorAssociativity.LEFT); + put("^", OperatorAssociativity.RIGHT); + }}; + + private static Comparator tokenSorter = Comparator.comparingInt(e -> e.priority); + + public static ArrayList> tokenize(String string){ + return lexer.lexAll(string, 0, tokenSorter); + } + + public static ArrayList> intoPostfix(String source, ArrayList> from){ + ArrayList> output = new ArrayList<>(); + Stack> tokenStack = new Stack<>(); + while(!from.isEmpty()){ + Match match = from.remove(0); + if(match.getType() == TokenType.NUM) { + output.add(match); + } else if(match.getType() == TokenType.OP){ + String tokenString = source.substring(match.getFrom(), match.getTo()); + int precedence = precedenceMap.get(tokenString); + OperatorAssociativity associativity = associativityMap.get(tokenString); + + while(!tokenStack.empty()) { + Match otherMatch = tokenStack.peek(); + if(otherMatch.getType() != TokenType.OP) break; + + int otherPrecdence = precedenceMap.get(source.substring(otherMatch.getFrom(), otherMatch.getTo())); + if(otherPrecdence < precedence || + (associativity == OperatorAssociativity.RIGHT && otherPrecdence == precedence)) { + break; + } + output.add(tokenStack.pop()); + } + tokenStack.push(match); + } else if(match.getType() == TokenType.OPEN_PARENTH){ + tokenStack.push(match); + } else if(match.getType() == TokenType.CLOSE_PARENTH){ + while(!tokenStack.empty() && tokenStack.peek().getType() != TokenType.OPEN_PARENTH){ + output.add(tokenStack.pop()); + } + if(tokenStack.empty()) return null; + tokenStack.pop(); + } + } + while(!tokenStack.empty()){ + if(!(tokenStack.peek().getType() == TokenType.OP)) return null; + output.add(tokenStack.pop()); + } + return output; + } + + public static TreeNode fromString(String string){ + return null; + } + +} From e4e9e2ce2115391b616d383706b27a35d3a5c719 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Tue, 25 Jul 2017 14:21:00 -0700 Subject: [PATCH 11/11] Implement parsing a postfix expression into a tree. 
--- src/org/nwapw/abacus/tree/NumberNode.java | 25 +++++++++++++++ src/org/nwapw/abacus/tree/OpNode.java | 38 +++++++++++++++++++++++ src/org/nwapw/abacus/tree/TreeNode.java | 20 +++++++++++- 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 src/org/nwapw/abacus/tree/NumberNode.java create mode 100644 src/org/nwapw/abacus/tree/OpNode.java diff --git a/src/org/nwapw/abacus/tree/NumberNode.java b/src/org/nwapw/abacus/tree/NumberNode.java new file mode 100644 index 0000000..bc034b0 --- /dev/null +++ b/src/org/nwapw/abacus/tree/NumberNode.java @@ -0,0 +1,25 @@ +package org.nwapw.abacus.tree; + +import org.nwapw.abacus.number.NaiveNumber; +import org.nwapw.abacus.number.Number; + +public class NumberNode extends TreeNode { + + private Number number; + + public NumberNode(){ + number = null; + } + + public NumberNode(double value){ + number = new NaiveNumber(value); + } + + public NumberNode(String value){ + this(Double.parseDouble(value)); + } + + public Number getNumber() { + return number; + } +} diff --git a/src/org/nwapw/abacus/tree/OpNode.java b/src/org/nwapw/abacus/tree/OpNode.java new file mode 100644 index 0000000..0fcdaed --- /dev/null +++ b/src/org/nwapw/abacus/tree/OpNode.java @@ -0,0 +1,38 @@ +package org.nwapw.abacus.tree; + +public class OpNode extends TreeNode { + + private String operation; + private TreeNode left; + private TreeNode right; + + public OpNode(String operation){ + this(operation, null, null); + } + + public OpNode(String operation, TreeNode left, TreeNode right){ + this.operation = operation; + this.left = left; + this.right = right; + } + + public String getOperation() { + return operation; + } + + public TreeNode getLeft() { + return left; + } + + public void setLeft(TreeNode left) { + this.left = left; + } + + public TreeNode getRight() { + return right; + } + + public void setRight(TreeNode right) { + this.right = right; + } +} diff --git a/src/org/nwapw/abacus/tree/TreeNode.java 
b/src/org/nwapw/abacus/tree/TreeNode.java index f34d48d..6581826 100644 --- a/src/org/nwapw/abacus/tree/TreeNode.java +++ b/src/org/nwapw/abacus/tree/TreeNode.java @@ -78,8 +78,26 @@ public abstract class TreeNode { return output; } - public static TreeNode fromString(String string){ + public static TreeNode fromStringRecursive(String source, ArrayList> matches){ + if(matches.size() == 0) return null; + Match match = matches.remove(0); + if(match.getType() == TokenType.OP){ + TreeNode right = fromStringRecursive(source, matches); + TreeNode left = fromStringRecursive(source, matches); + if(left == null || right == null) return null; + else return new OpNode(source.substring(match.getFrom(), match.getTo()), left, right); + } else if(match.getType() == TokenType.NUM){ + return new NumberNode(Double.parseDouble(source.substring(match.getFrom(), match.getTo()))); + } return null; } + public static TreeNode fromString(String string){ + ArrayList> matches = intoPostfix(string, tokenize(string)); + if(matches == null) return null; + + Collections.reverse(matches); + return fromStringRecursive(string, matches); + } + }