diff --git a/src/org/nwapw/abacus/lexing/pattern/Pattern.java b/src/org/nwapw/abacus/lexing/pattern/Pattern.java
new file mode 100644
index 0000000..364152b
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/Pattern.java
@@ -0,0 +1,248 @@
+package org.nwapw.abacus.lexing.pattern;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Stack;
+import java.util.function.Function;
+
+/**
+ * A pattern compiled from a regex-like source string into a graph of
+ * {@link PatternNode}s that ends in an {@link EndNode} built from the id.
+ *
+ * Syntax handled by the parser: literal characters, {@code \} to escape
+ * the next character, {@code (...)} groups, {@code [...]} character sets
+ * with {@code a-z} ranges, {@code |} alternation and the postfix operators
+ * {@code +}, {@code *} and {@code ?}. When the source cannot be parsed the
+ * head of the pattern is {@code null}.
+ *
+ * @param <T> type of the identifier passed to the end node
+ */
+public class Pattern<T> {
+
+    /** Identifier passed to the end node of the compiled graph. */
+    private T id;
+    /** Entry node of the compiled graph, or {@code null} if parsing failed. */
+    private PatternNode<T> head;
+    /** Pattern text being parsed. */
+    private String source;
+    /** Position of the next unread character in {@link #source}. */
+    private int index;
+
+    /** Postfix operator characters mapped to the chain transformation they apply. */
+    private HashMap<Character, Function<PatternChain<T>, PatternChain<T>>> operations =
+            new HashMap<Character, Function<PatternChain<T>, PatternChain<T>>>() {{
+                put('+', Pattern.this::transformPlus);
+                put('*', Pattern.this::transformStar);
+                put('?', Pattern.this::transformQuestion);
+            }};
+
+    /**
+     * Applies {@code +}: links the chain's tail back to its head so the chain
+     * can be traversed again after a first pass.
+     *
+     * @param chain chain to modify in place
+     * @return the same chain
+     */
+    private PatternChain<T> transformPlus(PatternChain<T> chain){
+        chain.tail.outputStates.add(chain.head);
+        return chain;
+    }
+
+    /**
+     * Applies {@code *}: wraps the chain in two new link nodes so it can be
+     * bypassed or repeated.
+     *
+     * @param chain chain to modify in place
+     * @return the same chain, now starting and ending at the new link nodes
+     */
+    private PatternChain<T> transformStar(PatternChain<T> chain){
+        LinkNode<T> newTail = new LinkNode<>();
+        LinkNode<T> newHead = new LinkNode<>();
+        newHead.outputStates.add(chain.head);
+        newHead.outputStates.add(newTail); // bypass edge: zero occurrences
+        chain.tail.outputStates.add(newTail);
+        newTail.outputStates.add(newHead); // loop edge: further occurrences
+        chain.head = newHead;
+        chain.tail = newTail;
+        return chain;
+    }
+
+    /**
+     * Applies {@code ?}: wraps the chain in two new link nodes so it can be
+     * bypassed, without adding a loop edge.
+     *
+     * @param chain chain to modify in place
+     * @return the same chain, now starting and ending at the new link nodes
+     */
+    private PatternChain<T> transformQuestion(PatternChain<T> chain){
+        LinkNode<T> newTail = new LinkNode<>();
+        LinkNode<T> newHead = new LinkNode<>();
+        newHead.outputStates.add(chain.head);
+        newHead.outputStates.add(newTail); // bypass edge: zero occurrences
+        chain.tail.outputStates.add(newTail);
+        chain.head = newHead;
+        chain.tail = newTail;
+        return chain;
+    }
+
+    /**
+     * Joins several chains as alternatives between a shared pair of new link
+     * nodes.
+     *
+     * @param collection chains to place side by side; each must be non-empty
+     * @return a chain from the new head link to the new tail link
+     */
+    private PatternChain<T> combineChains(Collection<PatternChain<T>> collection){
+        LinkNode<T> head = new LinkNode<>();
+        LinkNode<T> tail = new LinkNode<>();
+        PatternChain<T> newChain = new PatternChain<>(head, tail);
+        for(PatternChain<T> chain : collection){
+            head.outputStates.add(chain.head);
+            chain.tail.outputStates.add(tail);
+        }
+        return newChain;
+    }
+
+    /**
+     * Reads one literal character at {@link #index}, honouring a leading
+     * backslash escape, and advances past it.
+     *
+     * @return a single-node chain for the character, or {@code null} at end of input
+     */
+    private PatternChain<T> parseValue(){
+        if(index >= source.length()) return null;
+        if(source.charAt(index) == '\\'){
+            if(++index >= source.length()) return null;
+        }
+        return new PatternChain<>(new ValueNode<>(source.charAt(index++)));
+    }
+
+    /**
+     * Parses a {@code [...]} character set. Must be called with {@link #index}
+     * on the opening {@code [}; on success {@link #index} is left just past the
+     * closing {@code ]}.
+     *
+     * @return a chain offering each member as an alternative, or {@code null}
+     *         if the set is unterminated or contains a malformed range
+     */
+    private PatternChain<T> parseOr(){
+        Stack<PatternChain<T>> orStack = new Stack<>();
+        index++; // step over '[' so it is not read as a member of the set
+        while(index < source.length() && source.charAt(index) != ']'){
+            if(source.charAt(index) == '-'){
+                index++;
+                // range() presumably yields '\0' for nodes that cannot bound a range -- TODO confirm
+                if(orStack.empty() || orStack.peek().tail.range() == '\0') return null;
+                PatternChain<T> bottomRange = orStack.pop();
+                PatternChain<T> topRange = parseValue();
+                if(topRange == null || topRange.tail.range() == '\0') return null;
+
+                orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range())));
+            } else {
+                PatternChain<T> newChain = parseValue();
+                if(newChain == null) return null;
+                orStack.push(newChain);
+            }
+            // parseValue() has already advanced index; no extra increment here
+        }
+        if(index >= source.length()) return null;
+        index++; // step over ']' so the caller does not read it as a literal
+        return combineChains(orStack);
+    }
+
+    /**
+     * Parses a sequence of pattern elements up to the end of input or a
+     * closing {@code )}.
+     *
+     * NOTE(review): at the top level a stray {@code )} ends parsing and the
+     * rest of the source is ignored rather than rejected.
+     *
+     * @param isSubsegment {@code true} to parse a group: {@link #index} must be
+     *        on {@code (} and the matching {@code )} is consumed
+     * @return the parsed chain (empty for an empty group), or {@code null} if
+     *         the source is malformed
+     */
+    private PatternChain<T> parseSegment(boolean isSubsegment){
+        if(index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null;
+        if(isSubsegment) index++;
+
+        Stack<PatternChain<T>> orChain = new Stack<>();
+        PatternChain<T> fullChain = new PatternChain<>();
+        PatternChain<T> currentChain = null;
+        while (index < source.length() && source.charAt(index) != ')'){
+            char currentChar = source.charAt(index);
+            if(operations.containsKey(currentChar)){
+                // an operator needs a non-empty operand; "()*" would otherwise hit a null node
+                if(currentChain == null || currentChain.head == null) return null;
+
+                currentChain = operations.get(currentChar).apply(currentChain);
+                fullChain.append(currentChain);
+                currentChain = null;
+                index++;
+            } else if(currentChar == '|'){
+                // flush the pending element; it is already flushed after a postfix operator
+                if(currentChain != null) fullChain.append(currentChain);
+                if(fullChain.head == null) return null; // nothing to the left of '|'
+
+                orChain.push(fullChain);
+                currentChain = null;
+                fullChain = new PatternChain<>();
+                index++; // consume '|'
+            } else if(currentChar == '('){
+                if(currentChain != null) {
+                    fullChain.append(currentChain);
+                }
+
+                currentChain = parseSegment(true);
+                if(currentChain == null) return null;
+            } else if(currentChar == '['){
+                if(currentChain != null){
+                    fullChain.append(currentChain);
+                }
+                currentChain = parseOr();
+                if(currentChain == null) return null;
+            } else {
+                if(currentChain != null){
+                    fullChain.append(currentChain);
+                }
+                currentChain = parseValue();
+                if(currentChain == null) return null;
+            }
+        }
+
+        // a group must end on its closing ')'
+        if(isSubsegment && (index >= source.length() || source.charAt(index) != ')')) return null;
+        if(isSubsegment) index++;
+
+        if(currentChain != null) fullChain.append(currentChain);
+        if(!orChain.empty()){
+            if(fullChain.head == null) return null; // nothing to the right of the last '|'
+            orChain.push(fullChain);
+            fullChain = combineChains(orChain);
+        }
+
+        return fullChain;
+    }
+
+    /**
+     * Compiles the given source into a node graph ending in an end node for
+     * {@code id}. If the source is malformed the pattern has a {@code null} head.
+     *
+     * @param from pattern source text
+     * @param id identifier passed to the end node
+     */
+    public Pattern(String from, T id){
+        this.id = id;
+        index = 0;
+        source = from;
+
+        PatternChain<T> chain = parseSegment(false);
+        if(chain == null) {
+            head = null;
+        } else {
+            chain.append(new EndNode<>(id));
+            head = chain.head;
+        }
+    }
+
+}
diff --git a/src/org/nwapw/abacus/lexing/pattern/PatternChain.java b/src/org/nwapw/abacus/lexing/pattern/PatternChain.java
new file mode 100644
index 0000000..aad9be5
--- /dev/null
+++ b/src/org/nwapw/abacus/lexing/pattern/PatternChain.java
@@ -0,0 +1,72 @@
+package org.nwapw.abacus.lexing.pattern;
+
+/**
+ * A run of connected pattern nodes, tracked by its first and last node.
+ * A chain with a {@code null} head is empty.
+ *
+ * @param <T> type parameter shared with the nodes in the chain
+ */
+public class PatternChain<T> {
+
+    /** First node of the chain, or {@code null} when the chain is empty. */
+    public PatternNode<T> head;
+    /** Last node of the chain, or {@code null} when the chain is empty. */
+    public PatternNode<T> tail;
+
+    /**
+     * Creates a chain spanning the given nodes.
+     *
+     * @param head first node
+     * @param tail last node
+     */
+    public PatternChain(PatternNode<T> head, PatternNode<T> tail){
+        this.head = head;
+        this.tail = tail;
+    }
+
+    /**
+     * Creates a chain consisting of a single node.
+     *
+     * @param node the only node, used as both head and tail
+     */
+    public PatternChain(PatternNode<T> node){
+        this(node, node);
+    }
+
+    /** Creates an empty chain. */
+    public PatternChain(){
+        this(null);
+    }
+
+    /**
+     * Appends another chain to the end of this one. Appending an empty
+     * chain leaves this chain unchanged.
+     *
+     * @param other chain to append; its nodes are linked, not copied
+     */
+    public void append(PatternChain<T> other){
+        if(other.head == null) return; // nothing to add; keep what is already here
+        if(tail == null) {
+            this.head = other.head;
+            this.tail = other.tail;
+        } else {
+            tail.outputStates.add(other.head);
+            tail = other.tail;
+        }
+    }
+
+    /**
+     * Appends a single node to the end of this chain.
+     *
+     * @param node node to append
+     */
+    public void append(PatternNode<T> node){
+        if(tail == null){
+            head = tail = node;
+        } else {
+            tail.outputStates.add(node);
+            tail = node;
+        }
+    }
+
+}