package org.nwapw.abacus.lexing.pattern; import java.util.Collection; import java.util.HashMap; import java.util.Stack; import java.util.function.Function; /** * A pattern that can be compiled from a string and used in lexing. * @param the type that is used to identify and sort this pattern. */ public class Pattern { /** * The ID of this pattern. */ private T id; /** * The head of this pattern. */ private PatternNode head; /** * The source string of this pattern. */ private String source; /** * The index at which the compilation has stopped. */ private int index; /** * A map of regex operator to functions that modify a PatternChain * with the appropriate operation. */ private HashMap, PatternChain>> operations = new HashMap, PatternChain>>() {{ put('+', Pattern.this::transformPlus); put('*', Pattern.this::transformStar); put('?', Pattern.this::transformQuestion); }}; /** * A regex operator function that turns the chain * into a one-or-more chain. * @param chain the chain to transform. * @return the modified chain. */ private PatternChain transformPlus(PatternChain chain){ chain.tail.outputStates.add(chain.head); return chain; } /** * A regex operator function that turns the chain * into a zero-or-more chain. * @param chain the chain to transform. * @return the modified chain. */ private PatternChain transformStar(PatternChain chain){ LinkNode newTail = new LinkNode<>(); LinkNode newHead = new LinkNode<>(); newHead.outputStates.add(chain.head); newHead.outputStates.add(newTail); chain.tail.outputStates.add(newTail); newTail.outputStates.add(newHead); chain.head = newHead; chain.tail = newTail; return chain; } /** * A regex operator function that turns the chain * into a zero-or-one chain. * @param chain the chain to transform. * @return the modified chain. */ private PatternChain transformQuestion(PatternChain chain){ LinkNode newTail = new LinkNode<>(); LinkNode newHead = new LinkNode<>(); newHead.outputStates.add(chain.head); newHead.outputStates.add(newTail); chain.tail.outputStates.add(newTail); chain.head = newHead; chain.tail = newTail; return chain; } /** * Combines a collection of chains into one OR chain. * @param collection the collection of chains to combine. * @return the resulting OR chain. */ private PatternChain combineChains(Collection> collection){ LinkNode head = new LinkNode<>(); LinkNode tail = new LinkNode<>(); PatternChain newChain = new PatternChain<>(head, tail); for(PatternChain chain : collection){ head.outputStates.add(chain.head); chain.tail.outputStates.add(tail); } return newChain; } /** * Parses a single value from the input into a chain. * @return the resulting chain, or null on error. */ private PatternChain parseValue(){ if(index >= source.length()) return null; if(source.charAt(index) == '\\'){ if(++index >= source.length()) return null; } return new PatternChain<>(new ValueNode<>(source.charAt(index++))); } /** * Parses a [] range from the input into a chain. * @return the resulting chain, or null on error. */ private PatternChain parseOr(){ Stack> orStack = new Stack<>(); index++; while(index < source.length() && source.charAt(index) != ']'){ if(source.charAt(index) == '-'){ index++; if(orStack.empty() || orStack.peek().tail.range() == '\0') return null; PatternChain bottomRange = orStack.pop(); PatternChain topRange = parseValue(); if(topRange == null || topRange.tail.range() == '\0') return null; orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range()))); } else { PatternChain newChain = parseValue(); if(newChain == null) return null; orStack.push(newChain); } } if(index++ >= source.length()) return null; return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack); } /** * Parses a repeatable segment from the input into a chain * @param isSubsegment whether the segment is a sub-expression "()", and therefore * whether to expect a closing brace. * @return the resulting chain, or null on error. */ private PatternChain parseSegment(boolean isSubsegment){ if(index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null; if(isSubsegment) index++; Stack> orChain = new Stack<>(); PatternChain fullChain = new PatternChain<>(); PatternChain currentChain = null; while (index < source.length() && source.charAt(index) != ')'){ char currentChar = source.charAt(index); if(operations.containsKey(currentChar)){ if(currentChain == null) return null; currentChain = operations.get(currentChar).apply(currentChain); fullChain.append(currentChain); currentChain = null; index++; } else if(currentChar == '|'){ if(currentChain == null) return null; fullChain.append(currentChain); orChain.push(fullChain); currentChain = null; fullChain = new PatternChain<>(); if(++index >= source.length()) return null; } else if(currentChar == '('){ if(currentChain != null) { fullChain.append(currentChain); } currentChain = parseSegment(true); if(currentChain == null) return null; } else if(currentChar == '['){ if(currentChain != null){ fullChain.append(currentChain); } currentChain = parseOr(); if(currentChain == null) return null; } else if(currentChar == '.'){ if(currentChain != null){ fullChain.append(currentChain); } currentChain = new PatternChain<>(new AnyNode<>()); index++; } else { if(currentChain != null){ fullChain.append(currentChain); } currentChain = parseValue(); if(currentChain == null) return null; } } if(!(!isSubsegment || (index < source.length() && source.charAt(index) == ')'))) return null; if(isSubsegment) index++; if(currentChain != null) fullChain.append(currentChain); if(!orChain.empty()){ orChain.push(fullChain); fullChain = combineChains(orChain); } return fullChain; } /** * Creates / compiles a new pattern with the given id from the given string. * @param from the string to compile a pattern from. * @param id the ID to use. */ public Pattern(String from, T id){ this.id = id; index = 0; source = from; PatternChain chain = parseSegment(false); if(chain == null) { head = null; } else { chain.append(new EndNode<>(id)); head = chain.head; } } /** * Gets the head PatternNode, for use in matching * @return the pattern node. */ public PatternNode getHead() { return head; } public static String sanitize(String from){ Pattern pattern = new Pattern<>("", 0); from = from.replace(".", "\\."); from = from.replace("|", "\\|"); from = from.replace("(", "\\("); from = from.replace(")", "\\)"); for(Character key : pattern.operations.keySet()){ from = from.replace("" + key, "\\" + key); } return from; } }