package org.nwapw.abacus.lexing.pattern;

import org.nwapw.abacus.lexing.pattern.nodes.*;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;

/**
 * A pattern that can be compiled from a string and used in lexing.
 *
 * <p>Supported syntax: literal characters, {@code \} to escape the next
 * character, {@code .} for any character, {@code [...]} character sets with
 * {@code a-b} ranges, {@code (...)} groups, {@code |} alternation and the
 * postfix operators {@code +}, {@code *} and {@code ?}.
 *
 * <p>Compilation never throws on malformed input; instead {@link #getHead()}
 * returns {@code null}.
 *
 * @param <T> the type that is used to identify and sort this pattern.
 */
public class Pattern<T> {

    /**
     * The postfix operators understood by the compiler. Each one is
     * dispatched to its transformation in {@link #applyOperator}.
     */
    private static final String OPERATORS = "+*?";

    /**
     * Every character that has a meaning other than "match myself" and
     * therefore has to be escaped by {@link #sanitize(String)}. The backslash
     * is included so that a literal backslash is not read as an escape.
     */
    private static final String SPECIAL_CHARACTERS = "\\.|()[]" + OPERATORS;

    /**
     * The ID of this pattern.
     */
    private final T id;
    /**
     * The head of this pattern, or null if compilation failed.
     */
    private final PatternNode<T> head;
    /**
     * The source string of this pattern.
     */
    private final String source;
    /**
     * The index at which the compilation has stopped.
     */
    private int index;

    /**
     * Creates / compiles a new pattern with the given id from the given string.
     *
     * @param from the string to compile a pattern from.
     * @param id   the ID to use.
     */
    public Pattern(String from, T id) {
        this.id = id;
        this.source = from;
        this.index = 0;

        PatternChain<T> chain = parseSegment(false);
        if (chain == null) {
            head = null;
        } else {
            // Terminate the compiled chain with a node carrying this pattern's ID.
            chain.append(new EndNode<>(id));
            head = chain.head;
        }
    }

    /**
     * Escapes all characters that are considered "special" in
     * the given string, so that compiling the result matches the
     * input literally.
     *
     * <p>The string is processed in a single pass: every special character
     * (including the backslash itself and the square brackets) is prefixed
     * with one backslash, and everything else is copied unchanged.
     *
     * @param from the string to sanitize.
     * @return the resulting string.
     */
    public static String sanitize(String from) {
        StringBuilder escaped = new StringBuilder(from.length() * 2);
        for (int i = 0; i < from.length(); i++) {
            char current = from.charAt(i);
            if (SPECIAL_CHARACTERS.indexOf(current) >= 0) {
                escaped.append('\\');
            }
            escaped.append(current);
        }
        return escaped.toString();
    }

    /**
     * Applies the postfix operator identified by the given character
     * to the given chain.
     *
     * @param operator one of the characters in {@link #OPERATORS}.
     * @param chain    the chain to transform.
     * @return the modified chain.
     * @throws IllegalArgumentException if the character is not an operator.
     */
    private PatternChain<T> applyOperator(char operator, PatternChain<T> chain) {
        switch (operator) {
            case '+':
                return transformPlus(chain);
            case '*':
                return transformStar(chain);
            case '?':
                return transformQuestion(chain);
            default:
                throw new IllegalArgumentException("Unknown pattern operator: " + operator);
        }
    }

    /**
     * A regex operator function that turns the chain
     * into a one-or-more chain.
     *
     * @param chain the chain to transform.
     * @return the modified chain.
     */
    private PatternChain<T> transformPlus(PatternChain<T> chain) {
        // Loop back from the end of the chain to its start.
        chain.tail.getOutputStates().add(chain.head);
        return chain;
    }

    /**
     * A regex operator function that turns the chain
     * into a zero-or-more chain.
     *
     * @param chain the chain to transform.
     * @return the modified chain.
     */
    private PatternChain<T> transformStar(PatternChain<T> chain) {
        LinkNode<T> newTail = new LinkNode<>();
        LinkNode<T> newHead = new LinkNode<>();

        newHead.getOutputStates().add(chain.head);
        // Skip edge: allows zero repetitions.
        newHead.getOutputStates().add(newTail);
        chain.tail.getOutputStates().add(newTail);
        // Back edge: allows further repetitions.
        newTail.getOutputStates().add(newHead);

        chain.head = newHead;
        chain.tail = newTail;
        return chain;
    }

    /**
     * A regex operator function that turns the chain
     * into a zero-or-one chain.
     *
     * @param chain the chain to transform.
     * @return the modified chain.
     */
    private PatternChain<T> transformQuestion(PatternChain<T> chain) {
        LinkNode<T> newTail = new LinkNode<>();
        LinkNode<T> newHead = new LinkNode<>();

        newHead.getOutputStates().add(chain.head);
        // Skip edge: allows the chain to be bypassed entirely.
        newHead.getOutputStates().add(newTail);
        chain.tail.getOutputStates().add(newTail);

        chain.head = newHead;
        chain.tail = newTail;
        return chain;
    }

    /**
     * Combines a collection of chains into one OR chain.
     *
     * @param collection the collection of chains to combine.
     * @return the resulting OR chain.
     */
    private PatternChain<T> combineChains(Collection<PatternChain<T>> collection) {
        LinkNode<T> head = new LinkNode<>();
        LinkNode<T> tail = new LinkNode<>();
        PatternChain<T> newChain = new PatternChain<>(head, tail);
        for (PatternChain<T> chain : collection) {
            // Fan out from the shared head and back in to the shared tail.
            head.getOutputStates().add(chain.head);
            chain.tail.getOutputStates().add(tail);
        }
        return newChain;
    }

    /**
     * Parses a single value from the input into a chain.
     * A backslash causes the character after it to be taken literally.
     *
     * @return the resulting chain, or null on error.
     */
    private PatternChain<T> parseValue() {
        if (index >= source.length()) {
            return null;
        }
        if (source.charAt(index) == '\\') {
            // A trailing backslash has nothing to escape.
            if (++index >= source.length()) {
                return null;
            }
        }
        return new PatternChain<>(new ValueNode<>(source.charAt(index++)));
    }

    /**
     * Parses a [] range from the input into a chain.
     * Expects the index to be on the opening bracket.
     *
     * @return the resulting chain, or null on error.
     */
    private PatternChain<T> parseOr() {
        Stack<PatternChain<T>> orStack = new Stack<>();
        index++; // skip '['
        while (index < source.length() && source.charAt(index) != ']') {
            if (source.charAt(index) == '-') {
                index++;
                // A range needs a usable lower bound already on the stack...
                if (orStack.empty() || orStack.peek().tail.range() == '\0') {
                    return null;
                }
                PatternChain<T> bottomRange = orStack.pop();
                // ...and a usable upper bound following the dash.
                PatternChain<T> topRange = parseValue();
                if (topRange == null || topRange.tail.range() == '\0') {
                    return null;
                }

                orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range())));
            } else {
                PatternChain<T> newChain = parseValue();
                if (newChain == null) {
                    return null;
                }
                orStack.push(newChain);
            }
        }
        // Running off the end of the input means the closing ']' is missing;
        // otherwise step past it.
        if (index++ >= source.length()) {
            return null;
        }
        return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack);
    }

    /**
     * Parses a repeatable segment from the input into a chain.
     *
     * <p>Errors reported as null include: an operator with nothing to apply
     * to, an empty alternative next to a {@code |}, a missing closing brace
     * for a sub-expression, and a closing brace at the top level that has no
     * matching opening brace.
     *
     * @param isSubsegment whether the segment is a sub-expression "()", and therefore
     *                     whether to expect a closing brace.
     * @return the resulting chain, or null on error.
     */
    private PatternChain<T> parseSegment(boolean isSubsegment) {
        if (index >= source.length() || (isSubsegment && source.charAt(index) != '(')) {
            return null;
        }
        if (isSubsegment) {
            index++; // skip '('
        }

        // Completed alternatives of this segment, separated by '|'.
        Stack<PatternChain<T>> orChain = new Stack<>();
        // Everything parsed so far in the current alternative.
        PatternChain<T> fullChain = new PatternChain<>();
        // The most recent element, held back so that a following operator can wrap it.
        PatternChain<T> currentChain = null;
        // Whether the current alternative contains at least one element.
        boolean branchHasContent = false;

        while (index < source.length() && source.charAt(index) != ')') {
            char currentChar = source.charAt(index);
            if (OPERATORS.indexOf(currentChar) >= 0) {
                if (currentChain == null) {
                    return null;
                }
                fullChain.append(applyOperator(currentChar, currentChain));
                currentChain = null;
                index++;
            } else if (currentChar == '|') {
                // An alternative may end in an operator (currentChain already
                // flushed), but it may not be empty.
                if (!branchHasContent) {
                    return null;
                }
                if (currentChain != null) {
                    fullChain.append(currentChain);
                }
                orChain.push(fullChain);
                currentChain = null;
                fullChain = new PatternChain<>();
                branchHasContent = false;
                if (++index >= source.length()) {
                    return null;
                }
            } else {
                // A new element starts, so the held-back one can no longer be
                // the target of an operator.
                if (currentChain != null) {
                    fullChain.append(currentChain);
                }
                if (currentChar == '(') {
                    currentChain = parseSegment(true);
                } else if (currentChar == '[') {
                    currentChain = parseOr();
                } else if (currentChar == '.') {
                    currentChain = new PatternChain<>(new AnyNode<>());
                    index++;
                } else {
                    currentChain = parseValue();
                }
                if (currentChain == null) {
                    return null;
                }
                branchHasContent = true;
            }
        }

        // The loop stops either at the end of the input or on a ')'.
        if (isSubsegment) {
            if (index >= source.length()) {
                return null; // missing ')'
            }
            index++; // skip ')'
        } else if (index < source.length()) {
            return null; // ')' without a matching '('
        }

        if (currentChain != null) {
            fullChain.append(currentChain);
        }
        if (orChain.empty()) {
            return fullChain;
        }
        // The last alternative must not be empty either.
        if (!branchHasContent) {
            return null;
        }
        orChain.push(fullChain);
        return combineChains(orChain);
    }

    /**
     * Gets the head PatternNode, for use in matching
     *
     * @return the pattern node, or null if the source string could not be compiled.
     */
    public PatternNode<T> getHead() {
        return head;
    }
}