mirror of
https://github.com/DanilaFe/abacus
synced 2026-01-28 09:35:19 +00:00
Format code.
This commit is contained in:
@@ -10,15 +10,111 @@ import java.util.*;
|
||||
/**
|
||||
* A lexer that can generate tokens of a given type given a list of regular expressions
|
||||
* to operate on.
|
||||
*
|
||||
* @param <T> the type used to identify which match belongs to which pattern.
|
||||
*/
|
||||
public class Lexer<T> {
|
||||
|
||||
/**
|
||||
* The registered patterns.
|
||||
*/
|
||||
private Map<PatternEntry<T>, Pattern<T>> patterns;
|
||||
|
||||
/**
 * Constructs a lexer whose pattern registry starts out empty.
 */
public Lexer() {
    this.patterns = new HashMap<>();
}
|
||||
|
||||
/**
|
||||
* Registers a single pattern.
|
||||
*
|
||||
* @param pattern the pattern regex
|
||||
* @param id the ID by which to identify the pattern.
|
||||
*/
|
||||
public void register(String pattern, T id) {
|
||||
Pattern<T> compiledPattern = new Pattern<>(pattern, id);
|
||||
if (compiledPattern.getHead() != null) patterns.put(new PatternEntry<>(pattern, id), compiledPattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregisters a pattern.
|
||||
*
|
||||
* @param pattern the pattern to unregister
|
||||
* @param id the ID by which to identify the pattern.
|
||||
*/
|
||||
public void unregister(String pattern, T id) {
|
||||
patterns.remove(new PatternEntry<>(pattern, id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads one token from the given string.
|
||||
*
|
||||
* @param from the string to read from
|
||||
* @param startAt the index to start at
|
||||
* @param compare the comparator used to sort tokens by their ID.
|
||||
* @return the best match.
|
||||
*/
|
||||
public Match<T> lexOne(String from, int startAt, Comparator<T> compare) {
|
||||
ArrayList<Match<T>> matches = new ArrayList<>();
|
||||
HashSet<PatternNode<T>> currentSet = new HashSet<>();
|
||||
HashSet<PatternNode<T>> futureSet = new HashSet<>();
|
||||
int index = startAt;
|
||||
for (Pattern<T> pattern : patterns.values()) {
|
||||
pattern.getHead().addInto(currentSet);
|
||||
}
|
||||
while (!currentSet.isEmpty()) {
|
||||
for (PatternNode<T> node : currentSet) {
|
||||
if (index < from.length() && node.matches(from.charAt(index))) {
|
||||
node.addOutputsInto(futureSet);
|
||||
} else if (node instanceof EndNode) {
|
||||
matches.add(new Match<>(from.substring(startAt, index), ((EndNode<T>) node).getPatternId()));
|
||||
}
|
||||
}
|
||||
|
||||
HashSet<PatternNode<T>> tmp = currentSet;
|
||||
currentSet = futureSet;
|
||||
futureSet = tmp;
|
||||
futureSet.clear();
|
||||
|
||||
index++;
|
||||
}
|
||||
matches.sort((a, b) -> compare.compare(a.getType(), b.getType()));
|
||||
if (compare != null) {
|
||||
matches.sort(Comparator.comparingInt(a -> a.getContent().length()));
|
||||
}
|
||||
return matches.isEmpty() ? null : matches.get(matches.size() - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all tokens from a string.
|
||||
*
|
||||
* @param from the string to start from.
|
||||
* @param startAt the index to start at.
|
||||
* @param compare the comparator used to sort matches by their IDs.
|
||||
* @return the resulting list of matches, in order, or null on error.
|
||||
*/
|
||||
public List<Match<T>> lexAll(String from, int startAt, Comparator<T> compare) {
|
||||
int index = startAt;
|
||||
ArrayList<Match<T>> matches = new ArrayList<>();
|
||||
Match<T> lastMatch = null;
|
||||
while (index < from.length() && (lastMatch = lexOne(from, index, compare)) != null) {
|
||||
int length = lastMatch.getContent().length();
|
||||
if (length == 0) return null;
|
||||
matches.add(lastMatch);
|
||||
index += length;
|
||||
}
|
||||
if (lastMatch == null) return null;
|
||||
return matches;
|
||||
}
|
||||
|
||||
/**
|
||||
* An entry that represents a pattern that has been registered with the lexer.
|
||||
*
|
||||
* @param <T> the type used to identify the pattern.
|
||||
*/
|
||||
private static class PatternEntry<T>{
|
||||
private static class PatternEntry<T> {
|
||||
/**
|
||||
* The name of the entry.
|
||||
*/
|
||||
@@ -30,10 +126,11 @@ public class Lexer<T> {
|
||||
|
||||
/**
|
||||
* Creates a new pattern entry with the given name and id.
|
||||
*
|
||||
* @param name the name of the pattern entry.
|
||||
* @param id the id of the pattern entry.
|
||||
* @param id the id of the pattern entry.
|
||||
*/
|
||||
public PatternEntry(String name, T id){
|
||||
public PatternEntry(String name, T id) {
|
||||
this.name = name;
|
||||
this.id = id;
|
||||
}
|
||||
@@ -51,94 +148,4 @@ public class Lexer<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The registered patterns.
|
||||
*/
|
||||
private Map<PatternEntry<T>, Pattern<T>> patterns;
|
||||
|
||||
/**
|
||||
* Creates a new lexer with no registered patterns.
|
||||
*/
|
||||
public Lexer(){
|
||||
patterns = new HashMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a single pattern.
|
||||
* @param pattern the pattern regex
|
||||
* @param id the ID by which to identify the pattern.
|
||||
*/
|
||||
public void register(String pattern, T id){
|
||||
Pattern<T> compiledPattern = new Pattern<>(pattern, id);
|
||||
if(compiledPattern.getHead() != null) patterns.put(new PatternEntry<>(pattern, id), compiledPattern);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregisters a pattern.
|
||||
* @param pattern the pattern to unregister
|
||||
* @param id the ID by which to identify the pattern.
|
||||
*/
|
||||
public void unregister(String pattern, T id){
|
||||
patterns.remove(new PatternEntry<>(pattern, id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads one token from the given string.
|
||||
* @param from the string to read from
|
||||
* @param startAt the index to start at
|
||||
* @param compare the comparator used to sort tokens by their ID.
|
||||
* @return the best match.
|
||||
*/
|
||||
public Match<T> lexOne(String from, int startAt, Comparator<T> compare){
|
||||
ArrayList<Match<T>> matches = new ArrayList<>();
|
||||
HashSet<PatternNode<T>> currentSet = new HashSet<>();
|
||||
HashSet<PatternNode<T>> futureSet = new HashSet<>();
|
||||
int index = startAt;
|
||||
for(Pattern<T> pattern : patterns.values()){
|
||||
pattern.getHead().addInto(currentSet);
|
||||
}
|
||||
while(!currentSet.isEmpty()){
|
||||
for(PatternNode<T> node : currentSet){
|
||||
if(index < from.length() && node.matches(from.charAt(index))) {
|
||||
node.addOutputsInto(futureSet);
|
||||
} else if(node instanceof EndNode){
|
||||
matches.add(new Match<>(from.substring(startAt, index), ((EndNode<T>) node).getPatternId()));
|
||||
}
|
||||
}
|
||||
|
||||
HashSet<PatternNode<T>> tmp = currentSet;
|
||||
currentSet = futureSet;
|
||||
futureSet = tmp;
|
||||
futureSet.clear();
|
||||
|
||||
index++;
|
||||
}
|
||||
matches.sort((a, b) -> compare.compare(a.getType(), b.getType()));
|
||||
if(compare != null) {
|
||||
matches.sort(Comparator.comparingInt(a -> a.getContent().length()));
|
||||
}
|
||||
return matches.isEmpty() ? null : matches.get(matches.size() - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads all tokens from a string.
|
||||
* @param from the string to start from.
|
||||
* @param startAt the index to start at.
|
||||
* @param compare the comparator used to sort matches by their IDs.
|
||||
* @return the resulting list of matches, in order, or null on error.
|
||||
*/
|
||||
public List<Match<T>> lexAll(String from, int startAt, Comparator<T> compare){
|
||||
int index = startAt;
|
||||
ArrayList<Match<T>> matches = new ArrayList<>();
|
||||
Match<T> lastMatch = null;
|
||||
while(index < from.length() && (lastMatch = lexOne(from, index, compare)) != null){
|
||||
int length = lastMatch.getContent().length();
|
||||
if(length == 0) return null;
|
||||
matches.add(lastMatch);
|
||||
index += length;
|
||||
}
|
||||
if(lastMatch == null) return null;
|
||||
return matches;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
/**
|
||||
* A pattern node that matches any character.
|
||||
*
|
||||
* @param <T> the type that's used to tell which pattern this node belongs to.
|
||||
*/
|
||||
public class AnyNode<T> extends PatternNode<T> {
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
/**
|
||||
* A node that represents a successful match.
|
||||
*
|
||||
* @param <T> the type that's used to tell which pattern this node belongs to.
|
||||
*/
|
||||
public class EndNode<T> extends PatternNode<T> {
|
||||
@@ -13,17 +14,19 @@ public class EndNode<T> extends PatternNode<T> {
|
||||
|
||||
/**
|
||||
* Creates a new end node with the given ID.
|
||||
*
|
||||
* @param patternId the pattern ID.
|
||||
*/
|
||||
public EndNode(T patternId){
|
||||
public EndNode(T patternId) {
|
||||
this.patternId = patternId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the pattern ID.
|
||||
*
|
||||
* @return the pattern ID.
|
||||
*/
|
||||
public T getPatternId(){
|
||||
public T getPatternId() {
|
||||
return patternId;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* A node that is used as structural glue in pattern compilation.
|
||||
*
|
||||
* @param <T> the type that's used to tell which pattern this node belongs to.
|
||||
*/
|
||||
public class LinkNode<T> extends PatternNode<T> {
|
||||
|
||||
@Override
|
||||
public void addInto(Collection<PatternNode<T>> into) {
|
||||
if(!into.contains(this)) {
|
||||
if (!into.contains(this)) {
|
||||
into.add(this);
|
||||
addOutputsInto(into);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
/**
|
||||
* A match that has been generated by the lexer.
|
||||
*
|
||||
* @param <T> the type used to represent the ID of the pattern this match belongs to.
|
||||
*/
|
||||
public class Match<T> {
|
||||
@@ -17,16 +18,18 @@ public class Match<T> {
|
||||
|
||||
/**
|
||||
* Creates a new match with the given parameters.
|
||||
*
|
||||
* @param content the content of this match.
|
||||
* @param type the type of the match.
|
||||
* @param type the type of the match.
|
||||
*/
|
||||
public Match(String content, T type){
|
||||
public Match(String content, T type) {
|
||||
this.content = content;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the content of this match.
|
||||
*
|
||||
* @return the content.
|
||||
*/
|
||||
public String getContent() {
|
||||
@@ -35,6 +38,7 @@ public class Match<T> {
|
||||
|
||||
/**
|
||||
* Gets the pattern type of the node.
|
||||
*
|
||||
* @return the ID of the pattern that this match matched.
|
||||
*/
|
||||
public T getType() {
|
||||
|
||||
@@ -8,6 +8,7 @@ import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* A pattern that can be compiled from a string and used in lexing.
|
||||
*
|
||||
* @param <T> the type that is used to identify and sort this pattern.
|
||||
*/
|
||||
public class Pattern<T> {
|
||||
@@ -40,13 +41,53 @@ public class Pattern<T> {
|
||||
put('?', Pattern.this::transformQuestion);
|
||||
}};
|
||||
|
||||
/**
|
||||
* Creates / compiles a new pattern with the given id from the given string.
|
||||
*
|
||||
* @param from the string to compile a pattern from.
|
||||
* @param id the ID to use.
|
||||
*/
|
||||
public Pattern(String from, T id) {
|
||||
this.id = id;
|
||||
index = 0;
|
||||
source = from;
|
||||
|
||||
PatternChain<T> chain = parseSegment(false);
|
||||
if (chain == null) {
|
||||
head = null;
|
||||
} else {
|
||||
chain.append(new EndNode<>(id));
|
||||
head = chain.head;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all characters that are considered "special" from
|
||||
* the given string.
|
||||
*
|
||||
* @param from the string to sanitize.
|
||||
* @return the resulting string.
|
||||
*/
|
||||
public static String sanitize(String from) {
|
||||
Pattern<Integer> pattern = new Pattern<>("", 0);
|
||||
from = from.replace(".", "\\.");
|
||||
from = from.replace("|", "\\|");
|
||||
from = from.replace("(", "\\(");
|
||||
from = from.replace(")", "\\)");
|
||||
for (Character key : pattern.operations.keySet()) {
|
||||
from = from.replace("" + key, "\\" + key);
|
||||
}
|
||||
return from;
|
||||
}
|
||||
|
||||
/**
|
||||
* A regex operator function that turns the chain
|
||||
* into a one-or-more chain.
|
||||
*
|
||||
* @param chain the chain to transform.
|
||||
* @return the modified chain.
|
||||
*/
|
||||
private PatternChain<T> transformPlus(PatternChain<T> chain){
|
||||
private PatternChain<T> transformPlus(PatternChain<T> chain) {
|
||||
chain.tail.outputStates.add(chain.head);
|
||||
return chain;
|
||||
}
|
||||
@@ -54,10 +95,11 @@ public class Pattern<T> {
|
||||
/**
|
||||
* A regex operator function that turns the chain
|
||||
* into a zero-or-more chain.
|
||||
*
|
||||
* @param chain the chain to transform.
|
||||
* @return the modified chain.
|
||||
*/
|
||||
private PatternChain<T> transformStar(PatternChain<T> chain){
|
||||
private PatternChain<T> transformStar(PatternChain<T> chain) {
|
||||
LinkNode<T> newTail = new LinkNode<>();
|
||||
LinkNode<T> newHead = new LinkNode<>();
|
||||
newHead.outputStates.add(chain.head);
|
||||
@@ -72,10 +114,11 @@ public class Pattern<T> {
|
||||
/**
|
||||
* A regex operator function that turns the chain
|
||||
* into a zero-or-one chain.
|
||||
*
|
||||
* @param chain the chain to transform.
|
||||
* @return the modified chain.
|
||||
*/
|
||||
private PatternChain<T> transformQuestion(PatternChain<T> chain){
|
||||
private PatternChain<T> transformQuestion(PatternChain<T> chain) {
|
||||
LinkNode<T> newTail = new LinkNode<>();
|
||||
LinkNode<T> newHead = new LinkNode<>();
|
||||
newHead.outputStates.add(chain.head);
|
||||
@@ -88,14 +131,15 @@ public class Pattern<T> {
|
||||
|
||||
/**
|
||||
* Combines a collection of chains into one OR chain.
|
||||
*
|
||||
* @param collection the collection of chains to combine.
|
||||
* @return the resulting OR chain.
|
||||
*/
|
||||
private PatternChain<T> combineChains(Collection<PatternChain<T>> collection){
|
||||
private PatternChain<T> combineChains(Collection<PatternChain<T>> collection) {
|
||||
LinkNode<T> head = new LinkNode<>();
|
||||
LinkNode<T> tail = new LinkNode<>();
|
||||
PatternChain<T> newChain = new PatternChain<>(head, tail);
|
||||
for(PatternChain<T> chain : collection){
|
||||
for (PatternChain<T> chain : collection) {
|
||||
head.outputStates.add(chain.head);
|
||||
chain.tail.outputStates.add(tail);
|
||||
}
|
||||
@@ -104,105 +148,108 @@ public class Pattern<T> {
|
||||
|
||||
/**
|
||||
* Parses a single value from the input into a chain.
|
||||
*
|
||||
* @return the resulting chain, or null on error.
|
||||
*/
|
||||
private PatternChain<T> parseValue(){
|
||||
if(index >= source.length()) return null;
|
||||
if(source.charAt(index) == '\\'){
|
||||
if(++index >= source.length()) return null;
|
||||
private PatternChain<T> parseValue() {
|
||||
if (index >= source.length()) return null;
|
||||
if (source.charAt(index) == '\\') {
|
||||
if (++index >= source.length()) return null;
|
||||
}
|
||||
return new PatternChain<>(new ValueNode<>(source.charAt(index++)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a [] range from the input into a chain.
|
||||
*
|
||||
* @return the resulting chain, or null on error.
|
||||
*/
|
||||
private PatternChain<T> parseOr(){
|
||||
private PatternChain<T> parseOr() {
|
||||
Stack<PatternChain<T>> orStack = new Stack<>();
|
||||
index++;
|
||||
while(index < source.length() && source.charAt(index) != ']'){
|
||||
if(source.charAt(index) == '-'){
|
||||
while (index < source.length() && source.charAt(index) != ']') {
|
||||
if (source.charAt(index) == '-') {
|
||||
index++;
|
||||
if(orStack.empty() || orStack.peek().tail.range() == '\0') return null;
|
||||
if (orStack.empty() || orStack.peek().tail.range() == '\0') return null;
|
||||
PatternChain<T> bottomRange = orStack.pop();
|
||||
PatternChain<T> topRange = parseValue();
|
||||
if(topRange == null || topRange.tail.range() == '\0') return null;
|
||||
if (topRange == null || topRange.tail.range() == '\0') return null;
|
||||
|
||||
orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range())));
|
||||
} else {
|
||||
PatternChain<T> newChain = parseValue();
|
||||
if(newChain == null) return null;
|
||||
if (newChain == null) return null;
|
||||
orStack.push(newChain);
|
||||
}
|
||||
}
|
||||
if(index++ >= source.length()) return null;
|
||||
if (index++ >= source.length()) return null;
|
||||
return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a repeatable segment from the input into a chain
|
||||
*
|
||||
* @param isSubsegment whether the segment is a sub-expression "()", and therefore
|
||||
* whether to expect a closing parenthesis.
|
||||
* @return the resulting chain, or null on error.
|
||||
*/
|
||||
private PatternChain<T> parseSegment(boolean isSubsegment){
|
||||
if(index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null;
|
||||
if(isSubsegment) index++;
|
||||
private PatternChain<T> parseSegment(boolean isSubsegment) {
|
||||
if (index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null;
|
||||
if (isSubsegment) index++;
|
||||
|
||||
Stack<PatternChain<T>> orChain = new Stack<>();
|
||||
PatternChain<T> fullChain = new PatternChain<>();
|
||||
PatternChain<T> currentChain = null;
|
||||
while (index < source.length() && source.charAt(index) != ')'){
|
||||
while (index < source.length() && source.charAt(index) != ')') {
|
||||
char currentChar = source.charAt(index);
|
||||
if(operations.containsKey(currentChar)){
|
||||
if(currentChain == null) return null;
|
||||
if (operations.containsKey(currentChar)) {
|
||||
if (currentChain == null) return null;
|
||||
|
||||
currentChain = operations.get(currentChar).apply(currentChain);
|
||||
fullChain.append(currentChain);
|
||||
currentChain = null;
|
||||
index++;
|
||||
} else if(currentChar == '|'){
|
||||
if(currentChain == null) return null;
|
||||
} else if (currentChar == '|') {
|
||||
if (currentChain == null) return null;
|
||||
|
||||
fullChain.append(currentChain);
|
||||
orChain.push(fullChain);
|
||||
currentChain = null;
|
||||
fullChain = new PatternChain<>();
|
||||
if(++index >= source.length()) return null;
|
||||
} else if(currentChar == '('){
|
||||
if(currentChain != null) {
|
||||
if (++index >= source.length()) return null;
|
||||
} else if (currentChar == '(') {
|
||||
if (currentChain != null) {
|
||||
fullChain.append(currentChain);
|
||||
}
|
||||
|
||||
currentChain = parseSegment(true);
|
||||
if(currentChain == null) return null;
|
||||
} else if(currentChar == '['){
|
||||
if(currentChain != null){
|
||||
if (currentChain == null) return null;
|
||||
} else if (currentChar == '[') {
|
||||
if (currentChain != null) {
|
||||
fullChain.append(currentChain);
|
||||
}
|
||||
currentChain = parseOr();
|
||||
if(currentChain == null) return null;
|
||||
} else if(currentChar == '.'){
|
||||
if(currentChain != null){
|
||||
if (currentChain == null) return null;
|
||||
} else if (currentChar == '.') {
|
||||
if (currentChain != null) {
|
||||
fullChain.append(currentChain);
|
||||
}
|
||||
currentChain = new PatternChain<>(new AnyNode<>());
|
||||
index++;
|
||||
} else {
|
||||
if(currentChain != null){
|
||||
if (currentChain != null) {
|
||||
fullChain.append(currentChain);
|
||||
}
|
||||
currentChain = parseValue();
|
||||
if(currentChain == null) return null;
|
||||
if (currentChain == null) return null;
|
||||
}
|
||||
}
|
||||
|
||||
if(!(!isSubsegment || (index < source.length() && source.charAt(index) == ')'))) return null;
|
||||
if(isSubsegment) index++;
|
||||
if (!(!isSubsegment || (index < source.length() && source.charAt(index) == ')'))) return null;
|
||||
if (isSubsegment) index++;
|
||||
|
||||
if(currentChain != null) fullChain.append(currentChain);
|
||||
if(!orChain.empty()){
|
||||
if (currentChain != null) fullChain.append(currentChain);
|
||||
if (!orChain.empty()) {
|
||||
orChain.push(fullChain);
|
||||
fullChain = combineChains(orChain);
|
||||
}
|
||||
@@ -210,48 +257,12 @@ public class Pattern<T> {
|
||||
return fullChain;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates / compiles a new pattern with the given id from the given string.
|
||||
* @param from the string to compile a pattern from.
|
||||
* @param id the ID to use.
|
||||
*/
|
||||
public Pattern(String from, T id){
|
||||
this.id = id;
|
||||
index = 0;
|
||||
source = from;
|
||||
|
||||
PatternChain<T> chain = parseSegment(false);
|
||||
if(chain == null) {
|
||||
head = null;
|
||||
} else {
|
||||
chain.append(new EndNode<>(id));
|
||||
head = chain.head;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the head PatternNode, for use in matching
|
||||
*
|
||||
* @return the pattern node.
|
||||
*/
|
||||
public PatternNode<T> getHead() {
|
||||
return head;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all characters that are considered "special" from
|
||||
* the given string.
|
||||
* @param from the string to sanitize.
|
||||
* @return the resulting string.
|
||||
*/
|
||||
public static String sanitize(String from){
|
||||
Pattern<Integer> pattern = new Pattern<>("", 0);
|
||||
from = from.replace(".", "\\.");
|
||||
from = from.replace("|", "\\|");
|
||||
from = from.replace("(", "\\(");
|
||||
from = from.replace(")", "\\)");
|
||||
for(Character key : pattern.operations.keySet()){
|
||||
from = from.replace("" + key, "\\" + key);
|
||||
}
|
||||
return from;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package org.nwapw.abacus.lexing.pattern;
|
||||
/**
|
||||
* A chain of nodes that can be treated as a single unit.
|
||||
* Used during pattern compilation.
|
||||
*
|
||||
* @param <T> the type used to identify which pattern has been matched.
|
||||
*/
|
||||
public class PatternChain<T> {
|
||||
@@ -18,26 +19,28 @@ public class PatternChain<T> {
|
||||
|
||||
/**
|
||||
* Creates a new chain with the given start and end.
|
||||
*
|
||||
* @param head the start of the chain.
|
||||
* @param tail the end of the chain.
|
||||
*/
|
||||
public PatternChain(PatternNode<T> head, PatternNode<T> tail){
|
||||
public PatternChain(PatternNode<T> head, PatternNode<T> tail) {
|
||||
this.head = head;
|
||||
this.tail = tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a chain that starts and ends with the same node.
|
||||
*
|
||||
* @param node the node to use.
|
||||
*/
|
||||
public PatternChain(PatternNode<T> node){
|
||||
public PatternChain(PatternNode<T> node) {
|
||||
this(node, node);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an empty chain.
|
||||
*/
|
||||
public PatternChain(){
|
||||
public PatternChain() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
@@ -45,10 +48,11 @@ public class PatternChain<T> {
|
||||
* Appends the other chain to this one. This modifies
|
||||
* the nodes, as well.
|
||||
* If this chain is empty, it is set to the other.
|
||||
*
|
||||
* @param other the other chain to append.
|
||||
*/
|
||||
public void append(PatternChain<T> other){
|
||||
if(other.head == null || tail == null) {
|
||||
public void append(PatternChain<T> other) {
|
||||
if (other.head == null || tail == null) {
|
||||
this.head = other.head;
|
||||
this.tail = other.tail;
|
||||
} else {
|
||||
@@ -61,10 +65,11 @@ public class PatternChain<T> {
|
||||
* Appends a single node to this chain. This modifies
|
||||
* the nodes, as well.
|
||||
* If this chain is empty, it is set to the node.
|
||||
*
|
||||
* @param node the node to append to this chain.
|
||||
*/
|
||||
public void append(PatternNode<T> node){
|
||||
if(tail == null){
|
||||
public void append(PatternNode<T> node) {
|
||||
if (tail == null) {
|
||||
head = tail = node;
|
||||
} else {
|
||||
tail.outputStates.add(node);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
@@ -9,6 +8,7 @@ import java.util.Set;
|
||||
* A base class for a pattern node. Provides all functions
|
||||
* necessary for matching, and is constructed by a Pattern instance
|
||||
* from a string.
|
||||
*
|
||||
* @param <T> the type that's used to tell which pattern this node belongs to.
|
||||
*/
|
||||
public class PatternNode<T> {
|
||||
@@ -22,42 +22,46 @@ public class PatternNode<T> {
|
||||
/**
|
||||
* Creates a new pattern node.
|
||||
*/
|
||||
public PatternNode(){
|
||||
public PatternNode() {
|
||||
outputStates = new HashSet<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the current input character can
|
||||
* be matched by this node.
|
||||
*
|
||||
* @param other the character being matched.
|
||||
* @return true if the character can be matched, false otherwise.
|
||||
*/
|
||||
public boolean matches(char other){
|
||||
public boolean matches(char other) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* If this node can be used as part of a range, returns that value.
|
||||
*
|
||||
* @return a NULL terminator if this character cannot be converted
|
||||
* into a range bound, or the appropriate range bound if it can.
|
||||
*/
|
||||
public char range(){
|
||||
public char range() {
|
||||
return '\0';
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this node in a collection of other nodes.
|
||||
*
|
||||
* @param into the collection to add into.
|
||||
*/
|
||||
public void addInto(Collection<PatternNode<T>> into){
|
||||
public void addInto(Collection<PatternNode<T>> into) {
|
||||
into.add(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the node's children into a collection of other nodes.
|
||||
*
|
||||
* @param into the collection to add into.
|
||||
*/
|
||||
public void addOutputsInto(Collection<PatternNode<T>> into){
|
||||
public void addOutputsInto(Collection<PatternNode<T>> into) {
|
||||
outputStates.forEach(e -> e.addInto(into));
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
/**
|
||||
* A node that matches a range of characters.
|
||||
*
|
||||
* @param <T> the type that's used to tell which pattern this node belongs to.
|
||||
*/
|
||||
public class RangeNode<T> extends PatternNode<T> {
|
||||
@@ -17,10 +18,11 @@ public class RangeNode<T> extends PatternNode<T> {
|
||||
|
||||
/**
|
||||
* Creates a new range node from the given range.
|
||||
*
|
||||
* @param from the bottom bound of the range.
|
||||
* @param to the top bound of the range.
|
||||
* @param to the top bound of the range.
|
||||
*/
|
||||
public RangeNode(char from, char to){
|
||||
public RangeNode(char from, char to) {
|
||||
this.from = from;
|
||||
this.to = to;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.nwapw.abacus.lexing.pattern;
|
||||
|
||||
/**
|
||||
* A node that matches a single value.
|
||||
*
|
||||
* @param <T> the type that's used to tell which pattern this node belongs to.
|
||||
*/
|
||||
public class ValueNode<T> extends PatternNode<T> {
|
||||
@@ -13,9 +14,10 @@ public class ValueNode<T> extends PatternNode<T> {
|
||||
|
||||
/**
|
||||
* Creates a new node that matches the given character.
|
||||
*
|
||||
* @param value the character this node matches.
|
||||
*/
|
||||
public ValueNode(char value){
|
||||
public ValueNode(char value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user