1
0
mirror of https://github.com/DanilaFe/abacus synced 2026-01-28 09:35:19 +00:00

Move the source files into a new default directory.

This commit is contained in:
2017-07-29 23:44:21 -07:00
parent 3131d96d07
commit 43c11f8454
41 changed files with 0 additions and 8 deletions

View File

@@ -0,0 +1,144 @@
package org.nwapw.abacus.lexing;
import org.nwapw.abacus.lexing.pattern.EndNode;
import org.nwapw.abacus.lexing.pattern.Match;
import org.nwapw.abacus.lexing.pattern.Pattern;
import org.nwapw.abacus.lexing.pattern.PatternNode;
import java.util.*;
/**
 * A lexer that can generate tokens of a given type given a list of regular expressions
 * to operate on.
 *
 * @param <T> the type used to identify which match belongs to which pattern.
 */
public class Lexer<T> {

    /**
     * An entry that represents a pattern that has been registered with the lexer.
     * Instances are used as keys of the pattern map, so equality and hashing
     * cover both the pattern source and the id.
     *
     * @param <T> the type used to identify the pattern.
     */
    private static class PatternEntry<T> {

        /**
         * The name of the entry (the source string of the pattern).
         */
        public final String name;

        /**
         * The id of the entry.
         */
        public final T id;

        /**
         * Creates a new pattern entry with the given name and id.
         *
         * @param name the name of the pattern entry.
         * @param id   the id of the pattern entry.
         */
        public PatternEntry(String name, T id) {
            this.name = name;
            this.id = id;
        }

        @Override
        public int hashCode() {
            return Objects.hash(name, id);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) return true;
            if (!(obj instanceof PatternEntry)) return false;
            PatternEntry<?> other = (PatternEntry<?>) obj;
            // Objects.equals keeps this null-safe, matching what hashCode() tolerates.
            return Objects.equals(name, other.name) && Objects.equals(id, other.id);
        }
    }

    /**
     * The registered patterns, keyed by their (source, id) pair.
     */
    private final Map<PatternEntry<T>, Pattern<T>> patterns;

    /**
     * Creates a new lexer with no registered patterns.
     */
    public Lexer() {
        patterns = new HashMap<>();
    }

    /**
     * Registers a single pattern. The pattern is compiled immediately; if the
     * compiled pattern has no head node it is silently not registered.
     *
     * @param pattern the pattern regex
     * @param id      the ID by which to identify the pattern.
     */
    public void register(String pattern, T id) {
        Pattern<T> compiledPattern = new Pattern<>(pattern, id);
        if (compiledPattern.getHead() != null) {
            patterns.put(new PatternEntry<>(pattern, id), compiledPattern);
        }
    }

    /**
     * Unregisters a pattern. Does nothing if no pattern was registered
     * with this exact source and id.
     *
     * @param pattern the pattern to unregister
     * @param id      the ID by which to identify the pattern.
     */
    public void unregister(String pattern, T id) {
        patterns.remove(new PatternEntry<>(pattern, id));
    }

    /**
     * Reads one token from the given string.
     * All registered patterns are advanced together, one input character at a
     * time, and a candidate match is recorded whenever an end node is active
     * and does not consume the current character. The longest candidate wins;
     * among equally long candidates the one ranked highest by {@code compare} wins.
     *
     * @param from    the string to read from
     * @param startAt the index to start at
     * @param compare the comparator used to sort tokens by their ID; may be null,
     *                in which case the choice among equally long matches is unspecified.
     * @return the best match, or null if nothing matched.
     */
    public Match<T> lexOne(String from, int startAt, Comparator<T> compare) {
        List<Match<T>> matches = new ArrayList<>();
        Set<PatternNode<T>> currentSet = new HashSet<>();
        Set<PatternNode<T>> futureSet = new HashSet<>();
        int index = startAt;
        for (Pattern<T> pattern : patterns.values()) {
            pattern.getHead().addInto(currentSet);
        }
        while (!currentSet.isEmpty()) {
            for (PatternNode<T> node : currentSet) {
                if (index < from.length() && node.matches(from.charAt(index))) {
                    node.addOutputsInto(futureSet);
                } else if (node instanceof EndNode) {
                    matches.add(new Match<>(from.substring(startAt, index), ((EndNode<T>) node).getPatternId()));
                }
            }

            // Swap the two sets and recycle the old one for the next step.
            Set<PatternNode<T>> tmp = currentSet;
            currentSet = futureSet;
            futureSet = tmp;
            futureSet.clear();

            index++;
        }
        // Order by ID first (only possible with a comparator), then by length.
        // List.sort is stable, so the ID order survives among equally long matches.
        if (compare != null) {
            matches.sort((a, b) -> compare.compare(a.getType(), b.getType()));
        }
        matches.sort(Comparator.comparingInt(a -> a.getContent().length()));
        return matches.isEmpty() ? null : matches.get(matches.size() - 1);
    }

    /**
     * Reads all tokens from a string.
     *
     * @param from    the string to start from.
     * @param startAt the index to start at.
     * @param compare the comparator used to sort matches by their IDs.
     * @return the resulting list of matches, in order, or null on error. Null is
     * returned when some position cannot be matched, when a match is empty, and
     * also when no token is read at all (for example when {@code startAt} is
     * already at or past the end of the string).
     */
    public List<Match<T>> lexAll(String from, int startAt, Comparator<T> compare) {
        int index = startAt;
        List<Match<T>> matches = new ArrayList<>();
        Match<T> lastMatch = null;
        while (index < from.length() && (lastMatch = lexOne(from, index, compare)) != null) {
            int length = lastMatch.getContent().length();
            // An empty match would never advance the index, so treat it as an error.
            if (length == 0) return null;
            matches.add(lastMatch);
            index += length;
        }
        if (lastMatch == null) return null;
        return matches;
    }
}

View File

@@ -0,0 +1,14 @@
package org.nwapw.abacus.lexing.pattern;
/**
 * Wildcard pattern node: it accepts whatever character it is offered.
 *
 * @param <T> the type that's used to tell which pattern this node belongs to.
 */
public class AnyNode<T> extends PatternNode<T> {

    /**
     * Accepts every character.
     *
     * @param other the character being matched (ignored).
     * @return always true.
     */
    @Override
    public boolean matches(char other) {
        return true;
    }
}

View File

@@ -0,0 +1,30 @@
package org.nwapw.abacus.lexing.pattern;
/**
 * Terminal node of a compiled pattern; reaching it represents a successful match.
 *
 * @param <T> the type that's used to tell which pattern this node belongs to.
 */
public class EndNode<T> extends PatternNode<T> {

    /**
     * The ID of the pattern that has been matched.
     */
    private final T patternId;

    /**
     * Creates a new end node carrying the given pattern ID.
     *
     * @param patternId the pattern ID.
     */
    public EndNode(T patternId) {
        this.patternId = patternId;
    }

    /**
     * Gets the ID of the pattern this node terminates.
     *
     * @return the pattern ID.
     */
    public T getPatternId() {
        return this.patternId;
    }
}

View File

@@ -0,0 +1,20 @@
package org.nwapw.abacus.lexing.pattern;
import java.util.ArrayList;
import java.util.Collection;
/**
 * A node that is used as structural glue in pattern compilation.
 * It inherits the default {@code matches}, so it consumes no input itself;
 * adding it to a collection also adds everything it leads to.
 *
 * @param <T> the type that's used to tell which pattern this node belongs to.
 */
public class LinkNode<T> extends PatternNode<T> {

    /**
     * Adds this node and then its outputs to the collection. If the node is
     * already present nothing happens, which stops cycles of link nodes from
     * recursing forever.
     *
     * @param into the collection to add into.
     */
    @Override
    public void addInto(Collection<PatternNode<T>> into) {
        if (into.contains(this)) {
            return;
        }
        into.add(this);
        addOutputsInto(into);
    }
}

View File

@@ -0,0 +1,43 @@
package org.nwapw.abacus.lexing.pattern;
/**
 * A single match produced by the lexer: the matched text together with
 * the ID of the pattern that matched it.
 *
 * @param <T> the type used to represent the ID of the pattern this match belongs to.
 */
public class Match<T> {

    /**
     * The matched text.
     */
    private final String content;

    /**
     * The ID of the pattern that produced this match.
     */
    private final T type;

    /**
     * Creates a new match from the matched text and the pattern ID.
     *
     * @param content the content of this match.
     * @param type    the type of the match.
     */
    public Match(String content, T type) {
        this.type = type;
        this.content = content;
    }

    /**
     * Gets the matched text.
     *
     * @return the content.
     */
    public String getContent() {
        return this.content;
    }

    /**
     * Gets the pattern type of this match.
     *
     * @return the ID of the pattern that this match matched.
     */
    public T getType() {
        return this.type;
    }
}

View File

@@ -0,0 +1,257 @@
package org.nwapw.abacus.lexing.pattern;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import java.util.function.Function;
/**
 * A pattern that can be compiled from a string and used in lexing.
 * The compiler understands the postfix operators {@code +}, {@code *} and
 * {@code ?}, alternation with {@code |}, grouping with {@code ( )}, bracket
 * sets with {@code [ ]} (including {@code a-z} style ranges), {@code .} for
 * any character, and a backslash to take the next character literally.
 *
 * @param <T> the type that is used to identify and sort this pattern.
 */
public class Pattern<T> {

    /**
     * Characters that {@link #sanitize(String)} escapes. This must contain every
     * key of {@link #operations} plus the other characters the compiler treats
     * specially, including the backslash itself.
     */
    private static final String SPECIAL_CHARACTERS = "\\.|()[]+*?";

    /**
     * The ID of this pattern.
     */
    private final T id;

    /**
     * The head of this pattern, or null if compilation failed.
     */
    private PatternNode<T> head;

    /**
     * The source string of this pattern.
     */
    private final String source;

    /**
     * The index at which the compilation has stopped.
     */
    private int index;

    /**
     * A map of regex operator to functions that modify a PatternChain
     * with the appropriate operation.
     */
    private final Map<Character, Function<PatternChain<T>, PatternChain<T>>> operations = new HashMap<>();

    // Populated in an instance initializer (runs before the constructor body)
    // instead of through an anonymous HashMap subclass.
    {
        operations.put('+', this::transformPlus);
        operations.put('*', this::transformStar);
        operations.put('?', this::transformQuestion);
    }

    /**
     * A regex operator function that turns the chain
     * into a one-or-more chain by looping the tail back to the head.
     *
     * @param chain the chain to transform; must not be empty.
     * @return the modified chain.
     */
    private PatternChain<T> transformPlus(PatternChain<T> chain) {
        chain.tail.outputStates.add(chain.head);
        return chain;
    }

    /**
     * A regex operator function that turns the chain
     * into a zero-or-more chain.
     *
     * @param chain the chain to transform; must not be empty.
     * @return the modified chain.
     */
    private PatternChain<T> transformStar(PatternChain<T> chain) {
        LinkNode<T> newTail = new LinkNode<>();
        LinkNode<T> newHead = new LinkNode<>();

        newHead.outputStates.add(chain.head);
        newHead.outputStates.add(newTail);   // skip the chain entirely
        chain.tail.outputStates.add(newTail);
        newTail.outputStates.add(newHead);   // go around again

        chain.head = newHead;
        chain.tail = newTail;
        return chain;
    }

    /**
     * A regex operator function that turns the chain
     * into a zero-or-one chain.
     *
     * @param chain the chain to transform; must not be empty.
     * @return the modified chain.
     */
    private PatternChain<T> transformQuestion(PatternChain<T> chain) {
        LinkNode<T> newTail = new LinkNode<>();
        LinkNode<T> newHead = new LinkNode<>();

        newHead.outputStates.add(chain.head);
        newHead.outputStates.add(newTail);   // skip the chain entirely
        chain.tail.outputStates.add(newTail);

        chain.head = newHead;
        chain.tail = newTail;
        return chain;
    }

    /**
     * Combines a collection of chains into one OR chain.
     *
     * @param collection the collection of chains to combine; every chain must be non-empty.
     * @return the resulting OR chain.
     */
    private PatternChain<T> combineChains(Collection<PatternChain<T>> collection) {
        LinkNode<T> head = new LinkNode<>();
        LinkNode<T> tail = new LinkNode<>();
        PatternChain<T> newChain = new PatternChain<>(head, tail);
        for (PatternChain<T> chain : collection) {
            head.outputStates.add(chain.head);
            chain.tail.outputStates.add(tail);
        }
        return newChain;
    }

    /**
     * Parses a single value from the input into a chain. A backslash makes
     * the character after it the value.
     *
     * @return the resulting chain, or null on error.
     */
    private PatternChain<T> parseValue() {
        if (index >= source.length()) return null;
        if (source.charAt(index) == '\\') {
            if (++index >= source.length()) return null;
        }
        return new PatternChain<>(new ValueNode<>(source.charAt(index++)));
    }

    /**
     * Parses a [] range from the input into a chain.
     *
     * @return the resulting chain, or null on error.
     */
    private PatternChain<T> parseOr() {
        Stack<PatternChain<T>> orStack = new Stack<>();
        index++;
        while (index < source.length() && source.charAt(index) != ']') {
            if (source.charAt(index) == '-') {
                index++;
                // A range needs a plain value on both sides of the dash.
                if (orStack.empty() || orStack.peek().tail.range() == '\0') return null;
                PatternChain<T> bottomRange = orStack.pop();
                PatternChain<T> topRange = parseValue();
                if (topRange == null || topRange.tail.range() == '\0') return null;

                orStack.push(new PatternChain<>(new RangeNode<>(bottomRange.tail.range(), topRange.tail.range())));
            } else {
                PatternChain<T> newChain = parseValue();
                if (newChain == null) return null;
                orStack.push(newChain);
            }
        }
        // Running off the end means the closing bracket is missing.
        if (index++ >= source.length()) return null;
        return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack);
    }

    /**
     * Parses a repeatable segment from the input into a chain
     *
     * @param isSubsegment whether the segment is a sub-expression "()", and therefore
     *                     whether to expect a closing brace.
     * @return the resulting chain, or null on error.
     */
    private PatternChain<T> parseSegment(boolean isSubsegment) {
        if (index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null;
        if (isSubsegment) index++;

        Stack<PatternChain<T>> orChain = new Stack<>();
        PatternChain<T> fullChain = new PatternChain<>();
        PatternChain<T> currentChain = null;
        while (index < source.length() && source.charAt(index) != ')') {
            char currentChar = source.charAt(index);
            if (operations.containsKey(currentChar)) {
                // An operator needs a non-empty operand; "()+" used to throw a
                // NullPointerException inside the transform instead.
                if (currentChain == null || currentChain.head == null) return null;

                currentChain = operations.get(currentChar).apply(currentChain);
                fullChain.append(currentChain);
                currentChain = null;
                index++;
            } else if (currentChar == '|') {
                // The left alternative is everything collected so far. It may end
                // in a quantified item (e.g. "a+|b"), in which case currentChain
                // has already been folded into fullChain.
                if (currentChain != null) fullChain.append(currentChain);
                if (fullChain.head == null) return null;

                orChain.push(fullChain);
                currentChain = null;
                fullChain = new PatternChain<>();
                if (++index >= source.length()) return null;
            } else if (currentChar == '(') {
                if (currentChain != null) {
                    fullChain.append(currentChain);
                }

                currentChain = parseSegment(true);
                if (currentChain == null) return null;
            } else if (currentChar == '[') {
                if (currentChain != null) {
                    fullChain.append(currentChain);
                }

                currentChain = parseOr();
                if (currentChain == null) return null;
            } else if (currentChar == '.') {
                if (currentChain != null) {
                    fullChain.append(currentChain);
                }

                currentChain = new PatternChain<>(new AnyNode<>());
                index++;
            } else {
                if (currentChain != null) {
                    fullChain.append(currentChain);
                }

                currentChain = parseValue();
                if (currentChain == null) return null;
            }
        }

        // A sub-expression must be closed by ')'.
        if (isSubsegment && (index >= source.length() || source.charAt(index) != ')')) return null;
        if (isSubsegment) index++;

        if (currentChain != null) fullChain.append(currentChain);
        if (!orChain.empty()) {
            // An empty last alternative (e.g. "(a|)") cannot be combined.
            if (fullChain.head == null) return null;
            orChain.push(fullChain);
            fullChain = combineChains(orChain);
        }

        return fullChain;
    }

    /**
     * Creates / compiles a new pattern with the given id from the given string.
     * If compilation fails, {@link #getHead()} returns null. Parsing of the
     * top-level segment stops at an unmatched {@code ')'}; anything after it
     * is not compiled.
     *
     * @param from the string to compile a pattern from.
     * @param id   the ID to use.
     */
    public Pattern(String from, T id) {
        this.id = id;
        index = 0;
        source = from;

        PatternChain<T> chain = parseSegment(false);
        if (chain == null) {
            head = null;
        } else {
            chain.append(new EndNode<>(id));
            head = chain.head;
        }
    }

    /**
     * Gets the head PatternNode, for use in matching
     *
     * @return the pattern node, or null if the pattern failed to compile.
     */
    public PatternNode<T> getHead() {
        return head;
    }

    /**
     * Escapes all characters that are considered "special" in
     * the given string, so that compiling the result matches the
     * original text literally. The backslash and the bracket characters
     * are escaped as well; leaving them alone would let the input start
     * an escape sequence or a bracket set.
     *
     * @param from the string to sanitize.
     * @return the resulting string.
     */
    public static String sanitize(String from) {
        // Single pass, so backslashes inserted here are never escaped again.
        StringBuilder escaped = new StringBuilder(from.length() + 8);
        for (int i = 0; i < from.length(); i++) {
            char c = from.charAt(i);
            if (SPECIAL_CHARACTERS.indexOf(c) >= 0) {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }
}

View File

@@ -0,0 +1,75 @@
package org.nwapw.abacus.lexing.pattern;
/**
 * A chain of nodes that can be treated as a single unit.
 * Used during pattern compilation. An empty chain has a null
 * head and a null tail.
 *
 * @param <T> the type used to identify which pattern has been matched.
 */
public class PatternChain<T> {

    /**
     * The head node of the chain.
     */
    public PatternNode<T> head;

    /**
     * The tail node of the chain.
     */
    public PatternNode<T> tail;

    /**
     * Creates a new chain with the given start and end.
     *
     * @param head the start of the chain.
     * @param tail the end of the chain.
     */
    public PatternChain(PatternNode<T> head, PatternNode<T> tail) {
        this.head = head;
        this.tail = tail;
    }

    /**
     * Creates a chain that starts and ends with the same node.
     *
     * @param node the node to use.
     */
    public PatternChain(PatternNode<T> node) {
        this(node, node);
    }

    /**
     * Creates an empty chain.
     */
    public PatternChain() {
        this(null);
    }

    /**
     * Appends the other chain to this one. This modifies
     * the nodes, as well.
     * If this chain is empty, it is set to the other.
     * If the other chain is empty, this chain is left untouched.
     *
     * @param other the other chain to append.
     */
    public void append(PatternChain<T> other) {
        if (other.head == null) {
            // Nothing to append. Copying the empty chain's null head and tail
            // over this one would discard every node collected so far.
            return;
        }
        if (tail == null) {
            this.head = other.head;
            this.tail = other.tail;
        } else {
            tail.outputStates.add(other.head);
            tail = other.tail;
        }
    }

    /**
     * Appends a single node to this chain. This modifies
     * the nodes, as well.
     * If this chain is empty, it is set to the node.
     *
     * @param node the node to append to this chain.
     */
    public void append(PatternNode<T> node) {
        if (tail == null) {
            head = tail = node;
        } else {
            tail.outputStates.add(node);
            tail = node;
        }
    }
}

View File

@@ -0,0 +1,64 @@
package org.nwapw.abacus.lexing.pattern;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
/**
* A base class for a pattern node. Provides all functions
* necessary for matching, and is constructed by a Pattern instance
* from a string.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class PatternNode<T> {
/**
* The set of states to which the lexer should continue
* should this node be correctly matched.
*/
protected Set<PatternNode<T>> outputStates;
/**
* Creates a new pattern node.
*/
public PatternNode(){
outputStates = new HashSet<>();
}
/**
* Determines whether the current input character can
* be matched by this node.
* @param other the character being matched.
* @return true if the character can be matched, false otherwise.
*/
public boolean matches(char other){
return false;
}
/**
* If this node can be used as part of a range, returns that value.
* @return a NULL terminator if this character cannot be converted
* into a range bound, or the appropriate range bound if it can.
*/
public char range(){
return '\0';
}
/**
* Adds this node in a collection of other nodes.
* @param into the collection to add into.
*/
public void addInto(Collection<PatternNode<T>> into){
into.add(this);
}
/**
* Adds the node's children into a collection of other nodes.
* @param into the collection to add into.
*/
public void addOutputsInto(Collection<PatternNode<T>> into){
outputStates.forEach(e -> e.addInto(into));
}
}

View File

@@ -0,0 +1,33 @@
package org.nwapw.abacus.lexing.pattern;
/**
 * A node that accepts any character lying between two bounds.
 *
 * @param <T> the type that's used to tell which pattern this node belongs to.
 */
public class RangeNode<T> extends PatternNode<T> {

    /**
     * The bottom bound of the range, inclusive.
     */
    private final char from;

    /**
     * The top bound of the range, inclusive.
     */
    private final char to;

    /**
     * Creates a new range node from the given bounds.
     *
     * @param from the bottom bound of the range.
     * @param to   the top bound of the range.
     */
    public RangeNode(char from, char to) {
        this.to = to;
        this.from = from;
    }

    /**
     * Checks whether the character lies within the range, bounds included.
     *
     * @param other the character being matched.
     * @return true if {@code from <= other <= to}.
     */
    @Override
    public boolean matches(char other) {
        boolean atLeastBottom = other >= from;
        return atLeastBottom && other <= to;
    }
}

View File

@@ -0,0 +1,31 @@
package org.nwapw.abacus.lexing.pattern;
/**
 * A node that accepts exactly one specific character.
 *
 * @param <T> the type that's used to tell which pattern this node belongs to.
 */
public class ValueNode<T> extends PatternNode<T> {

    /**
     * The character this node accepts.
     */
    private final char value;

    /**
     * Creates a new node that matches the given character.
     *
     * @param value the character to match.
     */
    public ValueNode(char value) {
        this.value = value;
    }

    /**
     * Checks the character against this node's value.
     *
     * @param other the character being matched.
     * @return true if the character equals this node's value.
     */
    @Override
    public boolean matches(char other) {
        return value == other;
    }

    /**
     * Offers this node's character as a range bound.
     *
     * @return the character this node matches.
     */
    @Override
    public char range() {
        return this.value;
    }
}