1
0
mirror of https://github.com/DanilaFe/abacus synced 2024-06-30 06:40:58 -07:00
Abacus/core/src/main/java/org/nwapw/abacus/lexing/Lexer.java

154 lines
4.8 KiB
Java
Raw Normal View History

2017-07-24 20:47:25 -07:00
package org.nwapw.abacus.lexing;
import org.nwapw.abacus.lexing.pattern.nodes.EndNode;
2017-07-24 20:47:25 -07:00
import org.nwapw.abacus.lexing.pattern.Pattern;
import org.nwapw.abacus.lexing.pattern.nodes.PatternNode;
2017-07-24 20:47:25 -07:00
2017-07-27 10:54:11 -07:00
import java.util.*;
2017-07-24 20:47:25 -07:00
2017-07-25 22:47:48 -07:00
/**
* A lexer that can generate tokens of a given type given a list of regular expressions
* to operate on.
2017-07-30 21:11:32 -07:00
*
2017-07-25 22:47:48 -07:00
* @param <T> the type used to identify which match belongs to which pattern.
*/
2017-07-24 20:47:25 -07:00
public class Lexer<T> {
2017-07-25 22:47:48 -07:00
/**
 * The patterns currently registered with this lexer, keyed by the
 * (pattern string, ID) pair they were registered under.
 */
private final Map<PatternEntry<T>, Pattern<T>> patterns;

/**
 * Creates a new lexer with no registered patterns.
 */
public Lexer() {
    // The map itself is mutated by later calls, but the reference is
    // assigned exactly once, here, so the field can be final.
    patterns = new HashMap<>();
}
2017-07-25 22:47:48 -07:00
/**
* Registers a single pattern.
2017-07-30 21:11:32 -07:00
*
2017-07-25 22:47:48 -07:00
* @param pattern the pattern regex
2017-07-30 21:11:32 -07:00
* @param id the ID by which to identify the pattern.
2017-07-25 22:47:48 -07:00
*/
2017-07-30 21:11:32 -07:00
public void register(String pattern, T id) {
2017-07-24 20:47:25 -07:00
Pattern<T> compiledPattern = new Pattern<>(pattern, id);
2017-07-30 21:11:32 -07:00
if (compiledPattern.getHead() != null) patterns.put(new PatternEntry<>(pattern, id), compiledPattern);
2017-07-27 10:54:11 -07:00
}
/**
* Unregisters a pattern.
2017-07-30 21:11:32 -07:00
*
2017-07-27 10:54:11 -07:00
* @param pattern the pattern to unregister
2017-07-30 21:11:32 -07:00
* @param id the ID by which to identify the pattern.
2017-07-27 10:54:11 -07:00
*/
2017-07-30 21:11:32 -07:00
public void unregister(String pattern, T id) {
2017-07-27 10:54:11 -07:00
patterns.remove(new PatternEntry<>(pattern, id));
2017-07-24 20:47:25 -07:00
}
2017-07-25 22:47:48 -07:00
/**
* Reads one token from the given string.
2017-07-30 21:11:32 -07:00
*
* @param from the string to read from
2017-07-25 22:47:48 -07:00
* @param startAt the index to start at
* @param compare the comparator used to sort tokens by their ID.
* @return the best match.
*/
2017-07-30 21:11:32 -07:00
public Match<T> lexOne(String from, int startAt, Comparator<T> compare) {
2017-07-24 20:47:25 -07:00
ArrayList<Match<T>> matches = new ArrayList<>();
HashSet<PatternNode<T>> currentSet = new HashSet<>();
HashSet<PatternNode<T>> futureSet = new HashSet<>();
int index = startAt;
2017-07-30 21:11:32 -07:00
for (Pattern<T> pattern : patterns.values()) {
2017-07-24 20:47:25 -07:00
pattern.getHead().addInto(currentSet);
}
2017-07-30 21:11:32 -07:00
while (!currentSet.isEmpty()) {
for (PatternNode<T> node : currentSet) {
if (index < from.length() && node.matches(from.charAt(index))) {
2017-07-24 20:47:25 -07:00
node.addOutputsInto(futureSet);
2017-07-30 21:11:32 -07:00
} else if (node instanceof EndNode) {
matches.add(new Match<>(from.substring(startAt, index), ((EndNode<T>) node).getPatternId()));
2017-07-24 20:47:25 -07:00
}
}
HashSet<PatternNode<T>> tmp = currentSet;
currentSet = futureSet;
futureSet = tmp;
futureSet.clear();
index++;
}
2017-07-30 21:11:32 -07:00
if (compare != null) {
Collections.sort(matches, (a, b) -> compare.compare(a.getType(), b.getType()));
2017-07-24 20:47:25 -07:00
}
Collections.sort(matches, (o1, o2) -> o1.getContent().length() - o2.getContent().length());
2017-07-24 20:47:25 -07:00
return matches.isEmpty() ? null : matches.get(matches.size() - 1);
}
2017-07-25 22:47:48 -07:00
/**
* Reads all tokens from a string.
2017-07-30 21:11:32 -07:00
*
* @param from the string to start from.
2017-07-25 22:47:48 -07:00
* @param startAt the index to start at.
* @param compare the comparator used to sort matches by their IDs.
* @return the resulting list of matches, in order, or null on error.
*/
2017-07-30 21:11:32 -07:00
public List<Match<T>> lexAll(String from, int startAt, Comparator<T> compare) {
2017-07-24 20:47:25 -07:00
int index = startAt;
ArrayList<Match<T>> matches = new ArrayList<>();
Match<T> lastMatch = null;
2017-07-30 21:11:32 -07:00
while (index < from.length() && (lastMatch = lexOne(from, index, compare)) != null) {
int length = lastMatch.getContent().length();
2017-07-30 21:11:32 -07:00
if (length == 0) return null;
2017-07-24 20:47:25 -07:00
matches.add(lastMatch);
index += length;
2017-07-24 20:47:25 -07:00
}
2017-07-30 21:11:32 -07:00
if (lastMatch == null) return null;
2017-07-24 20:47:25 -07:00
return matches;
}
2017-07-30 21:11:32 -07:00
/**
* An entry that represents a pattern that has been registered with the lexer.
*
* @param <T> the type used to identify the pattern.
*/
private static class PatternEntry<T> {
/**
* The name of the entry.
*/
public String name;
/**
* The id of the entry.
*/
public T id;
/**
* Creates a new pattern entry with the given name and id.
*
* @param name the name of the pattern entry.
* @param id the id of the pattern entry.
*/
public PatternEntry(String name, T id) {
this.name = name;
this.id = id;
}
@Override
public int hashCode() {
return Arrays.hashCode(new Object[] {
this.name,
this.id
});
2017-07-30 21:11:32 -07:00
}
@Override
public boolean equals(Object obj) {
return obj instanceof PatternEntry &&
((PatternEntry) obj).name.equals(name) &&
((PatternEntry) obj).id.equals(id);
}
}
2017-07-24 20:47:25 -07:00
}