diff --git a/src/org/nwapw/abacus/lexing/Lexer.java b/src/org/nwapw/abacus/lexing/Lexer.java new file mode 100644 index 0000000..22ed298 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/Lexer.java @@ -0,0 +1,70 @@ +package org.nwapw.abacus.lexing; + +import org.nwapw.abacus.lexing.pattern.EndNode; +import org.nwapw.abacus.lexing.pattern.Match; +import org.nwapw.abacus.lexing.pattern.Pattern; +import org.nwapw.abacus.lexing.pattern.PatternNode; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; + +public class Lexer { + + private ArrayList> patterns; + + public Lexer(){ + patterns = new ArrayList<>(); + } + + public void register(String pattern, T id){ + Pattern compiledPattern = new Pattern<>(pattern, id); + if(compiledPattern.getHead() != null) patterns.add(compiledPattern); + } + + public Match lexOne(String from, int startAt, Comparator compare){ + ArrayList> matches = new ArrayList<>(); + HashSet> currentSet = new HashSet<>(); + HashSet> futureSet = new HashSet<>(); + int index = startAt; + for(Pattern pattern : patterns){ + pattern.getHead().addInto(currentSet); + } + while(!currentSet.isEmpty() && index < from.length()){ + char currentChar = from.charAt(index); + for(PatternNode node : currentSet){ + if(node.matches(currentChar)) { + node.addOutputsInto(futureSet); + } else if(node instanceof EndNode){ + matches.add(new Match<>(startAt, index, ((EndNode) node).getPatternId())); + } + } + + HashSet> tmp = currentSet; + currentSet = futureSet; + futureSet = tmp; + futureSet.clear(); + + index++; + } + matches.sort((a, b) -> compare.compare(a.getType(), b.getType())); + if(compare != null) { + matches.sort(Comparator.comparingInt(a -> a.getTo() - a.getFrom())); + } + return matches.isEmpty() ? null : matches.get(matches.size() - 1); + } + + public ArrayList> lexAll(String from, int startAt, Comparator compare){ + int index = startAt; + ArrayList> matches = new ArrayList<>(); + Match lastMatch = null; + while((lastMatch = lexOne(from, index, compare)) != null && index < from.length()){ + if(lastMatch.getTo() == lastMatch.getFrom()) return null; + matches.add(lastMatch); + index += lastMatch.getTo() - lastMatch.getFrom(); + } + return matches; + } + +} diff --git a/src/org/nwapw/abacus/lexing/pattern/Match.java b/src/org/nwapw/abacus/lexing/pattern/Match.java new file mode 100644 index 0000000..06e0b27 --- /dev/null +++ b/src/org/nwapw/abacus/lexing/pattern/Match.java @@ -0,0 +1,26 @@ +package org.nwapw.abacus.lexing.pattern; + +public class Match { + + private int from; + private int to; + private T type; + + public Match(int from, int to, T type){ + this.from = from; + this.to = to; + this.type = type; + } + + public int getFrom() { + return from; + } + + public int getTo() { + return to; + } + + public T getType() { + return type; + } +}