1
0
mirror of https://github.com/DanilaFe/abacus synced 2026-01-01 21:35:19 +00:00

Add a lot of comments. More to come.

This commit is contained in:
2017-07-25 22:47:48 -07:00
parent ade4eb1035
commit c19ae3b071
13 changed files with 343 additions and 0 deletions

View File

@@ -1,5 +1,9 @@
package org.nwapw.abacus.lexing.pattern;
/**
* A pattern node that matches any character.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class AnyNode<T> extends PatternNode<T> {
@Override

View File

@@ -1,13 +1,28 @@
package org.nwapw.abacus.lexing.pattern;
/**
* A node that represents a successful match.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class EndNode<T> extends PatternNode<T> {
/**
* The ID of the pattern that has been matched.
*/
private T patternId;
/**
* Creates a new end node with the given ID.
* @param patternId the pattern ID.
*/
public EndNode(T patternId){
// Keep the ID so getPatternId() can later report which pattern was matched.
this.patternId = patternId;
}
/**
* Gets the pattern ID.
* @return the pattern ID.
*/
public T getPatternId() {
    // Plain accessor: hands back the ID supplied at construction time.
    return this.patternId;
}

View File

@@ -3,6 +3,10 @@ package org.nwapw.abacus.lexing.pattern;
import java.util.ArrayList;
import java.util.Collection;
/**
* A node that is used as structural glue in pattern compilation.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class LinkNode<T> extends PatternNode<T> {
@Override

View File

@@ -1,25 +1,56 @@
package org.nwapw.abacus.lexing.pattern;
/**
* A match that has been generated by the lexer.
* @param <T> the type used to represent the ID of the pattern this match belongs to.
*/
public class Match<T> {
/**
* The bottom range of the string, inclusive.
*/
private int from;
/**
* The top range of the string, exclusive.
*/
private int to;
/**
* The pattern type this match matched.
*/
private T type;
/**
* Creates a new match with the given parameters.
* @param from the bottom range of the string.
* @param to the top range of the string.
* @param type the type of the match.
*/
public Match(int from, int to, T type) {
    // The three fields are independent; each is stored exactly as given.
    this.type = type;
    this.to = to;
    this.from = from;
}
/**
* Gets the bottom range bound of the string.
* @return the bottom range bound of the string.
*/
public int getFrom() {
    // Inclusive lower bound, as documented on the field.
    return this.from;
}
/**
* Gets the top range bound of the string.
* @return the top range bound of the string.
*/
public int getTo() {
    // Exclusive upper bound, as documented on the field.
    return this.to;
}
/**
* Gets the pattern type of the match.
* @return the ID of the pattern that this match matched.
*/
public T getType() {
    // The ID of the pattern that produced this match.
    return this.type;
}

View File

@@ -5,13 +5,33 @@ import java.util.HashMap;
import java.util.Stack;
import java.util.function.Function;
/**
* A pattern that can be compiled from a string and used in lexing.
* @param <T> the type that is used to identify and sort this pattern.
*/
public class Pattern<T> {
/**
* The ID of this pattern.
*/
private T id;
/**
* The head of this pattern.
*/
private PatternNode<T> head;
/**
* The source string of this pattern.
*/
private String source;
/**
* The index at which the compilation has stopped.
*/
private int index;
/**
* A map of regex operator to functions that modify a PatternChain
* with the appropriate operation.
*/
private HashMap<Character, Function<PatternChain<T>, PatternChain<T>>> operations =
new HashMap<Character, Function<PatternChain<T>, PatternChain<T>>>() {{
put('+', Pattern.this::transformPlus);
@@ -19,11 +39,23 @@ public class Pattern<T> {
put('?', Pattern.this::transformQuestion);
}};
/**
* A regex operator function that turns the chain
* into a one-or-more chain.
* @param chain the chain to transform.
* @return the modified chain.
*/
private PatternChain<T> transformPlus(PatternChain<T> chain) {
    // One-or-more: after reaching the tail, the chain may start over
    // from its own head, so add the head as an output of the tail.
    PatternNode<T> loopTarget = chain.head;
    chain.tail.outputStates.add(loopTarget);
    return chain;
}
/**
* A regex operator function that turns the chain
* into a zero-or-more chain.
* @param chain the chain to transform.
* @return the modified chain.
*/
private PatternChain<T> transformStar(PatternChain<T> chain){
LinkNode<T> newTail = new LinkNode<>();
LinkNode<T> newHead = new LinkNode<>();
@@ -36,6 +68,12 @@ public class Pattern<T> {
return chain;
}
/**
* A regex operator function that turns the chain
* into a zero-or-one chain.
* @param chain the chain to transform.
* @return the modified chain.
*/
private PatternChain<T> transformQuestion(PatternChain<T> chain){
LinkNode<T> newTail = new LinkNode<>();
LinkNode<T> newHead = new LinkNode<>();
@@ -47,6 +85,11 @@ public class Pattern<T> {
return chain;
}
/**
* Combines a collection of chains into one OR chain.
* @param collection the collection of chains to combine.
* @return the resulting OR chain.
*/
private PatternChain<T> combineChains(Collection<PatternChain<T>> collection){
LinkNode<T> head = new LinkNode<>();
LinkNode<T> tail = new LinkNode<>();
@@ -58,6 +101,10 @@ public class Pattern<T> {
return newChain;
}
/**
* Parses a single value from the input into a chain.
* @return the resulting chain, or null on error.
*/
private PatternChain<T> parseValue(){
if(index >= source.length()) return null;
if(source.charAt(index) == '\\'){
@@ -66,6 +113,10 @@ public class Pattern<T> {
return new PatternChain<>(new ValueNode<>(source.charAt(index++)));
}
/**
* Parses a [] range from the input into a chain.
* @return the resulting chain, or null on error.
*/
private PatternChain<T> parseOr(){
Stack<PatternChain<T>> orStack = new Stack<>();
index++;
@@ -88,6 +139,12 @@ public class Pattern<T> {
return (orStack.size() == 1) ? orStack.pop() : combineChains(orStack);
}
/**
* Parses a repeatable segment from the input into a chain.
* @param isSubsegment whether the segment is a sub-expression "()", and therefore
* whether to expect a closing parenthesis.
* @return the resulting chain, or null on error.
*/
private PatternChain<T> parseSegment(boolean isSubsegment){
if(index >= source.length() || ((source.charAt(index) != '(') && isSubsegment)) return null;
if(isSubsegment) index++;
@@ -152,6 +209,11 @@ public class Pattern<T> {
return fullChain;
}
/**
* Creates / compiles a new pattern with the given id from the given string.
* @param from the string to compile a pattern from.
* @param id the ID to use.
*/
public Pattern(String from, T id){
this.id = id;
index = 0;
@@ -166,6 +228,10 @@ public class Pattern<T> {
}
}
/**
* Gets the head PatternNode, for use in matching.
* @return the pattern node.
*/
public PatternNode<T> getHead() {
    // Entry node of the compiled pattern graph.
    return this.head;
}

View File

@@ -1,23 +1,52 @@
package org.nwapw.abacus.lexing.pattern;
/**
* A chain of nodes that can be treated as a single unit.
* Used during pattern compilation.
* @param <T> the type used to identify which pattern has been matched.
*/
public class PatternChain<T> {
/**
* The head node of the chain.
*/
public PatternNode<T> head;
/**
* The tail node of the chain.
*/
public PatternNode<T> tail;
/**
* Creates a new chain with the given start and end.
* @param head the start of the chain.
* @param tail the end of the chain.
*/
public PatternChain(PatternNode<T> head, PatternNode<T> tail) {
    // Both ends are stored as given; no nodes are linked here.
    this.tail = tail;
    this.head = head;
}
/**
* Creates a chain that starts and ends with the same node.
* @param node the node to use.
*/
public PatternChain(PatternNode<T> node){
// Delegate to the two-argument constructor with the node as both head and tail.
this(node, node);
}
/**
* Creates an empty chain.
*/
public PatternChain(){
// Passing null leaves both head and tail null; append(PatternNode) detects
// this empty state through its tail == null check.
this(null);
}
/**
* Appends the other chain to this one. This modifies
* the nodes, as well.
* If this chain is empty, it is set to the other.
* @param other the other chain to append.
*/
public void append(PatternChain<T> other){
if(other.head == null || tail == null) {
this.head = other.head;
@@ -28,6 +57,12 @@ public class PatternChain<T> {
}
}
/**
* Appends a single node to this chain. This modifies
* the nodes, as well.
* If this chain is empty, it is set to the node.
* @param node the node to append to this chain.
*/
public void append(PatternNode<T> node){
if(tail == null){
head = tail = node;

View File

@@ -4,26 +4,58 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
/**
* A base class for a pattern node. Provides all functions
* necessary for matching, and is constructed by a Pattern instance
* from a string.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class PatternNode<T> {
/**
* The set of states to which the lexer should continue
* should this node be correctly matched.
*/
protected HashSet<PatternNode<T>> outputStates;
/**
* Creates a new pattern node.
*/
public PatternNode() {
    // Every node starts with no outgoing transitions.
    this.outputStates = new HashSet<PatternNode<T>>();
}
/**
* Determines whether the current input character can
* be matched by this node.
* @param other the character being matched.
* @return true if the character can be matched, false otherwise.
*/
public boolean matches(char other){
// The base node accepts no character at all.
// NOTE(review): subclasses such as ValueNode presumably override this — confirm.
return false;
}
/**
* If this node can be used as part of a range, returns that value.
* @return the null character ('\0') if this node cannot be converted
* into a range bound, or the appropriate range bound if it can.
*/
public char range(){
// '\0' is the "cannot be used as a range bound" value described in the Javadoc.
return '\0';
}
/**
* Adds this node into a collection of other nodes.
* @param into the collection to add into.
*/
public void addInto(Collection<PatternNode<T>> into){
// Invoked on each output state by addOutputsInto(); the base behavior is to
// add the node itself to the target collection.
into.add(this);
}
/**
* Adds the node's children into a collection of other nodes.
* @param into the collection to add into.
*/
public void addOutputsInto(Collection<PatternNode<T>> into) {
    // Let each output state decide how it adds itself, via addInto().
    for (PatternNode<T> next : outputStates) {
        next.addInto(into);
    }
}

View File

@@ -1,10 +1,25 @@
package org.nwapw.abacus.lexing.pattern;
/**
* A node that matches a range of characters.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class RangeNode<T> extends PatternNode<T> {
/**
* The bottom bound of the range, inclusive.
*/
private char from;
/**
* The top bound of the range, inclusive.
*/
private char to;
/**
* Creates a new range node from the given range.
* @param from the bottom bound of the range.
* @param to the top bound of the range.
*/
public RangeNode(char from, char to){
this.from = from;
this.to = to;

View File

@@ -1,9 +1,20 @@
package org.nwapw.abacus.lexing.pattern;
/**
* A node that matches a single value.
* @param <T> the type that's used to tell which pattern this node belongs to.
*/
public class ValueNode<T> extends PatternNode<T> {
/**
* The value this node matches.
*/
private char value;
/**
* Creates a new node that matches the given character.
* @param value the character this node matches.
*/
public ValueNode(char value){
// Remember the single character this node is documented to match.
this.value = value;
}