Add struct declarations to libregex.h, and also typedef them.
This commit is contained in:
parent
ad5fade9f6
commit
7f370983f2
|
@ -1,4 +1,295 @@
|
|||
#ifndef LIBREGEX_HEADER
|
||||
#define LIBREGEX_HEADER
|
||||
|
||||
/**
 * Capacity of the node pointer array held by struct regex_list_s.
 */
#define LIBREGEX_MAX_NODE_COUNT 64
/**
 * Capacity of the group arrays held by struct regex_result_s
 * and struct regex_sim_s.
 */
#define LIBREGEX_MAX_GROUP_COUNT 64
|
||||
|
||||
/**
 * Enum used to represent the result
 * of error-prone libregex functions.
 */
enum libregex_result_e {
    /**
     * Represents a successful operation. All went well.
     */
    LIBREGEX_SUCCESS,
    /**
     * Represents an invalid regular expression.
     */
    LIBREGEX_INVALID,
    /**
     * Represents an allocation failure.
     */
    LIBREGEX_MALLOC
};
|
||||
|
||||
/**
 * The tag of the tagged union that is
 * a regular expression NFA node.
 */
enum regex_node_type_e {
    /**
     * Represents a node whose value was not yet set.
     * This is usually the case after the initial creation of the node.
     */
    REGEX_CLEAR,
    /**
     * Represents a value node.
     * The value node matches a single character from the input.
     */
    REGEX_VALUE,
    /**
     * Represents a range node.
     * The range node matches a range of characters from the input.
     */
    REGEX_RANGE,
    /**
     * Represents an "any" node.
     * The any node matches any character from the input.
     */
    REGEX_ANY,
    /**
     * Represents a connection node, only used for structural
     * purposes.
     */
    REGEX_CONNECT,
    /**
     * Represents a fork node.
     * This node is used for structural purposes, but, unlike the
     * connect node, can transition into two NFA nodes.
     */
    REGEX_FORK,
    /**
     * Represents the beginning or end of a group.
     * Beyond its use for matching substrings, the group node is treated
     * like a connection node.
     */
    REGEX_GROUP,
    /**
     * Represents the end of the NFA, and a successful match.
     */
    REGEX_END
};
|
||||
|
||||
/**
|
||||
* Struct representing a single NFA node.
|
||||
*/
|
||||
struct regex_node_s {
|
||||
/**
|
||||
* The type of the NFA node.
|
||||
*/
|
||||
enum regex_node_type_e type;
|
||||
/**
|
||||
* The ID of the list this node was last added into.
|
||||
* The list ID represents the last "state" list this node
|
||||
* was added to, to prevent the node being checked multiple times
|
||||
* during matching.
|
||||
*
|
||||
* Outside of matching, this is also used for iteration over NFA nodes:
|
||||
* once a node is iterated over, its list ID is set to a certain value
|
||||
* believed to be unique during the iteration so that it is not re-checked.
|
||||
*/
|
||||
int list_id;
|
||||
|
||||
/**
|
||||
* The union part of the "tagged union".
|
||||
* data_u represents the possible data types
|
||||
* that the node can represent.
|
||||
*/
|
||||
union {
|
||||
/**
|
||||
* Represents data carried by a value node.
|
||||
*/
|
||||
struct {
|
||||
/**
|
||||
* The value the node matches.
|
||||
*/
|
||||
char value;
|
||||
/**
|
||||
* The next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* next;
|
||||
} value_s;
|
||||
|
||||
/**
|
||||
* Represents data carried by a value node.
|
||||
*/
|
||||
struct {
|
||||
/**
|
||||
* Represents the bottom bounds of the range, inclusive.
|
||||
*/
|
||||
char from;
|
||||
/**
|
||||
* Represents the top bounds of the range, inclusive.
|
||||
*/
|
||||
char to;
|
||||
/**
|
||||
* The next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* next;
|
||||
} range_s;
|
||||
|
||||
/**
|
||||
* Represents data carried by an "any" node.
|
||||
*/
|
||||
struct {
|
||||
/**
|
||||
* The next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* next;
|
||||
} any_s;
|
||||
|
||||
/**
|
||||
* Represents data carried by a connection node.
|
||||
*/
|
||||
struct {
|
||||
/**
|
||||
* The next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* next;
|
||||
} connect_s;
|
||||
|
||||
/**
|
||||
* Represents data carried by a fork node.
|
||||
*/
|
||||
struct {
|
||||
/**
|
||||
* The first next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* left;
|
||||
/**
|
||||
* The second next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* right;
|
||||
} fork_s;
|
||||
|
||||
/**
|
||||
* Represents data carried by a group node.
|
||||
*/
|
||||
struct {
|
||||
/**
|
||||
* Boolean, whether this is the beginning or end of a group.
|
||||
*/
|
||||
int open;
|
||||
/**
|
||||
* The id of the group.
|
||||
*/
|
||||
int id;
|
||||
/**
|
||||
* The other group node of the same ID.
|
||||
*/
|
||||
struct regex_node_s* other;
|
||||
/**
|
||||
* The next node in the NFA.
|
||||
*/
|
||||
struct regex_node_s* next;
|
||||
} group_s;
|
||||
} data_u;
|
||||
};
|
||||
|
||||
/**
 * Represents a small NFA to be treated as a unit.
 */
struct regex_chain_s {
    /**
     * The first node in the NFA chain.
     */
    struct regex_node_s* head;
    /**
     * The last node in the NFA chain.
     */
    struct regex_node_s* tail;
};
|
||||
|
||||
/**
|
||||
* Struct that represents a list of nodes currently being
|
||||
* checked against input.
|
||||
*/
|
||||
struct regex_list_s {
|
||||
/**
|
||||
* The ID of the list is used together with
|
||||
* regex_node_s' list_id to prevent multiple
|
||||
* pointers to the same node in the list.
|
||||
*/
|
||||
int id;
|
||||
/**
|
||||
* The number of nodes currently in the list.
|
||||
*/
|
||||
int size;
|
||||
/**
|
||||
* The list of nodes.
|
||||
*/
|
||||
struct regex_node_s* nodes[LIBREGEX_MAX_NODE_COUNT];
|
||||
};
|
||||
|
||||
/**
 * Represents a single matched group.
 */
struct regex_match_s {
    /**
     * The starting index of the match in the string, inclusive.
     */
    int from;
    /**
     * The ending index of the match in the string, inclusive.
     */
    int to;
};
|
||||
|
||||
/**
|
||||
* Struct that represents he result of running a match.
|
||||
*/
|
||||
struct regex_result_s {
|
||||
/**
|
||||
* Boolean, whether the regular expression matched or not.
|
||||
*/
|
||||
int matches;
|
||||
/**
|
||||
* List of groups that were matched successfully.
|
||||
*/
|
||||
struct regex_match_s* groups[LIBREGEX_MAX_GROUP_COUNT];
|
||||
};
|
||||
|
||||
/**
|
||||
* Struct that represents data used to match
|
||||
* a string against a regular expression.
|
||||
*/
|
||||
struct regex_sim_s {
|
||||
/**
|
||||
* The string being matched.
|
||||
*/
|
||||
char* string;
|
||||
/**
|
||||
* The index of the string currently being inspected.
|
||||
*/
|
||||
int index;
|
||||
|
||||
/**
|
||||
* The current list of states ready to be matched.
|
||||
*/
|
||||
struct regex_list_s* current;
|
||||
/**
|
||||
* The list of states to be matched in the next index.
|
||||
*/
|
||||
struct regex_list_s* next;
|
||||
|
||||
/**
|
||||
* The result struct being built during the match.
|
||||
*/
|
||||
struct regex_result_s* result;
|
||||
/**
|
||||
* A list of groups potentially being constructed.
|
||||
*/
|
||||
struct regex_match_s groups[LIBREGEX_MAX_GROUP_COUNT];
|
||||
};
|
||||
|
||||
/* Short aliases for the enum and struct tags declared in this header. */
typedef enum libregex_result_e libregex_result;
typedef enum regex_node_type_e regex_node_type;
typedef struct regex_node_s regex_node;
typedef struct regex_chain_s regex_chain;
typedef struct regex_list_s regex_list;
typedef struct regex_match_s regex_match;
typedef struct regex_result_s regex_result;
typedef struct regex_sim_s regex_sim;
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user