From e86518b7167ab67455364bd0ab79df1d8278b070 Mon Sep 17 00:00:00 2001
From: Danila Fedorin
Date: Thu, 19 Jan 2017 19:34:23 -0800
Subject: [PATCH] Write a general header file and a header file for regular expressions.

---
 include/liblex.h  |  24 +++++++
 include/pattern.h | 169 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 include/liblex.h
 create mode 100644 include/pattern.h

diff --git a/include/liblex.h b/include/liblex.h
new file mode 100644
index 0000000..63ee7ba
--- /dev/null
+++ b/include/liblex.h
@@ -0,0 +1,24 @@
+#ifndef LIBLEX_H
+#define LIBLEX_H
+
+/**
+ * Represents a result of a liblex operation.
+ */
+enum liblex_result_e {
+    /**
+     * Represents a successful operation.
+     */
+    LIBLEX_SUCCESS,
+    /**
+     * Represents a memory allocation failure.
+     */
+    LIBLEX_MALLOC,
+    /**
+     * Represents an invalid pattern.
+     */
+    LIBLEX_INVALID
+};
+
+typedef enum liblex_result_e liblex_result;
+
+#endif
diff --git a/include/pattern.h b/include/pattern.h
new file mode 100644
index 0000000..b512d9d
--- /dev/null
+++ b/include/pattern.h
@@ -0,0 +1,169 @@
+#ifndef LIBLEX_PATTERN_H
+#define LIBLEX_PATTERN_H
+
+#include "liblex.h"
+
+/**
+ * Enum that represents the type
+ * of an NFA node / state.
+ */
+enum pattern_node_type_e {
+    /**
+     * Represents a clear, un-configured node.
+     */
+    PNODE_CLEAR,
+    /**
+     * Represents a single value to be matched.
+     */
+    PNODE_VALUE,
+    /**
+     * Represents a range of values, inclusive.
+     */
+    PNODE_RANGE,
+    /**
+     * Represents any character other than the NUL terminator.
+     */
+    PNODE_ANY,
+    /**
+     * Represents a "connection" between nodes, does not
+     * match any value.
+     */
+    PNODE_CONNECT,
+    /**
+     * Represents a fork / divide in possible "next" states.
+     */
+    PNODE_FORK,
+    /**
+     * Represents a successfully matched pattern.
+     */
+    PNODE_END
+};
+
+/**
+ * Struct representing a single NFA node / state.
+ */
+struct pattern_node_s {
+    /**
+     * The type of this node.
+     */
+    enum pattern_node_type_e type;
+    /**
+     * The id of this node.
+     * The id is unique within the pattern.
+     */
+    int id;
+
+    /**
+     * The node's data that varies based on type.
+     */
+    union {
+        /**
+         * Data for a "value" node.
+         */
+        struct {
+            /**
+             * The value this node matches.
+             */
+            char value;
+            /**
+             * The next state in the pattern.
+             */
+            struct pattern_node_s* next;
+        } value_s;
+
+        /**
+         * Data for a "range" node.
+         */
+        struct {
+            /**
+             * The beginning of the range, inclusive, of matched values.
+             */
+            char from;
+            /**
+             * The end of the range, inclusive, of matched values.
+             */
+            char to;
+            /**
+             * The next state in the pattern.
+             */
+            struct pattern_node_s* next;
+        } range_s;
+
+        /**
+         * Data for an "any" node.
+         */
+        struct {
+            /**
+             * The next state in the pattern.
+             */
+            struct pattern_node_s* next;
+        } any_s;
+
+        /**
+         * Data for a "connect" node.
+         */
+        struct {
+            /**
+             * The next node in the pattern.
+             */
+            struct pattern_node_s* next;
+        } connect_s;
+
+        /**
+         * Data for a "fork" node.
+         */
+        struct {
+            /**
+             * The left node in the fork.
+             */
+            struct pattern_node_s* left;
+            /**
+             * The right node in the fork.
+             */
+            struct pattern_node_s* right;
+        } fork_s;
+
+        /**
+         * Data for an "end" node.
+         */
+        struct {
+            /**
+             * The ID of the pattern that just finished matching.
+             */
+            int pattern_id;
+        } end_s;
+    } data_u;
+};
+
+/**
+ * A chain of pattern nodes.
+ */
+struct pattern_chain_s {
+    /**
+     * The beginning of the chain, its first state.
+     */
+    struct pattern_node_s* head;
+    /**
+     * The end of the chain, its last state.
+     */
+    struct pattern_node_s* tail;
+};
+
+typedef enum pattern_node_type_e pattern_node_type;
+typedef struct pattern_node_s pattern_node;
+typedef struct pattern_chain_s pattern_chain;
+
+/**
+ * Compiles a string representation of a pattern into a pattern NFA,
+ * and stores the first node of the new NFA in root.
+ * @param root the node to store the resulting pattern into.
+ * @param expression the pattern represented as a string.
+ * @return LIBLEX_SUCCESS if all goes well, otherwise some other liblex_result.
+ */
+liblex_result pattern_compile(pattern_node** root, char* expression);
+/**
+ * Frees a pattern NFA allocated by pattern_compile.
+ */
+void pattern_free(pattern_node* root);
+
+#endif