diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7148537..f5bd0d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.7)
 project(liblex)
 
-add_library(lex STATIC src/pattern.c src/pairmap.c)
+add_library(lex STATIC src/pattern.c src/pairmap.c src/eval.c)
 add_executable(liblex src/main.c)
 add_subdirectory(external/libds)
 
diff --git a/include/eval.h b/include/eval.h
new file mode 100644
index 0000000..a5e987b
--- /dev/null
+++ b/include/eval.h
@@ -0,0 +1,119 @@
+#ifndef LIBLEX_EVAL_H
+#define LIBLEX_EVAL_H
+
+#include "pattern.h"
+#include "pairmap.h"
+#include "ll.h"
+
+/**
+ * A pattern match.
+ * During lexing / evaluation, structs of this type
+ * are used to represent "matched" hypotheses.
+ */
+struct match_s {
+    int from;
+    int to;
+    int pattern;
+};
+
+/**
+ * Used in matching a single word / lexeme / etc
+ * from some input.
+ */
+struct eval_s {
+    /**
+     * The input being evaluated.
+     */
+    char* string;
+    /**
+     * The current index in the input.
+     */
+    int index;
+    /**
+     * The index where the evaluation began.
+     */
+    int begin;
+    /**
+     * The number of nodes / states matched in
+     * the last iteration.
+     */
+    int matched;
+    /**
+     * Set A of the two sets of pattern nodes / states.
+     */
+    ht set_a;
+    /**
+     * Set B of the two sets of pattern nodes / states.
+     */
+    ht set_b;
+    /**
+     * The matches that were found so far.
+     */
+    ll matches;
+
+    /**
+     * The current set of states.
+     */
+    ht* set_current;
+    /**
+     * The next set of states.
+     */
+    ht* set_next;
+};
+
+/**
+ * Configuration for evaluation.
+ * This contains a list of states that should be started with when
+ * the evaluation of a single token begins.
+ */
+struct eval_config_s {
+    /**
+     * The initial list of states to be checked.
+     */
+    ll states;
+};
+
+typedef struct match_s match;
+typedef struct eval_s eval;
+typedef struct eval_config_s eval_config;
+
+/**
+ * Initializes the evaluation configuration.
+ * @param config the configuration to initialize.
+ */
+void eval_config_init(eval_config* config);
+/**
+ * Frees data allocated by an evaluation configuration.
+ * @param config the configuration to free.
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_config_free(eval_config* config);
+/**
+ * Adds a new pattern to the configuration, with a given pattern ID.
+ * @param config the configuration to add the pattern to.
+ * @param pattern the pattern to add
+ * @param pattern_id the id to associate with the pattern
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_config_add(eval_config* config, char* pattern, int pattern_id);
+/**
+ * Evaluates / finds a single word.
+ * @param string the string to evaluate.
+ * @param index the index to start at.
+ * @param config the configuration to use
+ * @param match where to store the best match; every field is set to -1 if nothing matched.
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_word(char* string, int index, eval_config* config, match* match);
+/**
+ * Evaluates input starting at the index until it reaches the null terminator,
+ * adding the best matches to the linked list.
+ * @param string the string to evaluate.
+ * @param index the index to start at
+ * @param config the config to use
+ * @param matches the linked list to populate with matches
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_all(char* string, int index, eval_config* config, ll* matches);
+
+#endif
diff --git a/src/eval.c b/src/eval.c
new file mode 100644
index 0000000..ec08fcd
--- /dev/null
+++ b/src/eval.c
@@ -0,0 +1,216 @@
+#include "eval.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include "pattern.h"
+#include "pairmap.h"
+
+/* Code returned by the foreach callbacks below when an allocation fails. */
+#define EVAL_FOREACH_MALLOC 1
+
+/* Translates the code returned by ll_foreach / ht_foreach into a liblex_result. */
+static liblex_result foreach_errors[2] = {
+    LIBLEX_SUCCESS,
+    LIBLEX_MALLOC
+};
+
+void eval_config_init(eval_config* config){
+    ll_init(&config->states);
+}
+liblex_result eval_config_add(eval_config* config, char* ptrn, int pattern_id){
+    liblex_result result;
+    pattern* new_pattern = malloc(sizeof(*new_pattern));
+    if(new_pattern){
+        result = pattern_compile(new_pattern, ptrn, pattern_id);
+        if(result == LIBLEX_SUCCESS){
+            result = (ll_append(&config->states, new_pattern) == LIBDS_SUCCESS) ?
+                LIBLEX_SUCCESS : LIBLEX_MALLOC;
+        }
+        /* On failure the pattern is not kept by the configuration, so release it here. */
+        if(result != LIBLEX_SUCCESS){
+            pattern_free(new_pattern);
+            free(new_pattern);
+        }
+    } else {
+        result = LIBLEX_MALLOC;
+    }
+    return result;
+}
+
+/* ll_foreach callback: frees a heap-allocated match. Always returns 0. */
+int eval_foreach_match_free(void* data, va_list args) {
+    free(data);
+    return 0;
+}
+
+/* ll_foreach callback: frees the contents of a stored pattern and then the pattern itself. */
+int eval_config_foreach_free(void* data, va_list args){
+    pattern* ptrn = data;
+    int result = pattern_free(ptrn) == LIBLEX_SUCCESS ? 0 : EVAL_FOREACH_MALLOC;
+    free(ptrn);
+    return result;
+}
+
+liblex_result eval_config_free(eval_config* config){
+    liblex_result result = foreach_errors[ll_foreach(&config->states, NULL, compare_always, eval_config_foreach_free)];
+    ll_free(&config->states);
+    return result;
+}
+
+/* Returns the successor of a value / any / range / connect node, or NULL for any other type. */
+pattern_node* eval_pattern_node_get_next(pattern_node* node){
+    pattern_node* to_return = NULL;
+    if(node->type == PNODE_VALUE){
+        to_return = node->data_u.value_s.next;
+    } else if(node->type == PNODE_ANY){
+        to_return = node->data_u.any_s.next;
+    } else if(node->type == PNODE_RANGE){
+        to_return = node->data_u.range_s.next;
+    } else if(node->type == PNODE_CONNECT){
+        to_return = node->data_u.connect_s.next;
+    }
+    return to_return;
+}
+
+/**
+ * Adds a node to a state set, keyed by its pattern id and node id.
+ * Any / value / range / end nodes are stored directly. Connect and fork
+ * nodes are not stored; their successors are added recursively instead.
+ */
+liblex_result eval_pairmap_add_node(ht* table, pattern_node* node){
+    liblex_result result = LIBLEX_SUCCESS;
+
+    if(node->type == PNODE_ANY || node->type == PNODE_VALUE || node->type == PNODE_RANGE ||
+            node->type == PNODE_END){
+        pairmap_key tmp_key;
+        result = PAIRMAP_PUT(table, &tmp_key, node->pattern_id, node->id, node) == LIBDS_SUCCESS ? LIBLEX_SUCCESS : LIBLEX_MALLOC;
+    } else if(node->type == PNODE_CONNECT){
+        result = eval_pairmap_add_node(table, eval_pattern_node_get_next(node));
+    } else if(node->type == PNODE_FORK){
+        result = eval_pairmap_add_node(table, node->data_u.fork_s.left);
+        /* result is a liblex_result, so it is compared against the liblex constant. */
+        if(result == LIBLEX_SUCCESS){
+            result = eval_pairmap_add_node(table, node->data_u.fork_s.right);
+        }
+    }
+    return result;
+}
+
+/* ll_foreach callback: adds a pattern's head node to the state set passed as the extra argument. */
+int eval_foreach_add_node(void* data, va_list args){
+    pattern* new_pattern = data;
+    ht* pairmap = va_arg(args, ht*);
+    pattern_node* pattern_head = new_pattern->head;
+    return eval_pairmap_add_node(pairmap, pattern_head) == LIBLEX_SUCCESS ? 0 : EVAL_FOREACH_MALLOC;
+}
+
+/* Returns non-zero if the node accepts the character at the current index of the input. */
+int eval_node_matches(pattern_node* node, eval* eval){
+    int matches = 0;
+    char value = eval->string[eval->index];
+    if(node->type == PNODE_ANY){
+        /* "Any" accepts every character except the null terminator. */
+        matches = value != '\0';
+    } else if(node->type == PNODE_VALUE){
+        matches = value == node->data_u.value_s.value;
+    } else if(node->type == PNODE_RANGE) {
+        matches = value >= node->data_u.range_s.from && value <= node->data_u.range_s.to;
+    }
+    return matches;
+}
+
+/**
+ * ht_foreach callback run on each state of the current set. A state that
+ * accepts the current character adds its successor to the next set; an end
+ * state records a match with from = begin and to = the current index.
+ */
+int eval_foreach_check_state(void* data, va_list args){
+    int return_code = 0;
+    pattern_node* node = data;
+    eval* evl = va_arg(args, eval*);
+    if(eval_node_matches(node, evl)){
+        return_code = eval_pairmap_add_node(evl->set_next, eval_pattern_node_get_next(node)) == LIBLEX_SUCCESS ? 0 : EVAL_FOREACH_MALLOC;
+        if(return_code == 0){
+            evl->matched++;
+        }
+    } else if(node->type == PNODE_END){
+        match* new_match = malloc(sizeof(*new_match));
+        if(new_match){
+            new_match->from = evl->begin;
+            new_match->to = evl->index;
+            new_match->pattern = node->pattern_id;
+
+            /* ll_append is checked against the libds constant, as in eval_config_add. */
+            return_code = ll_append(&evl->matches, new_match) == LIBDS_SUCCESS ? 0 : EVAL_FOREACH_MALLOC;
+            if(return_code){
+                free(new_match);
+            }
+        } else {
+            return_code = EVAL_FOREACH_MALLOC;
+        }
+    }
+    return return_code;
+}
+
+/**
+ * Advances the evaluation by one character: checks every current state,
+ * swaps the current and next sets, and re-initializes the new next set.
+ */
+liblex_result eval_step(eval* eval){
+    ht* tmp;
+    eval->matched = 0;
+    liblex_result result = foreach_errors[ht_foreach(eval->set_current, NULL, compare_always, eval_foreach_check_state, eval)];
+
+    tmp = eval->set_current;
+    eval->set_current = eval->set_next;
+    eval->set_next = tmp;
+    eval->index++;
+
+    ht_free(eval->set_next);
+    pairmap_init_ht(eval->set_next);
+    return result;
+}
+
+liblex_result eval_word(char* string, int index, eval_config* config, match* mtch){
+    liblex_result result = LIBLEX_SUCCESS;
+    eval evl;
+    evl.index = evl.begin = index;
+    evl.string = string;
+    evl.matched = 0;
+    pairmap_init_ht(&evl.set_a);
+    pairmap_init_ht(&evl.set_b);
+    ll_init(&evl.matches);
+    evl.set_current = &evl.set_a;
+    evl.set_next = &evl.set_b;
+
+    /* Seed the first state set, then step until no state accepts a character or an error occurs. */
+    result = foreach_errors[ll_foreach(&config->states, NULL, compare_always, eval_foreach_add_node, evl.set_current)];
+    if(result == LIBLEX_SUCCESS){
+        do {
+            result = eval_step(&evl);
+        } while(result == LIBLEX_SUCCESS && evl.matched);
+    }
+
+    /* The tail holds the most recent match (assuming ll_append adds at the tail), i.e. the longest one. */
+    if(result == LIBLEX_SUCCESS && evl.matches.tail){
+        match* largest_match = evl.matches.tail->data;
+        mtch->from = largest_match->from;
+        mtch->to = largest_match->to;
+        mtch->pattern = largest_match->pattern;
+    } else {
+        mtch->from = -1;
+        mtch->to = -1;
+        mtch->pattern = -1;
+    }
+
+    ll_foreach(&evl.matches, NULL, compare_always, eval_foreach_match_free);
+    ll_free(&evl.matches);
+    ht_free(&evl.set_a);
+    ht_free(&evl.set_b);
+
+    return result;
+}
+/* Not implemented yet: leaves the matches list untouched and reports success. */
+liblex_result eval_all(char* string, int index, eval_config* config, ll* matches){
+    liblex_result result = LIBLEX_SUCCESS;
+    return result;
+}