diff --git a/include/libregex.h b/include/libregex.h
index 28339a5..9586972 100644
--- a/include/libregex.h
+++ b/include/libregex.h
@@ -300,6 +300,7 @@ void regex_node_clear(regex_node* node);
 /**
  * Frees an entire NFA, starting from the given root node.
  * @param root the root, or starting node, of the NFA.
+ * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
  */
 libregex_result regex_free(regex_node* root);
 /**
@@ -317,5 +318,11 @@ libregex_result regex_build(regex_node** root, char* expression);
  * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
  */
 libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
+/**
+ * Frees data used by regex_match_string in a regex_result struct.
+ * The actual struct is not freed.
+ * @param result the result struct
+ */
+void regex_result_free(regex_result* result);
 
 #endif
\ No newline at end of file
diff --git a/src/libregex.c b/src/libregex.c
index f485e4b..38aea20 100644
--- a/src/libregex.c
+++ b/src/libregex.c
@@ -1,6 +1,7 @@
-#include "libregex.h"#include "libregex.h"
+#include "libregex.h"
 #include <stdlib.h>
 #include <stdarg.h>
+#include <string.h>
 #include "ll.h"
 
 int _regex_node_foreach_free(void* data, va_list args){
@@ -477,4 +478,179 @@ libregex_result regex_build(regex_node** root, char* expression){
 
     return result;
 }
-libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
\ No newline at end of file
+
+/**
+ * Tests whether a single NFA node accepts the given character.
+ * Only REGEX_VALUE, REGEX_RANGE and REGEX_ANY nodes can accept a character;
+ * every other node type yields 0.
+ * @param node the node to test.
+ * @param c the character to test.
+ * @return nonzero if the node accepts the character, 0 otherwise.
+ */
+int _regex_node_matches(regex_node* node, char c){
+    int matches = 0;
+    if(node->type == REGEX_VALUE){
+        matches = node->data_u.value_s.value == c;
+    } else if(node->type == REGEX_RANGE){
+        matches = node->data_u.range_s.from <= c && c <= node->data_u.range_s.to;
+    } else if(node->type == REGEX_ANY){
+        /* The string terminator is never accepted as input. */
+        matches = c != '\0';
+    }
+    return matches;
+}
+
+/**
+ * Adds a node to a list, following REGEX_CONNECT, REGEX_FORK and
+ * REGEX_GROUP links so the nodes behind them are added as well.
+ * A node is skipped when it is NULL, when its list_id is not below the
+ * list's id, or when the list already holds LIBREGEX_MAX_NODE_COUNT nodes.
+ * @param node the node to add.
+ * @param list the list to add the node to.
+ */
+void _regex_node_add(regex_node* node, regex_list* list){
+    if(node && node->list_id < list->id && list->size < LIBREGEX_MAX_NODE_COUNT){
+        node->list_id = list->id;
+        if(node->type == REGEX_CONNECT){
+            _regex_node_add(node->data_u.connect_s.next, list);
+        } else if(node->type == REGEX_FORK){
+            _regex_node_add(node->data_u.fork_s.left, list);
+            _regex_node_add(node->data_u.fork_s.right, list);
+        } else if(node->type != REGEX_CLEAR){
+            if(node->type == REGEX_GROUP){
+                _regex_node_add(node->data_u.group_s.next, list);
+            }
+            /* The recursion above may have filled the list, so the size is
+             * checked again before storing the node itself. */
+            if(list->size < LIBREGEX_MAX_NODE_COUNT){
+                list->nodes[list->size++] = node;
+            }
+        }
+    }
+}
+
+/**
+ * Runs one step of the simulation: every node in the current list is tested
+ * against the character at sim->index, the nodes that follow a match are
+ * collected in the next list, and the two lists are then swapped.
+ * Reaching a REGEX_END node marks the result as matching; REGEX_GROUP nodes
+ * record where their group starts and ends.
+ * @param sim the simulation state.
+ * @return LIBREGEX_SUCCESS, or LIBREGEX_MALLOC if a group match could not be allocated.
+ */
+libregex_result _regex_step(regex_sim* sim){
+    libregex_result result = LIBREGEX_SUCCESS;
+    regex_list* swap_temp;
+    int index = 0;
+    for(; index < sim->current->size; index++){
+        regex_node* current = sim->current->nodes[index];
+        if(_regex_node_matches(current, sim->string[sim->index])){
+            regex_node** next = _regex_node_get_next(current);
+            if(next){
+                _regex_node_add(*next, sim->next);
+            }
+        } else if(current->type == REGEX_END){
+            sim->result->matches = 1;
+        } else if(current->type == REGEX_GROUP){
+            int id = current->data_u.group_s.id;
+            int open = current->data_u.group_s.open;
+            int position = sim->index - 1 + open;
+
+            /* Validate the group id and the position before either is used
+             * as an index: a group that closes at index 0 would otherwise
+             * read string[-1]. The upper bound on id is the one the original
+             * check used. */
+            if(id >= 1 && id < LIBREGEX_MAX_GROUP_COUNT
+                    && position >= 0 && sim->string[position] != '\0'){
+                if(open){
+                    sim->groups[id - 1].from = position;
+                } else {
+                    regex_match* new_match = sim->result->groups[id - 1];
+                    sim->groups[id - 1].to = position;
+                    if(new_match == NULL){
+                        new_match = malloc(sizeof(*new_match));
+                    }
+
+                    if(new_match){
+                        new_match->from = sim->groups[id - 1].from;
+                        new_match->to = sim->groups[id - 1].to;
+                        sim->result->groups[id - 1] = new_match;
+                    } else {
+                        result = LIBREGEX_MALLOC;
+                    }
+                }
+            }
+        }
+    }
+
+    swap_temp = sim->current;
+    sim->current = sim->next;
+    sim->next = swap_temp;
+
+    /* Empty the list that will be filled next and give it a fresh id. */
+    sim->next->size = 0;
+    sim->next->id += 2;
+
+    return result;
+}
+
+/* Simulates the NFA over the string one character at a time, including the
+ * terminator. Group matches stored in build_result are allocated here and
+ * are released with regex_result_free. */
+libregex_result regex_match_string(regex_node* root, char* string, regex_result* build_result){
+    ll clear_ll;
+    libregex_result result;
+    libregex_result clear_result;
+    regex_sim sim;
+    regex_list list_a;
+    regex_list list_b;
+
+    list_a.size = 0;
+    list_a.id = 0;
+
+    list_b.size = 0;
+    list_b.id = 1;
+
+    build_result->matches = 0;
+    memset(&build_result->groups, 0, sizeof(build_result->groups));
+
+    memset(&sim.groups, 0, sizeof(sim.groups));
+    sim.current = &list_a;
+    sim.next = &list_b;
+    sim.result = build_result;
+    sim.string = string;
+    sim.index = 0;
+
+    _regex_node_add(root, &list_a);
+
+    do {
+        result = _regex_step(&sim);
+    } while(sim.string[sim.index++] != '\0' && result == LIBREGEX_SUCCESS);
+
+    /* Run the cleanup pass even when a step failed, keeping the first error.
+     * NOTE(review): presumably this resets the list_id marks left on the
+     * nodes so the NFA can be matched again - confirm in _regex_find_all. */
+    ll_init(&clear_ll);
+    clear_result = _regex_find_all(root, &clear_ll, -1);
+    ll_clear(&clear_ll);
+    if(result == LIBREGEX_SUCCESS){
+        result = clear_result;
+    }
+
+    return result;
+}
+
+/* Frees every group match in the result and resets the pointers; the
+ * struct itself is left to the caller. A NULL result is ignored. */
+void regex_result_free(regex_result* result){
+    int index = 0;
+    if(result == NULL){
+        return;
+    }
+    for(; index < LIBREGEX_MAX_GROUP_COUNT; index++){
+        free(result->groups[index]);
+        /* Reset the pointer so freeing the same result twice is harmless. */
+        result->groups[index] = NULL;
+    }
+    result->matches = 0;
+}
\ No newline at end of file