Implement matching strings + finding groups.

They're done in the same step.

result = _regex_find_all(root, &clear_ll, -1);
ll_clear(&clear_ll);

is all that's necessary to reset all nodes: the only value that
changes during a match is list_id, and it gets set again as the
nodes are found.
This commit is contained in:
Danila Fedorin 2017-01-08 00:04:31 -08:00
parent 06fe67b226
commit fbb0635b26
2 changed files with 129 additions and 2 deletions

View File

@ -300,6 +300,7 @@ void regex_node_clear(regex_node* node);
/**
* Frees an entire NFA, starting from the given root node.
* @param root the root, or starting node, of the NFA.
* @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
*/
libregex_result regex_free(regex_node* root);
/**
@ -317,5 +318,11 @@ libregex_result regex_build(regex_node** root, char* expression);
* @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
*/
libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
/**
 * Frees data used by regex_match_string in a regex_result struct.
 * Every entry of the result's groups array is passed to free() and the
 * matches flag is set back to 0.
 * The actual struct is not freed.
 * @param result the result struct
 */
void regex_result_free(regex_result* result);
#endif

View File

@ -1,6 +1,7 @@
#include "libregex.h"#include "libregex.h"
#include "libregex.h"
#include <stdlib.h>
#include <string.h>
#include <libregex.h>
#include "ll.h"
int _regex_node_foreach_free(void* data, va_list args){
@ -477,4 +478,123 @@ libregex_result regex_build(regex_node** root, char* expression){
return result;
}
libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
/**
 * Tests whether a single node accepts the given character.
 * Only the character-consuming node types can accept:
 * REGEX_VALUE compares against its stored value, REGEX_RANGE checks
 * the inclusive [from, to] interval, and REGEX_ANY accepts everything
 * except the string terminator. Every other node type rejects.
 * @param node the node to test.
 * @param c the character to test against the node.
 * @return nonzero if the node accepts the character, 0 otherwise.
 */
int _regex_node_matches(regex_node* node, char c){
    switch(node->type){
        case REGEX_VALUE:
            return node->data_u.value_s.value == c;
        case REGEX_RANGE:
            return node->data_u.range_s.from <= c && c <= node->data_u.range_s.to;
        case REGEX_ANY:
            /* The terminator is never consumed. */
            return c != '\0';
        default:
            return 0;
    }
}
/**
 * Adds a node, and everything reachable from it without consuming a
 * character, to a simulation list.
 *
 * A node is skipped if it is NULL, if its list_id shows it was already
 * visited for this list, or if the list is full. REGEX_CONNECT and
 * REGEX_FORK nodes are followed but not stored. REGEX_CLEAR nodes are
 * neither followed nor stored. A REGEX_GROUP node has its successor added
 * first and is then stored itself; every other node type is simply stored.
 *
 * @param node the node to add; may be NULL.
 * @param list the list to add to.
 */
void _regex_node_add(regex_node* node, regex_list* list){
    if(!node || node->list_id >= list->id || list->size >= LIBREGEX_MAX_NODE_COUNT){
        return;
    }

    /* Mark as visited before recursing so cycles in the NFA terminate. */
    node->list_id = list->id;

    switch(node->type){
        case REGEX_CONNECT:
            _regex_node_add(node->data_u.connect_s.next, list);
            break;
        case REGEX_FORK:
            _regex_node_add(node->data_u.fork_s.left, list);
            _regex_node_add(node->data_u.fork_s.right, list);
            break;
        case REGEX_CLEAR:
            break;
        case REGEX_GROUP:
            _regex_node_add(node->data_u.group_s.next, list);
            /* The recursive add may have filled the list; the entry check
             * above is stale by now, so check again before storing. */
            if(list->size < LIBREGEX_MAX_NODE_COUNT){
                list->nodes[list->size++] = node;
            }
            break;
        default:
            list->nodes[list->size++] = node;
            break;
    }
}
/**
 * Advances the simulation by one character.
 *
 * Every node in the current list is examined against the character at
 * sim->index:
 *  - a node that accepts the character has its successor added to the
 *    next list;
 *  - a REGEX_END node marks the result as matching;
 *  - a REGEX_GROUP node records the group's start (open) or end (close)
 *    offset, and a close also publishes the finished span in the result.
 * Afterwards the current and next lists are swapped and the new next list
 * is emptied and given a fresh id.
 *
 * @param sim the simulation state.
 * @return LIBREGEX_SUCCESS, or LIBREGEX_MALLOC if a group match could not
 *         be allocated. The step is still completed in that case.
 */
libregex_result _regex_step(regex_sim* sim){
    libregex_result result = LIBREGEX_SUCCESS;
    regex_list* swap_temp;
    int index;

    for(index = 0; index < sim->current->size; index++){
        regex_node* current = sim->current->nodes[index];

        if(_regex_node_matches(current, sim->string[sim->index])){
            regex_node** next = _regex_node_get_next(current);
            if(next){
                _regex_node_add(*next, sim->next);
            }
        } else if(current->type == REGEX_END){
            sim->result->matches = 1;
        } else if(current->type == REGEX_GROUP){
            int open = current->data_u.group_s.open;
            int group_id = current->data_u.group_s.id;
            /* An open marks the current character; a close marks the
             * previous one (the last character inside the group). */
            int position = sim->index - 1 + open;

            /* Group ids index the arrays as id - 1, so anything below 1
             * would land before the start of the array. */
            if(group_id < 1){
                continue;
            }
            /* A close reached at offset 0 yields position -1. Never read
             * the string there; the group is empty and is recorded with
             * to = -1, just as an empty group later on gets to = from - 1. */
            if(position >= 0 && sim->string[position] == '\0'){
                continue;
            }

            if(open){
                sim->groups[group_id - 1].from = position;
            } else {
                sim->groups[group_id - 1].to = position;
                if(group_id < LIBREGEX_MAX_GROUP_COUNT){
                    /* Reuse the entry from an earlier close of this group, if any. */
                    regex_match* new_match = sim->result->groups[group_id - 1];
                    if(new_match == NULL){
                        new_match = malloc(sizeof(*new_match));
                    }
                    if(new_match){
                        new_match->from = sim->groups[group_id - 1].from;
                        new_match->to = sim->groups[group_id - 1].to;
                        sim->result->groups[group_id - 1] = new_match;
                    } else {
                        result = LIBREGEX_MALLOC;
                    }
                }
            }
        }
    }

    /* The list just built becomes current; the old one is recycled. Ids
     * advance by 2 so the two lists never share an id. */
    swap_temp = sim->current;
    sim->current = sim->next;
    sim->next = swap_temp;
    sim->next->size = 0;
    sim->next->id += 2;
    return result;
}
/**
 * Runs the NFA against a string, recording whether it matched and the
 * spans of any groups that were closed along the way.
 *
 * build_result->matches and build_result->groups are cleared on entry.
 * Group entries are heap-allocated during matching; release them with
 * regex_result_free.
 *
 * The nodes' list_id values are reset after the run whether or not it
 * succeeded, so the NFA remains usable for later calls even when a step
 * fails.
 *
 * @param root the root, or starting node, of the NFA.
 * @param string the NUL-terminated string to match.
 * @param build_result the result struct to fill in.
 * @return LIBREGEX_SUCCESS if all goes well, or the first error code
 *         encountered while stepping or while resetting the nodes.
 */
libregex_result regex_match_string(regex_node* root, char* string, regex_result* build_result){
    ll clear_ll;
    libregex_result result;
    libregex_result reset_result;
    regex_sim sim;
    regex_list list_a;
    regex_list list_b;

    /* Two lists with distinct ids; _regex_step alternates between them. */
    list_a.size = 0;
    list_a.id = 0;
    list_b.size = 0;
    list_b.id = 1;

    build_result->matches = 0;
    memset(&build_result->groups, 0, sizeof(build_result->groups));
    memset(&sim.groups, 0, sizeof(sim.groups));

    sim.current = &list_a;
    sim.next = &list_b;
    sim.result = build_result;
    sim.string = string;
    sim.index = 0;

    _regex_node_add(root, &list_a);

    /* Step once per character, including the terminator, so that an END
     * node reached by the last character is still observed. */
    do {
        result = _regex_step(&sim);
    } while(sim.string[sim.index++] != '\0' && result == LIBREGEX_SUCCESS);

    /* Always reset list_id on the nodes: leaving stale ids behind after a
     * failed step would make the next match reject the root node. */
    ll_init(&clear_ll);
    reset_result = _regex_find_all(root, &clear_ll, -1);
    ll_clear(&clear_ll);

    /* Report the earliest failure. */
    if(result == LIBREGEX_SUCCESS){
        result = reset_result;
    }
    return result;
}
/**
 * Frees the group entries held by a regex_result and clears its state.
 * Each slot is set to NULL after being freed, so calling this again on
 * the same struct is harmless. The struct itself is not freed.
 * @param result the result struct; NULL is ignored.
 */
void regex_result_free(regex_result* result){
    int index;

    if(result == NULL){
        return;
    }
    for(index = 0; index < LIBREGEX_MAX_GROUP_COUNT; index++){
        free(result->groups[index]);
        /* Do not leave a dangling pointer behind. */
        result->groups[index] = NULL;
    }
    result->matches = 0;
}