Implement matching strings + finding groups.
They're done in the same step. Calling `result = _regex_find_all(root, &clear_ll, -1); ll_clear(&clear_ll);` is all that's necessary to reset all nodes, since the only value that changes during the match is list_id, and that gets set when the nodes are found.
This commit is contained in:
parent
06fe67b226
commit
fbb0635b26
|
@ -300,6 +300,7 @@ void regex_node_clear(regex_node* node);
|
|||
/**
|
||||
* Frees an entire NFA, starting from the given root node.
|
||||
* @param root the root, or starting node, of the NFA.
|
||||
* @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
|
||||
*/
|
||||
libregex_result regex_free(regex_node* root);
|
||||
/**
|
||||
|
@ -317,5 +318,11 @@ libregex_result regex_build(regex_node** root, char* expression);
|
|||
* @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
|
||||
*/
|
||||
libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
|
||||
/**
|
||||
* Frees the group matches stored in a regex_result struct by regex_match_string and resets its matches flag to 0.
|
||||
* The struct itself is not freed; only the data it holds is released.
|
||||
* @param result the result struct whose data should be released.
|
||||
*/
|
||||
void regex_result_free(regex_result* result);
|
||||
|
||||
#endif
|
124
src/libregex.c
124
src/libregex.c
|
@ -1,6 +1,7 @@
|
|||
#include "libregex.h"#include "libregex.h"
|
||||
#include "libregex.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <libregex.h>
|
||||
#include "ll.h"
|
||||
|
||||
int _regex_node_foreach_free(void* data, va_list args){
|
||||
|
@ -477,4 +478,123 @@ libregex_result regex_build(regex_node** root, char* expression){
|
|||
|
||||
return result;
|
||||
}
|
||||
libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
|
||||
|
||||
/**
 * Tests whether a single node accepts the given character.
 * Only REGEX_VALUE, REGEX_RANGE and REGEX_ANY nodes can accept a character;
 * every other node type yields 0.
 * @param node the node to test.
 * @param c the character to test the node against.
 * @return nonzero if the node accepts c, 0 otherwise.
 */
int _regex_node_matches(regex_node* node, char c){
    if(node->type == REGEX_VALUE){
        /* A literal accepts exactly its own character. */
        return node->data_u.value_s.value == c;
    }
    if(node->type == REGEX_RANGE){
        /* A range accepts anything between its bounds, inclusive. */
        return node->data_u.range_s.from <= c && c <= node->data_u.range_s.to;
    }
    if(node->type == REGEX_ANY){
        /* The wildcard accepts everything except the string terminator. */
        return c != '\0';
    }
    return 0;
}
|
||||
|
||||
/**
 * Adds a node to a simulation list, following the links of nodes that
 * do not get stored themselves.
 *
 * A node is only visited if it has not been stamped with this list's id yet
 * (node->list_id < list->id) and the list still has room. Visiting stamps the
 * node with the list's id, so each node is handled at most once per list.
 *  - REGEX_CONNECT: only its next node is visited.
 *  - REGEX_FORK: both its left and right nodes are visited.
 *  - REGEX_CLEAR: stamped, but neither stored nor followed.
 *  - REGEX_GROUP: its next node is visited first, then the group node itself
 *    is stored.
 *  - any other type: the node is stored.
 *
 * Nodes that do not fit into the list are silently dropped.
 * NOTE(review): assumes list->nodes holds LIBREGEX_MAX_NODE_COUNT entries,
 * as implied by the capacity check - confirm against the regex_list declaration.
 *
 * @param node the node to add; NULL is ignored.
 * @param list the list to add the node to.
 */
void _regex_node_add(regex_node* node, regex_list* list){
    if(!node || node->list_id >= list->id || list->size >= LIBREGEX_MAX_NODE_COUNT){
        return;
    }

    node->list_id = list->id;

    if(node->type == REGEX_CONNECT){
        _regex_node_add(node->data_u.connect_s.next, list);
    } else if(node->type == REGEX_FORK){
        _regex_node_add(node->data_u.fork_s.left, list);
        _regex_node_add(node->data_u.fork_s.right, list);
    } else if(node->type != REGEX_CLEAR){
        if(node->type == REGEX_GROUP){
            _regex_node_add(node->data_u.group_s.next, list);
        }
        /*
         * The recursive call above may have used up the remaining capacity,
         * so the check made on entry is no longer enough to guarantee that
         * there is a free slot for this node.
         */
        if(list->size < LIBREGEX_MAX_NODE_COUNT){
            list->nodes[list->size++] = node;
        }
    }
}
|
||||
|
||||
/**
 * Advances the simulation by one character.
 *
 * Every node in sim->current is checked against sim->string[sim->index]:
 *  - a node that accepts the character has its successor (as reported by
 *    _regex_node_get_next) added to sim->next;
 *  - a REGEX_END node marks the result as matching;
 *  - a REGEX_GROUP node records a group boundary in sim->groups, and a closing
 *    group node (open == 0) additionally stores the from/to pair into
 *    sim->result->groups, allocating the entry on first use.
 *
 * Afterwards the two lists are swapped, the new next list is emptied and its
 * id is raised by 2. Since the ids of the two lists differ by one, this makes
 * the id larger than any stamp left on the nodes by earlier steps, so
 * _regex_node_add accepts them again.
 *
 * NOTE(review): sim->groups is indexed with id - 1 before id is compared
 * against LIBREGEX_MAX_GROUP_COUNT; this relies on the builder never handing
 * out ids outside the bounds of sim->groups - confirm.
 *
 * @param sim the simulation state.
 * @return LIBREGEX_SUCCESS, or LIBREGEX_MALLOC if a group match could not be allocated.
 */
libregex_result _regex_step(regex_sim* sim){
    libregex_result result = LIBREGEX_SUCCESS;
    regex_list* swap_temp;
    int index = 0;

    for(; index < sim->current->size; index++){
        regex_node* current = sim->current->nodes[index];

        if(_regex_node_matches(current, sim->string[sim->index])){
            regex_node** next = _regex_node_get_next(current);
            if(next){
                _regex_node_add(*next, sim->next);
            }
        } else if(current->type == REGEX_END){
            sim->result->matches = 1;
        } else if(current->type == REGEX_GROUP){
            /*
             * An opening group starts at the current character, a closing
             * group ends at the previous one.
             */
            int position = sim->index - 1 + current->data_u.group_s.open;

            /*
             * A closing group reached before any character was consumed gives
             * position -1. Reading string[-1] is out of bounds, so the
             * boundary is not recorded in that case.
             * NOTE(review): confirm whether an empty capture at the start of
             * the string should be reported instead of being left unset.
             */
            if(position >= 0 && sim->string[position] != '\0'){
                int slot = current->data_u.group_s.id - 1;
                int* to_set = (current->data_u.group_s.open) ?
                    &(sim->groups[slot].from) :
                    &(sim->groups[slot].to);
                *to_set = position;

                if(current->data_u.group_s.open == 0 && current->data_u.group_s.id < LIBREGEX_MAX_GROUP_COUNT){
                    /* Reuse the entry from an earlier close of the same group, if any. */
                    regex_match* new_match = sim->result->groups[slot];
                    if(new_match == NULL){
                        new_match = malloc(sizeof(*new_match));
                    }

                    if(new_match){
                        new_match->from = sim->groups[slot].from;
                        new_match->to = sim->groups[slot].to;
                        sim->result->groups[slot] = new_match;
                    } else {
                        result = LIBREGEX_MALLOC;
                    }
                }
            }
        }
    }

    swap_temp = sim->current;
    sim->current = sim->next;
    sim->next = swap_temp;

    sim->next->size = 0;
    sim->next->id += 2;

    return result;
}
|
||||
|
||||
/**
 * Runs the NFA starting at root over a string, recording whether it matches
 * and which groups were found.
 *
 * build_result is reset first: matches is set to 0 and the groups array is
 * zeroed (entries from an earlier match are not freed here). Group entries
 * allocated during the run are stored in build_result->groups, even when an
 * error is returned.
 *
 * After the run the nodes are walked once more with _regex_find_all and the
 * value -1, which is what resets the list_id stamps left on them by the match.
 * This is done even when a step failed, because stale stamps would keep
 * _regex_node_add from adding any node on the next match with the same NFA.
 *
 * @param root the root, or starting node, of the NFA.
 * @param string the NUL-terminated string to match.
 * @param build_result the struct that receives the outcome of the match.
 * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
 */
libregex_result regex_match_string(regex_node* root, char* string, regex_result* build_result){
    ll clear_ll;
    libregex_result result;
    libregex_result clear_result;
    regex_sim sim;
    regex_list list_a;
    regex_list list_b;

    /* The ids differ by one so that _regex_step can keep them apart by adding 2. */
    list_a.size = 0;
    list_a.id = 0;

    list_b.size = 0;
    list_b.id = 1;

    build_result->matches = 0;
    memset(&build_result->groups, 0, sizeof(build_result->groups));

    memset(&sim.groups, 0, sizeof(sim.groups));
    sim.current = &list_a;
    sim.next = &list_b;
    sim.result = build_result;
    sim.string = string;
    sim.index = 0;

    _regex_node_add(root, &list_a);

    /* One step per character, plus a final step on the terminator. */
    do {
        result = _regex_step(&sim);
    } while(sim.string[sim.index++] != '\0' && result == LIBREGEX_SUCCESS);

    /* Always reset the nodes, but report the first error that occurred. */
    ll_init(&clear_ll);
    clear_result = _regex_find_all(root, &clear_ll, -1);
    ll_clear(&clear_ll);

    if(result == LIBREGEX_SUCCESS){
        result = clear_result;
    }

    return result;
}
|
||||
/**
 * Frees the group matches held by a regex_result struct and resets it.
 * Every entry of result->groups is freed and set to NULL, and matches is set
 * to 0, so calling this function again on the same struct is harmless.
 * The struct itself is not freed.
 * @param result the result struct; NULL is ignored.
 */
void regex_result_free(regex_result* result){
    int index;

    if(result == NULL){
        return;
    }

    for(index = 0; index < LIBREGEX_MAX_GROUP_COUNT; index++){
        free(result->groups[index]);
        /* Do not leave a dangling pointer behind: a second call would free it again. */
        result->groups[index] = NULL;
    }
    result->matches = 0;
}
|
Loading…
Reference in New Issue
Block a user