509 lines
15 KiB
C
509 lines
15 KiB
C
#include "pattern.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "pattern.h"
|
|
#include "sprs.h"
|
|
#include "ll.h"
|
|
|
|
void _pattern_node_clear(pattern_node* to_clear){
|
|
to_clear->type = PNODE_CLEAR;
|
|
to_clear->id = -1;
|
|
to_clear->pattern_id = -1;
|
|
to_clear->invert = 0;
|
|
memset(&(to_clear->data_u), 0, sizeof(to_clear->data_u));
|
|
}
|
|
|
|
liblex_result _pattern_node_create_clear(pattern_node** into){
|
|
liblex_result result = LIBLEX_SUCCESS;
|
|
*into = malloc(sizeof(**into));
|
|
if(*into){
|
|
_pattern_node_clear(*into);
|
|
} else {
|
|
result = LIBLEX_MALLOC;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_node_create_value(pattern_node** into, int id, int pattern_id, char value, pattern_node* next){
|
|
liblex_result result = _pattern_node_create_clear(into);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*into)->type = PNODE_VALUE;
|
|
(*into)->id = id;
|
|
(*into)->pattern_id = pattern_id;
|
|
(*into)->data_u.value_s.value = value;
|
|
(*into)->data_u.value_s.next = next;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_node_create_range(pattern_node** into, int id, int pattern_id, char from, char to, pattern_node* next){
|
|
liblex_result result = _pattern_node_create_clear(into);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*into)->type = PNODE_RANGE;
|
|
(*into)->id = id;
|
|
(*into)->pattern_id = pattern_id;
|
|
(*into)->data_u.range_s.from = from;
|
|
(*into)->data_u.range_s.to = to;
|
|
(*into)->data_u.range_s.next = next;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_node_create_any(pattern_node** into, int id, int pattern_id, pattern_node* next){
|
|
liblex_result result = _pattern_node_create_clear(into);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*into)->type = PNODE_ANY;
|
|
(*into)->id = id;
|
|
(*into)->pattern_id = pattern_id;
|
|
(*into)->data_u.any_s.next = next;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_node_create_connect(pattern_node** into, int id, int pattern_id, pattern_node* next){
|
|
liblex_result result = _pattern_node_create_clear(into);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*into)->type = PNODE_CONNECT;
|
|
(*into)->id = id;
|
|
(*into)->pattern_id = pattern_id;
|
|
(*into)->data_u.connect_s.next = next;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_node_create_fork(pattern_node** into, int id, int pattern_id, pattern_node* left, pattern_node* right){
|
|
liblex_result result = _pattern_node_create_clear(into);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*into)->type = PNODE_FORK;
|
|
(*into)->id = id;
|
|
(*into)->pattern_id = pattern_id;
|
|
(*into)->data_u.fork_s.left = left;
|
|
(*into)->data_u.fork_s.right = right;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_node_create_end(pattern_node** into, int id, int pattern_id){
|
|
liblex_result result = _pattern_node_create_clear(into);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*into)->type = PNODE_END;
|
|
(*into)->id = id;
|
|
(*into)->pattern_id = pattern_id;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_chain_create(pattern_chain** into, pattern_node* from, pattern_node* to){
|
|
liblex_result result = LIBLEX_SUCCESS;
|
|
*into = malloc(sizeof(**into));
|
|
if(*into){
|
|
(*into)->head = from;
|
|
(*into)->tail = to;
|
|
} else {
|
|
result = LIBLEX_MALLOC;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
pattern_node** _pattern_node_get_next(pattern_node* node) {
|
|
pattern_node** next_node = NULL;
|
|
if(node->type == PNODE_ANY){
|
|
next_node = &(node->data_u.any_s.next);
|
|
} else if(node->type == PNODE_RANGE){
|
|
next_node = &(node->data_u.range_s.next);
|
|
} else if(node->type == PNODE_VALUE) {
|
|
next_node = &(node->data_u.value_s.next);
|
|
} else if(node->type == PNODE_CONNECT){
|
|
next_node = &(node->data_u.connect_s.next);
|
|
}
|
|
return next_node;
|
|
}
|
|
|
|
void _pattern_node_append_node(pattern_node* append_to, pattern_node* to_append){
|
|
if(append_to && to_append){
|
|
pattern_node** next_node = _pattern_node_get_next(append_to);
|
|
if(next_node){
|
|
*next_node = to_append;
|
|
}
|
|
}
|
|
}
|
|
|
|
void _pattern_chain_append_node(pattern_chain* append_to, pattern_node* to_append){
|
|
if(append_to && to_append){
|
|
pattern_node** next_node = (append_to->head == NULL) ? &append_to->head : _pattern_node_get_next(append_to->tail);
|
|
if(next_node) {
|
|
*next_node = append_to->tail = to_append;
|
|
}
|
|
}
|
|
}
|
|
|
|
void _pattern_chain_append_chain(pattern_chain* append_to, pattern_chain* to_append){
|
|
if(append_to && to_append){
|
|
_pattern_chain_append_node(append_to, to_append->head);
|
|
if(to_append->tail) {
|
|
append_to->tail = to_append->tail;
|
|
}
|
|
}
|
|
}
|
|
|
|
int _pattern_node_foreach_free(void* node, va_list args){
|
|
free(node);
|
|
return 0;
|
|
}
|
|
|
|
void _pattern_node_find_all(pattern_node* node, sprs* set){
|
|
if(node && !sprs_contains(set, node->id)){
|
|
sprs_put(set, node->id, node);
|
|
if(node->type == PNODE_ANY) {
|
|
_pattern_node_find_all(node->data_u.any_s.next, set);
|
|
} else if(node->type == PNODE_CONNECT){
|
|
_pattern_node_find_all(node->data_u.connect_s.next, set);
|
|
} else if(node->type == PNODE_FORK){
|
|
_pattern_node_find_all(node->data_u.fork_s.left, set);
|
|
_pattern_node_find_all(node->data_u.fork_s.right, set);
|
|
} else if(node->type == PNODE_RANGE){
|
|
_pattern_node_find_all(node->data_u.range_s.next, set);
|
|
} else if(node->type == PNODE_VALUE){
|
|
_pattern_node_find_all(node->data_u.value_s.next, set);
|
|
}
|
|
}
|
|
}
|
|
|
|
liblex_result _pattern_free(pattern_node* root, int size){
|
|
sprs set;
|
|
liblex_result result;
|
|
sprs_init(&set);
|
|
result = (sprs_setup(&set, (size_t) size) == LIBDS_SUCCESS) ? LIBLEX_SUCCESS : LIBLEX_MALLOC;
|
|
if(result == LIBLEX_SUCCESS){
|
|
_pattern_node_find_all(root, &set);
|
|
sprs_foreach(&set, NULL, compare_always, _pattern_node_foreach_free);
|
|
}
|
|
sprs_free(&set);
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_read_value(char* read_into, const char* string, int* index){
|
|
liblex_result result = LIBLEX_SUCCESS;
|
|
if(string[*index] == '\\'){
|
|
(*index)++;
|
|
}
|
|
if(string[*index]) {
|
|
*read_into = string[*index];
|
|
(*index)++;
|
|
} else {
|
|
result = LIBLEX_INVALID;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
liblex_result _pattern_build_inverted_or(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index) {
|
|
pattern_node* new_node = NULL;
|
|
liblex_result result = LIBLEX_SUCCESS;
|
|
result = _pattern_chain_create(into, NULL, NULL);
|
|
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*index)++;
|
|
if(string[*index]) (*index)++;
|
|
}
|
|
|
|
while(string[*index] && string[*index] != ']' && result == LIBLEX_SUCCESS) {
|
|
char from = '\0';
|
|
char to = '\0';
|
|
result = _pattern_read_value(&from, string, index);
|
|
if(result == LIBLEX_SUCCESS && string[*index] == '-') {
|
|
(*index)++;
|
|
result = _pattern_read_value(&to, string, index);
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS) {
|
|
if(to) {
|
|
result = _pattern_node_create_range(&new_node, (*ids)++, pattern_id, from, to, NULL);
|
|
} else {
|
|
result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, from, NULL);
|
|
}
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS) {
|
|
new_node->invert = 1;
|
|
_pattern_chain_append_node(*into, new_node);
|
|
}
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS) {
|
|
result = _pattern_node_create_any(&new_node, (*ids)++, pattern_id, NULL);
|
|
if(result == LIBLEX_SUCCESS) {
|
|
_pattern_chain_append_node(*into, new_node);
|
|
}
|
|
}
|
|
|
|
if(result != LIBLEX_SUCCESS) {
|
|
if(*into && (*into)->head) {
|
|
_pattern_free((*into)->head, *ids);
|
|
}
|
|
free(*into);
|
|
} else {
|
|
(*index)++;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
liblex_result _pattern_build_or(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index) {
|
|
pattern_node *tail_node;
|
|
liblex_result result = LIBLEX_SUCCESS;
|
|
result = _pattern_node_create_connect(&tail_node, (*ids)++, pattern_id, NULL);
|
|
if (result == LIBLEX_SUCCESS) {
|
|
result = _pattern_chain_create(into, NULL, tail_node);
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*index)++;
|
|
}
|
|
|
|
while (string[*index] && string[*index] != ']' && result == LIBLEX_SUCCESS) {
|
|
char from = '\0';
|
|
char to = '\0';
|
|
pattern_node *new_node = NULL;
|
|
pattern_node *fork_node = NULL;
|
|
pattern_node *add_node = NULL;
|
|
result = _pattern_read_value(&from, string, index);
|
|
if (result == LIBLEX_SUCCESS) {
|
|
if (string[*index] == '-') {
|
|
(*index)++;
|
|
result = _pattern_read_value(&to, string, index);
|
|
}
|
|
}
|
|
|
|
if (result == LIBLEX_SUCCESS) {
|
|
if (to) {
|
|
result = _pattern_node_create_range(&new_node, (*ids)++, pattern_id, from, to, tail_node);
|
|
} else {
|
|
result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, from, tail_node);
|
|
}
|
|
add_node = new_node;
|
|
}
|
|
|
|
if (result == LIBLEX_SUCCESS && (*into)->head) {
|
|
result = _pattern_node_create_fork(&fork_node, (*ids)++, pattern_id, (*into)->head, new_node);
|
|
if (result == LIBLEX_SUCCESS) {
|
|
add_node = fork_node;
|
|
}
|
|
}
|
|
|
|
if (result == LIBLEX_SUCCESS) {
|
|
(*into)->head = add_node;
|
|
} else {
|
|
free(new_node);
|
|
free(fork_node);
|
|
}
|
|
}
|
|
|
|
if (result != LIBLEX_SUCCESS) {
|
|
if (*into && (*into)->head) {
|
|
_pattern_free((*into)->head, *ids);
|
|
} else {
|
|
free(tail_node);
|
|
}
|
|
free(*into);
|
|
} else {
|
|
(*index)++;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
void _pattern_chain_append_chain_discard(pattern_chain* append_to, pattern_chain** to_append){
|
|
_pattern_chain_append_chain(append_to, *to_append);
|
|
free(*to_append);
|
|
*to_append = NULL;
|
|
}
|
|
|
|
liblex_result _pattern_build_chain(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index){
|
|
liblex_result result = LIBLEX_SUCCESS;
|
|
ll or_stack;
|
|
pattern_chain* current_chain = NULL;
|
|
pattern_chain* sub_chain = NULL;
|
|
int is_subchain = (*index > -1) && (string[*index] == '(');
|
|
|
|
ll_init(&or_stack);
|
|
result = _pattern_chain_create(¤t_chain, NULL, NULL);
|
|
if(result == LIBLEX_SUCCESS){
|
|
(*index)++;
|
|
}
|
|
|
|
while(string[*index] && string[*index] != ')' && result == LIBLEX_SUCCESS){
|
|
if(string[*index] == '('){
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
result = _pattern_build_chain(&sub_chain, ids, pattern_id, string, index);
|
|
} else if(string[*index] == '['){
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
if(string[(*index) + 1] == '^') {
|
|
result = _pattern_build_inverted_or(&sub_chain, ids, pattern_id, string, index);
|
|
} else {
|
|
result = _pattern_build_or(&sub_chain, ids, pattern_id, string, index);
|
|
}
|
|
} else if(string[*index] == '?' || string[*index] == '*' || string[*index] == '+'){
|
|
if(sub_chain != NULL && sub_chain->head){
|
|
pattern_node* connection_node = NULL;
|
|
pattern_node* fork_node = NULL;
|
|
|
|
result = _pattern_node_create_connect(&connection_node, (*ids)++, pattern_id, NULL);
|
|
if(result == LIBLEX_SUCCESS){
|
|
result = _pattern_node_create_fork(&fork_node, (*ids)++, pattern_id, sub_chain->head, connection_node);
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS){
|
|
pattern_node** next_node = _pattern_node_get_next(sub_chain->tail);
|
|
if(next_node){
|
|
*next_node = (string[*index] == '?') ? connection_node : fork_node;
|
|
sub_chain->head = (string[*index] == '+') ? sub_chain->head : fork_node;
|
|
sub_chain->tail = connection_node;
|
|
} else {
|
|
result = LIBLEX_INVALID;
|
|
}
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS){
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
(*index)++;
|
|
} else {
|
|
free(connection_node);
|
|
free(fork_node);
|
|
}
|
|
} else {
|
|
result = LIBLEX_INVALID;
|
|
}
|
|
} else if(string[*index] == '|'){
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
|
|
if(current_chain->head){
|
|
result = (ll_append(&or_stack, current_chain) == LIBDS_SUCCESS) ? LIBLEX_SUCCESS : LIBLEX_MALLOC;
|
|
} else {
|
|
result = LIBLEX_INVALID;
|
|
}
|
|
if(result == LIBLEX_SUCCESS){
|
|
result = _pattern_chain_create(¤t_chain, NULL, NULL);
|
|
(*index)++;
|
|
}
|
|
} else if(string[*index] == '.') {
|
|
pattern_node* new_node = NULL;
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
result = _pattern_chain_create(&sub_chain, NULL, NULL);
|
|
if(result == LIBLEX_SUCCESS){
|
|
result = _pattern_node_create_any(&new_node, (*ids)++, pattern_id, NULL);
|
|
}
|
|
if(result == LIBLEX_SUCCESS){
|
|
sub_chain->head = sub_chain->tail = new_node;
|
|
(*index)++;
|
|
} else {
|
|
free(new_node);
|
|
}
|
|
} else {
|
|
char new_char = '\0';
|
|
pattern_node* new_node = NULL;
|
|
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
result = _pattern_read_value(&new_char, string, index);
|
|
if(result == LIBLEX_SUCCESS){
|
|
result = _pattern_chain_create(&sub_chain, NULL, NULL);
|
|
}
|
|
if(result == LIBLEX_SUCCESS){
|
|
result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, new_char, NULL);
|
|
}
|
|
if(result == LIBLEX_SUCCESS){
|
|
sub_chain->head = sub_chain->tail = new_node;
|
|
} else {
|
|
free(new_node);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(is_subchain && result == LIBLEX_SUCCESS && string[*index] != ')'){
|
|
result = LIBLEX_INVALID;
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS){
|
|
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS && or_stack.head){
|
|
pattern_node* connect_node = NULL;
|
|
|
|
result = _pattern_node_create_connect(&connect_node, (*ids)++, pattern_id, NULL);
|
|
if(result == LIBLEX_SUCCESS){
|
|
_pattern_chain_append_node(current_chain, connect_node);
|
|
}
|
|
while(or_stack.head && result == LIBLEX_SUCCESS){
|
|
pattern_node* fork = NULL;
|
|
pattern_chain* new_chain = ll_pophead(&or_stack);
|
|
|
|
if(new_chain && new_chain->head){
|
|
result = _pattern_node_create_fork(&fork, (*ids)++, pattern_id, current_chain->head, new_chain->head);
|
|
if(result == LIBLEX_SUCCESS){
|
|
_pattern_chain_append_node(new_chain, connect_node);
|
|
current_chain->head = fork;
|
|
} else {
|
|
_pattern_free(new_chain->head, *ids);
|
|
}
|
|
free(new_chain);
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
if(result != LIBLEX_SUCCESS){
|
|
if(current_chain){
|
|
_pattern_free(current_chain->head, *ids);
|
|
}
|
|
free(current_chain);
|
|
if(sub_chain){
|
|
_pattern_free(sub_chain->head, *ids);
|
|
}
|
|
free(sub_chain);
|
|
while(or_stack.head){
|
|
pattern_chain* new_chain = ll_pophead(&or_stack);
|
|
if(new_chain){
|
|
_pattern_free(new_chain->head, *ids);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(result == LIBLEX_SUCCESS){
|
|
*into = current_chain;
|
|
if(is_subchain) {
|
|
(*index)++;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
liblex_result pattern_compile(pattern* ptrn, const char* expression, int id){
|
|
liblex_result result;
|
|
pattern_node* end_node = NULL;
|
|
pattern_chain* full_chain = NULL;
|
|
int index = -1;
|
|
int ids = 0;
|
|
|
|
result = _pattern_build_chain(&full_chain, &ids, id, expression, &index);
|
|
if(result == LIBLEX_SUCCESS){
|
|
result = _pattern_node_create_end(&end_node, ids, id);
|
|
}
|
|
if(result == LIBLEX_SUCCESS){
|
|
_pattern_chain_append_node(full_chain, end_node);
|
|
ptrn->head = full_chain->head;
|
|
ptrn->size = ids + 1;
|
|
free(full_chain);
|
|
} else {
|
|
ptrn->head = NULL;
|
|
ptrn->size = 0;
|
|
if(full_chain){
|
|
_pattern_free(full_chain->head, ids);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
liblex_result pattern_free(pattern* ptrn){
|
|
return _pattern_free(ptrn->head, ptrn->size);
|
|
}
|