#include "pattern.h" #include #include #include "pattern.h" #include "sprs.h" #include "ll.h" void _pattern_node_clear(pattern_node* to_clear){ to_clear->type = PNODE_CLEAR; to_clear->id = -1; to_clear->pattern_id = -1; to_clear->invert = 0; memset(&(to_clear->data_u), 0, sizeof(to_clear->data_u)); } liblex_result _pattern_node_create_clear(pattern_node** into){ liblex_result result = LIBLEX_SUCCESS; *into = malloc(sizeof(**into)); if(*into){ _pattern_node_clear(*into); } else { result = LIBLEX_MALLOC; } return result; } liblex_result _pattern_node_create_value(pattern_node** into, int id, int pattern_id, char value, pattern_node* next){ liblex_result result = _pattern_node_create_clear(into); if(result == LIBLEX_SUCCESS){ (*into)->type = PNODE_VALUE; (*into)->id = id; (*into)->pattern_id = pattern_id; (*into)->data_u.value_s.value = value; (*into)->data_u.value_s.next = next; } return result; } liblex_result _pattern_node_create_range(pattern_node** into, int id, int pattern_id, char from, char to, pattern_node* next){ liblex_result result = _pattern_node_create_clear(into); if(result == LIBLEX_SUCCESS){ (*into)->type = PNODE_RANGE; (*into)->id = id; (*into)->pattern_id = pattern_id; (*into)->data_u.range_s.from = from; (*into)->data_u.range_s.to = to; (*into)->data_u.range_s.next = next; } return result; } liblex_result _pattern_node_create_any(pattern_node** into, int id, int pattern_id, pattern_node* next){ liblex_result result = _pattern_node_create_clear(into); if(result == LIBLEX_SUCCESS){ (*into)->type = PNODE_ANY; (*into)->id = id; (*into)->pattern_id = pattern_id; (*into)->data_u.any_s.next = next; } return result; } liblex_result _pattern_node_create_connect(pattern_node** into, int id, int pattern_id, pattern_node* next){ liblex_result result = _pattern_node_create_clear(into); if(result == LIBLEX_SUCCESS){ (*into)->type = PNODE_CONNECT; (*into)->id = id; (*into)->pattern_id = pattern_id; (*into)->data_u.connect_s.next = next; } return result; } liblex_result _pattern_node_create_fork(pattern_node** into, int id, int pattern_id, pattern_node* left, pattern_node* right){ liblex_result result = _pattern_node_create_clear(into); if(result == LIBLEX_SUCCESS){ (*into)->type = PNODE_FORK; (*into)->id = id; (*into)->pattern_id = pattern_id; (*into)->data_u.fork_s.left = left; (*into)->data_u.fork_s.right = right; } return result; } liblex_result _pattern_node_create_end(pattern_node** into, int id, int pattern_id){ liblex_result result = _pattern_node_create_clear(into); if(result == LIBLEX_SUCCESS){ (*into)->type = PNODE_END; (*into)->id = id; (*into)->pattern_id = pattern_id; } return result; } liblex_result _pattern_chain_create(pattern_chain** into, pattern_node* from, pattern_node* to){ liblex_result result = LIBLEX_SUCCESS; *into = malloc(sizeof(**into)); if(*into){ (*into)->head = from; (*into)->tail = to; } else { result = LIBLEX_MALLOC; } return result; } pattern_node** _pattern_node_get_next(pattern_node* node) { pattern_node** next_node = NULL; if(node->type == PNODE_ANY){ next_node = &(node->data_u.any_s.next); } else if(node->type == PNODE_RANGE){ next_node = &(node->data_u.range_s.next); } else if(node->type == PNODE_VALUE) { next_node = &(node->data_u.value_s.next); } else if(node->type == PNODE_CONNECT){ next_node = &(node->data_u.connect_s.next); } return next_node; } void _pattern_node_append_node(pattern_node* append_to, pattern_node* to_append){ if(append_to && to_append){ pattern_node** next_node = _pattern_node_get_next(append_to); if(next_node){ *next_node = to_append; } } } void _pattern_chain_append_node(pattern_chain* append_to, pattern_node* to_append){ if(append_to && to_append){ pattern_node** next_node = (append_to->head == NULL) ? &append_to->head : _pattern_node_get_next(append_to->tail); if(next_node) { *next_node = append_to->tail = to_append; } } } void _pattern_chain_append_chain(pattern_chain* append_to, pattern_chain* to_append){ if(append_to && to_append){ _pattern_chain_append_node(append_to, to_append->head); if(to_append->tail) { append_to->tail = to_append->tail; } } } int _pattern_node_foreach_free(void* node, va_list args){ free(node); return 0; } void _pattern_node_find_all(pattern_node* node, sprs* set){ if(node && !sprs_contains(set, node->id)){ sprs_put(set, node->id, node); if(node->type == PNODE_ANY) { _pattern_node_find_all(node->data_u.any_s.next, set); } else if(node->type == PNODE_CONNECT){ _pattern_node_find_all(node->data_u.connect_s.next, set); } else if(node->type == PNODE_FORK){ _pattern_node_find_all(node->data_u.fork_s.left, set); _pattern_node_find_all(node->data_u.fork_s.right, set); } else if(node->type == PNODE_RANGE){ _pattern_node_find_all(node->data_u.range_s.next, set); } else if(node->type == PNODE_VALUE){ _pattern_node_find_all(node->data_u.value_s.next, set); } } } liblex_result _pattern_free(pattern_node* root, int size){ sprs set; liblex_result result; sprs_init(&set); result = (sprs_setup(&set, (size_t) size) == LIBDS_SUCCESS) ? LIBLEX_SUCCESS : LIBLEX_MALLOC; if(result == LIBLEX_SUCCESS){ _pattern_node_find_all(root, &set); sprs_foreach(&set, NULL, compare_always, _pattern_node_foreach_free); } sprs_free(&set); return result; } liblex_result _pattern_read_value(char* read_into, const char* string, int* index){ liblex_result result = LIBLEX_SUCCESS; if(string[*index] == '\\'){ (*index)++; } if(string[*index]) { *read_into = string[*index]; (*index)++; } else { result = LIBLEX_INVALID; } return result; } liblex_result _pattern_build_inverted_or(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index) { pattern_node* new_node = NULL; liblex_result result = LIBLEX_SUCCESS; result = _pattern_chain_create(into, NULL, NULL); if(result == LIBLEX_SUCCESS){ (*index)++; if(string[*index]) (*index)++; } while(string[*index] && string[*index] != ']' && result == LIBLEX_SUCCESS) { char from = '\0'; char to = '\0'; result = _pattern_read_value(&from, string, index); if(result == LIBLEX_SUCCESS && string[*index] == '-') { (*index)++; result = _pattern_read_value(&to, string, index); } if(result == LIBLEX_SUCCESS) { if(to) { result = _pattern_node_create_range(&new_node, (*ids)++, pattern_id, from, to, NULL); } else { result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, from, NULL); } } if(result == LIBLEX_SUCCESS) { new_node->invert = 1; _pattern_chain_append_node(*into, new_node); } } if(result == LIBLEX_SUCCESS) { result = _pattern_node_create_any(&new_node, (*ids)++, pattern_id, NULL); if(result == LIBLEX_SUCCESS) { _pattern_chain_append_node(*into, new_node); } } if(result != LIBLEX_SUCCESS) { if(*into && (*into)->head) { _pattern_free((*into)->head, *ids); } free(*into); } else { (*index)++; } return result; } liblex_result _pattern_build_or(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index) { pattern_node *tail_node; liblex_result result = LIBLEX_SUCCESS; result = _pattern_node_create_connect(&tail_node, (*ids)++, pattern_id, NULL); if (result == LIBLEX_SUCCESS) { result = _pattern_chain_create(into, NULL, tail_node); } if(result == LIBLEX_SUCCESS){ (*index)++; } while (string[*index] && string[*index] != ']' && result == LIBLEX_SUCCESS) { char from = '\0'; char to = '\0'; pattern_node *new_node = NULL; pattern_node *fork_node = NULL; pattern_node *add_node = NULL; result = _pattern_read_value(&from, string, index); if (result == LIBLEX_SUCCESS) { if (string[*index] == '-') { (*index)++; result = _pattern_read_value(&to, string, index); } } if (result == LIBLEX_SUCCESS) { if (to) { result = _pattern_node_create_range(&new_node, (*ids)++, pattern_id, from, to, tail_node); } else { result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, from, tail_node); } add_node = new_node; } if (result == LIBLEX_SUCCESS && (*into)->head) { result = _pattern_node_create_fork(&fork_node, (*ids)++, pattern_id, (*into)->head, new_node); if (result == LIBLEX_SUCCESS) { add_node = fork_node; } } if (result == LIBLEX_SUCCESS) { (*into)->head = add_node; } else { free(new_node); free(fork_node); } } if (result != LIBLEX_SUCCESS) { if (*into && (*into)->head) { _pattern_free((*into)->head, *ids); } else { free(tail_node); } free(*into); } else { (*index)++; } return result; } void _pattern_chain_append_chain_discard(pattern_chain* append_to, pattern_chain** to_append){ _pattern_chain_append_chain(append_to, *to_append); free(*to_append); *to_append = NULL; } liblex_result _pattern_build_chain(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index){ liblex_result result = LIBLEX_SUCCESS; ll or_stack; pattern_chain* current_chain = NULL; pattern_chain* sub_chain = NULL; int is_subchain = (*index > -1) && (string[*index] == '('); ll_init(&or_stack); result = _pattern_chain_create(¤t_chain, NULL, NULL); if(result == LIBLEX_SUCCESS){ (*index)++; } while(string[*index] && string[*index] != ')' && result == LIBLEX_SUCCESS){ if(string[*index] == '('){ _pattern_chain_append_chain_discard(current_chain, &sub_chain); result = _pattern_build_chain(&sub_chain, ids, pattern_id, string, index); } else if(string[*index] == '['){ _pattern_chain_append_chain_discard(current_chain, &sub_chain); if(string[(*index) + 1] == '^') { result = _pattern_build_inverted_or(&sub_chain, ids, pattern_id, string, index); } else { result = _pattern_build_or(&sub_chain, ids, pattern_id, string, index); } } else if(string[*index] == '?' || string[*index] == '*' || string[*index] == '+'){ if(sub_chain != NULL && sub_chain->head){ pattern_node* connection_node = NULL; pattern_node* fork_node = NULL; result = _pattern_node_create_connect(&connection_node, (*ids)++, pattern_id, NULL); if(result == LIBLEX_SUCCESS){ result = _pattern_node_create_fork(&fork_node, (*ids)++, pattern_id, sub_chain->head, connection_node); } if(result == LIBLEX_SUCCESS){ pattern_node** next_node = _pattern_node_get_next(sub_chain->tail); if(next_node){ *next_node = (string[*index] == '?') ? connection_node : fork_node; sub_chain->head = (string[*index] == '+') ? sub_chain->head : fork_node; sub_chain->tail = connection_node; } else { result = LIBLEX_INVALID; } } if(result == LIBLEX_SUCCESS){ _pattern_chain_append_chain_discard(current_chain, &sub_chain); (*index)++; } else { free(connection_node); free(fork_node); } } else { result = LIBLEX_INVALID; } } else if(string[*index] == '|'){ _pattern_chain_append_chain_discard(current_chain, &sub_chain); if(current_chain->head){ result = (ll_append(&or_stack, current_chain) == LIBDS_SUCCESS) ? LIBLEX_SUCCESS : LIBLEX_MALLOC; } else { result = LIBLEX_INVALID; } if(result == LIBLEX_SUCCESS){ result = _pattern_chain_create(¤t_chain, NULL, NULL); (*index)++; } } else if(string[*index] == '.') { pattern_node* new_node = NULL; _pattern_chain_append_chain_discard(current_chain, &sub_chain); result = _pattern_chain_create(&sub_chain, NULL, NULL); if(result == LIBLEX_SUCCESS){ result = _pattern_node_create_any(&new_node, (*ids)++, pattern_id, NULL); } if(result == LIBLEX_SUCCESS){ sub_chain->head = sub_chain->tail = new_node; (*index)++; } else { free(new_node); } } else { char new_char = '\0'; pattern_node* new_node = NULL; _pattern_chain_append_chain_discard(current_chain, &sub_chain); result = _pattern_read_value(&new_char, string, index); if(result == LIBLEX_SUCCESS){ result = _pattern_chain_create(&sub_chain, NULL, NULL); } if(result == LIBLEX_SUCCESS){ result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, new_char, NULL); } if(result == LIBLEX_SUCCESS){ sub_chain->head = sub_chain->tail = new_node; } else { free(new_node); } } } if(is_subchain && result == LIBLEX_SUCCESS && string[*index] != ')'){ result = LIBLEX_INVALID; } if(result == LIBLEX_SUCCESS){ _pattern_chain_append_chain_discard(current_chain, &sub_chain); } if(result == LIBLEX_SUCCESS && or_stack.head){ pattern_node* connect_node = NULL; result = _pattern_node_create_connect(&connect_node, (*ids)++, pattern_id, NULL); if(result == LIBLEX_SUCCESS){ _pattern_chain_append_node(current_chain, connect_node); } while(or_stack.head && result == LIBLEX_SUCCESS){ pattern_node* fork = NULL; pattern_chain* new_chain = ll_pophead(&or_stack); if(new_chain && new_chain->head){ result = _pattern_node_create_fork(&fork, (*ids)++, pattern_id, current_chain->head, new_chain->head); if(result == LIBLEX_SUCCESS){ _pattern_chain_append_node(new_chain, connect_node); current_chain->head = fork; } else { _pattern_free(new_chain->head, *ids); } free(new_chain); } } } if(result != LIBLEX_SUCCESS){ if(current_chain){ _pattern_free(current_chain->head, *ids); } free(current_chain); if(sub_chain){ _pattern_free(sub_chain->head, *ids); } free(sub_chain); while(or_stack.head){ pattern_chain* new_chain = ll_pophead(&or_stack); if(new_chain){ _pattern_free(new_chain->head, *ids); } } } if(result == LIBLEX_SUCCESS){ *into = current_chain; if(is_subchain) { (*index)++; } } return result; } liblex_result pattern_compile(pattern* ptrn, const char* expression, int id){ liblex_result result; pattern_node* end_node = NULL; pattern_chain* full_chain = NULL; int index = -1; int ids = 0; result = _pattern_build_chain(&full_chain, &ids, id, expression, &index); if(result == LIBLEX_SUCCESS){ result = _pattern_node_create_end(&end_node, ids, id); } if(result == LIBLEX_SUCCESS){ _pattern_chain_append_node(full_chain, end_node); ptrn->head = full_chain->head; ptrn->size = ids + 1; free(full_chain); } else { ptrn->head = NULL; ptrn->size = 0; if(full_chain){ _pattern_free(full_chain->head, ids); } } return result; } liblex_result pattern_free(pattern* ptrn){ return _pattern_free(ptrn->head, ptrn->size); }