liblex/src/pattern.c

509 lines
15 KiB
C

#include "pattern.h"
#include <stdlib.h>
#include <string.h>
#include "pattern.h"
#include "sprs.h"
#include "ll.h"
void _pattern_node_clear(pattern_node* to_clear){
to_clear->type = PNODE_CLEAR;
to_clear->id = -1;
to_clear->pattern_id = -1;
to_clear->invert = 0;
memset(&(to_clear->data_u), 0, sizeof(to_clear->data_u));
}
liblex_result _pattern_node_create_clear(pattern_node** into){
liblex_result result = LIBLEX_SUCCESS;
*into = malloc(sizeof(**into));
if(*into){
_pattern_node_clear(*into);
} else {
result = LIBLEX_MALLOC;
}
return result;
}
liblex_result _pattern_node_create_value(pattern_node** into, int id, int pattern_id, char value, pattern_node* next){
liblex_result result = _pattern_node_create_clear(into);
if(result == LIBLEX_SUCCESS){
(*into)->type = PNODE_VALUE;
(*into)->id = id;
(*into)->pattern_id = pattern_id;
(*into)->data_u.value_s.value = value;
(*into)->data_u.value_s.next = next;
}
return result;
}
liblex_result _pattern_node_create_range(pattern_node** into, int id, int pattern_id, char from, char to, pattern_node* next){
liblex_result result = _pattern_node_create_clear(into);
if(result == LIBLEX_SUCCESS){
(*into)->type = PNODE_RANGE;
(*into)->id = id;
(*into)->pattern_id = pattern_id;
(*into)->data_u.range_s.from = from;
(*into)->data_u.range_s.to = to;
(*into)->data_u.range_s.next = next;
}
return result;
}
liblex_result _pattern_node_create_any(pattern_node** into, int id, int pattern_id, pattern_node* next){
liblex_result result = _pattern_node_create_clear(into);
if(result == LIBLEX_SUCCESS){
(*into)->type = PNODE_ANY;
(*into)->id = id;
(*into)->pattern_id = pattern_id;
(*into)->data_u.any_s.next = next;
}
return result;
}
liblex_result _pattern_node_create_connect(pattern_node** into, int id, int pattern_id, pattern_node* next){
liblex_result result = _pattern_node_create_clear(into);
if(result == LIBLEX_SUCCESS){
(*into)->type = PNODE_CONNECT;
(*into)->id = id;
(*into)->pattern_id = pattern_id;
(*into)->data_u.connect_s.next = next;
}
return result;
}
liblex_result _pattern_node_create_fork(pattern_node** into, int id, int pattern_id, pattern_node* left, pattern_node* right){
liblex_result result = _pattern_node_create_clear(into);
if(result == LIBLEX_SUCCESS){
(*into)->type = PNODE_FORK;
(*into)->id = id;
(*into)->pattern_id = pattern_id;
(*into)->data_u.fork_s.left = left;
(*into)->data_u.fork_s.right = right;
}
return result;
}
liblex_result _pattern_node_create_end(pattern_node** into, int id, int pattern_id){
liblex_result result = _pattern_node_create_clear(into);
if(result == LIBLEX_SUCCESS){
(*into)->type = PNODE_END;
(*into)->id = id;
(*into)->pattern_id = pattern_id;
}
return result;
}
liblex_result _pattern_chain_create(pattern_chain** into, pattern_node* from, pattern_node* to){
liblex_result result = LIBLEX_SUCCESS;
*into = malloc(sizeof(**into));
if(*into){
(*into)->head = from;
(*into)->tail = to;
} else {
result = LIBLEX_MALLOC;
}
return result;
}
pattern_node** _pattern_node_get_next(pattern_node* node) {
pattern_node** next_node = NULL;
if(node->type == PNODE_ANY){
next_node = &(node->data_u.any_s.next);
} else if(node->type == PNODE_RANGE){
next_node = &(node->data_u.range_s.next);
} else if(node->type == PNODE_VALUE) {
next_node = &(node->data_u.value_s.next);
} else if(node->type == PNODE_CONNECT){
next_node = &(node->data_u.connect_s.next);
}
return next_node;
}
void _pattern_node_append_node(pattern_node* append_to, pattern_node* to_append){
if(append_to && to_append){
pattern_node** next_node = _pattern_node_get_next(append_to);
if(next_node){
*next_node = to_append;
}
}
}
void _pattern_chain_append_node(pattern_chain* append_to, pattern_node* to_append){
if(append_to && to_append){
pattern_node** next_node = (append_to->head == NULL) ? &append_to->head : _pattern_node_get_next(append_to->tail);
if(next_node) {
*next_node = append_to->tail = to_append;
}
}
}
void _pattern_chain_append_chain(pattern_chain* append_to, pattern_chain* to_append){
if(append_to && to_append){
_pattern_chain_append_node(append_to, to_append->head);
if(to_append->tail) {
append_to->tail = to_append->tail;
}
}
}
int _pattern_node_foreach_free(void* node, va_list args){
free(node);
return 0;
}
void _pattern_node_find_all(pattern_node* node, sprs* set){
if(node && !sprs_contains(set, node->id)){
sprs_put(set, node->id, node);
if(node->type == PNODE_ANY) {
_pattern_node_find_all(node->data_u.any_s.next, set);
} else if(node->type == PNODE_CONNECT){
_pattern_node_find_all(node->data_u.connect_s.next, set);
} else if(node->type == PNODE_FORK){
_pattern_node_find_all(node->data_u.fork_s.left, set);
_pattern_node_find_all(node->data_u.fork_s.right, set);
} else if(node->type == PNODE_RANGE){
_pattern_node_find_all(node->data_u.range_s.next, set);
} else if(node->type == PNODE_VALUE){
_pattern_node_find_all(node->data_u.value_s.next, set);
}
}
}
liblex_result _pattern_free(pattern_node* root, int size){
sprs set;
liblex_result result;
sprs_init(&set);
result = (sprs_setup(&set, (size_t) size) == LIBDS_SUCCESS) ? LIBLEX_SUCCESS : LIBLEX_MALLOC;
if(result == LIBLEX_SUCCESS){
_pattern_node_find_all(root, &set);
sprs_foreach(&set, NULL, compare_always, _pattern_node_foreach_free);
}
sprs_free(&set);
return result;
}
liblex_result _pattern_read_value(char* read_into, const char* string, int* index){
liblex_result result = LIBLEX_SUCCESS;
if(string[*index] == '\\'){
(*index)++;
}
if(string[*index]) {
*read_into = string[*index];
(*index)++;
} else {
result = LIBLEX_INVALID;
}
return result;
}
liblex_result _pattern_build_inverted_or(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index) {
pattern_node* new_node = NULL;
liblex_result result = LIBLEX_SUCCESS;
result = _pattern_chain_create(into, NULL, NULL);
if(result == LIBLEX_SUCCESS){
(*index)++;
if(string[*index]) (*index)++;
}
while(string[*index] && string[*index] != ']' && result == LIBLEX_SUCCESS) {
char from = '\0';
char to = '\0';
result = _pattern_read_value(&from, string, index);
if(result == LIBLEX_SUCCESS && string[*index] == '-') {
(*index)++;
result = _pattern_read_value(&to, string, index);
}
if(result == LIBLEX_SUCCESS) {
if(to) {
result = _pattern_node_create_range(&new_node, (*ids)++, pattern_id, from, to, NULL);
} else {
result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, from, NULL);
}
}
if(result == LIBLEX_SUCCESS) {
new_node->invert = 1;
_pattern_chain_append_node(*into, new_node);
}
}
if(result == LIBLEX_SUCCESS) {
result = _pattern_node_create_any(&new_node, (*ids)++, pattern_id, NULL);
if(result == LIBLEX_SUCCESS) {
_pattern_chain_append_node(*into, new_node);
}
}
if(result != LIBLEX_SUCCESS) {
if(*into && (*into)->head) {
_pattern_free((*into)->head, *ids);
}
free(*into);
} else {
(*index)++;
}
return result;
}
liblex_result _pattern_build_or(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index) {
pattern_node *tail_node;
liblex_result result = LIBLEX_SUCCESS;
result = _pattern_node_create_connect(&tail_node, (*ids)++, pattern_id, NULL);
if (result == LIBLEX_SUCCESS) {
result = _pattern_chain_create(into, NULL, tail_node);
}
if(result == LIBLEX_SUCCESS){
(*index)++;
}
while (string[*index] && string[*index] != ']' && result == LIBLEX_SUCCESS) {
char from = '\0';
char to = '\0';
pattern_node *new_node = NULL;
pattern_node *fork_node = NULL;
pattern_node *add_node = NULL;
result = _pattern_read_value(&from, string, index);
if (result == LIBLEX_SUCCESS) {
if (string[*index] == '-') {
(*index)++;
result = _pattern_read_value(&to, string, index);
}
}
if (result == LIBLEX_SUCCESS) {
if (to) {
result = _pattern_node_create_range(&new_node, (*ids)++, pattern_id, from, to, tail_node);
} else {
result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, from, tail_node);
}
add_node = new_node;
}
if (result == LIBLEX_SUCCESS && (*into)->head) {
result = _pattern_node_create_fork(&fork_node, (*ids)++, pattern_id, (*into)->head, new_node);
if (result == LIBLEX_SUCCESS) {
add_node = fork_node;
}
}
if (result == LIBLEX_SUCCESS) {
(*into)->head = add_node;
} else {
free(new_node);
free(fork_node);
}
}
if (result != LIBLEX_SUCCESS) {
if (*into && (*into)->head) {
_pattern_free((*into)->head, *ids);
} else {
free(tail_node);
}
free(*into);
} else {
(*index)++;
}
return result;
}
void _pattern_chain_append_chain_discard(pattern_chain* append_to, pattern_chain** to_append){
_pattern_chain_append_chain(append_to, *to_append);
free(*to_append);
*to_append = NULL;
}
liblex_result _pattern_build_chain(pattern_chain** into, int* ids, int pattern_id, const char* string, int* index){
liblex_result result = LIBLEX_SUCCESS;
ll or_stack;
pattern_chain* current_chain = NULL;
pattern_chain* sub_chain = NULL;
int is_subchain = (*index > -1) && (string[*index] == '(');
ll_init(&or_stack);
result = _pattern_chain_create(&current_chain, NULL, NULL);
if(result == LIBLEX_SUCCESS){
(*index)++;
}
while(string[*index] && string[*index] != ')' && result == LIBLEX_SUCCESS){
if(string[*index] == '('){
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
result = _pattern_build_chain(&sub_chain, ids, pattern_id, string, index);
} else if(string[*index] == '['){
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
if(string[(*index) + 1] == '^') {
result = _pattern_build_inverted_or(&sub_chain, ids, pattern_id, string, index);
} else {
result = _pattern_build_or(&sub_chain, ids, pattern_id, string, index);
}
} else if(string[*index] == '?' || string[*index] == '*' || string[*index] == '+'){
if(sub_chain != NULL && sub_chain->head){
pattern_node* connection_node = NULL;
pattern_node* fork_node = NULL;
result = _pattern_node_create_connect(&connection_node, (*ids)++, pattern_id, NULL);
if(result == LIBLEX_SUCCESS){
result = _pattern_node_create_fork(&fork_node, (*ids)++, pattern_id, sub_chain->head, connection_node);
}
if(result == LIBLEX_SUCCESS){
pattern_node** next_node = _pattern_node_get_next(sub_chain->tail);
if(next_node){
*next_node = (string[*index] == '?') ? connection_node : fork_node;
sub_chain->head = (string[*index] == '+') ? sub_chain->head : fork_node;
sub_chain->tail = connection_node;
} else {
result = LIBLEX_INVALID;
}
}
if(result == LIBLEX_SUCCESS){
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
(*index)++;
} else {
free(connection_node);
free(fork_node);
}
} else {
result = LIBLEX_INVALID;
}
} else if(string[*index] == '|'){
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
if(current_chain->head){
result = (ll_append(&or_stack, current_chain) == LIBDS_SUCCESS) ? LIBLEX_SUCCESS : LIBLEX_MALLOC;
} else {
result = LIBLEX_INVALID;
}
if(result == LIBLEX_SUCCESS){
result = _pattern_chain_create(&current_chain, NULL, NULL);
(*index)++;
}
} else if(string[*index] == '.') {
pattern_node* new_node = NULL;
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
result = _pattern_chain_create(&sub_chain, NULL, NULL);
if(result == LIBLEX_SUCCESS){
result = _pattern_node_create_any(&new_node, (*ids)++, pattern_id, NULL);
}
if(result == LIBLEX_SUCCESS){
sub_chain->head = sub_chain->tail = new_node;
(*index)++;
} else {
free(new_node);
}
} else {
char new_char = '\0';
pattern_node* new_node = NULL;
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
result = _pattern_read_value(&new_char, string, index);
if(result == LIBLEX_SUCCESS){
result = _pattern_chain_create(&sub_chain, NULL, NULL);
}
if(result == LIBLEX_SUCCESS){
result = _pattern_node_create_value(&new_node, (*ids)++, pattern_id, new_char, NULL);
}
if(result == LIBLEX_SUCCESS){
sub_chain->head = sub_chain->tail = new_node;
} else {
free(new_node);
}
}
}
if(is_subchain && result == LIBLEX_SUCCESS && string[*index] != ')'){
result = LIBLEX_INVALID;
}
if(result == LIBLEX_SUCCESS){
_pattern_chain_append_chain_discard(current_chain, &sub_chain);
}
if(result == LIBLEX_SUCCESS && or_stack.head){
pattern_node* connect_node = NULL;
result = _pattern_node_create_connect(&connect_node, (*ids)++, pattern_id, NULL);
if(result == LIBLEX_SUCCESS){
_pattern_chain_append_node(current_chain, connect_node);
}
while(or_stack.head && result == LIBLEX_SUCCESS){
pattern_node* fork = NULL;
pattern_chain* new_chain = ll_pophead(&or_stack);
if(new_chain && new_chain->head){
result = _pattern_node_create_fork(&fork, (*ids)++, pattern_id, current_chain->head, new_chain->head);
if(result == LIBLEX_SUCCESS){
_pattern_chain_append_node(new_chain, connect_node);
current_chain->head = fork;
} else {
_pattern_free(new_chain->head, *ids);
}
free(new_chain);
}
}
}
if(result != LIBLEX_SUCCESS){
if(current_chain){
_pattern_free(current_chain->head, *ids);
}
free(current_chain);
if(sub_chain){
_pattern_free(sub_chain->head, *ids);
}
free(sub_chain);
while(or_stack.head){
pattern_chain* new_chain = ll_pophead(&or_stack);
if(new_chain){
_pattern_free(new_chain->head, *ids);
}
}
}
if(result == LIBLEX_SUCCESS){
*into = current_chain;
if(is_subchain) {
(*index)++;
}
}
return result;
}
liblex_result pattern_compile(pattern* ptrn, const char* expression, int id){
liblex_result result;
pattern_node* end_node = NULL;
pattern_chain* full_chain = NULL;
int index = -1;
int ids = 0;
result = _pattern_build_chain(&full_chain, &ids, id, expression, &index);
if(result == LIBLEX_SUCCESS){
result = _pattern_node_create_end(&end_node, ids, id);
}
if(result == LIBLEX_SUCCESS){
_pattern_chain_append_node(full_chain, end_node);
ptrn->head = full_chain->head;
ptrn->size = ids + 1;
free(full_chain);
} else {
ptrn->head = NULL;
ptrn->size = 0;
if(full_chain){
_pattern_free(full_chain->head, ids);
}
}
return result;
}
liblex_result pattern_free(pattern* ptrn){
return _pattern_free(ptrn->head, ptrn->size);
}