Implement matching strings + finding groups.

They're done in the same step. Calling `result = _regex_find_all(root, &clear_ll, -1);` followed by `ll_clear(&clear_ll);` is all that's necessary to reset all nodes, since the only value that changes during the match is list_id, and that gets set when the nodes are found.
2017-01-08 00:04:31 -08:00
parent 06fe67b226
commit fbb0635b26
2 changed files with 129 additions and 2 deletions
--- a/include/libregex.h
+++ b/include/libregex.h
@@ -300,6 +300,7 @@ void regex_node_clear(regex_node* node);
 /**
 * Frees an entire NFA, starting from the given root node.
 * @param root the root, or starting node, of the NFA.
+ * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
 */
 libregex_result regex_free(regex_node* root);
 /**
@@ -317,5 +318,11 @@ libregex_result regex_build(regex_node** root, char* expression);
 * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
 */
 libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
+/**
+ * Frees data used by regex_match_string in a regex_result struct.
+ * The actual struct is not freed.
+ * @param result the result struct
+ */
+void regex_result_free(regex_result* result);

 #endif
--- a/src/libregex.c
+++ b/src/libregex.c
@@ -1,6 +1,7 @@
-#include "libregex.h"#include "libregex.h"
+#include "libregex.h"
 #include <stdlib.h>
 #include <string.h>
+#include <libregex.h>
 #include "ll.h"

 int _regex_node_foreach_free(void* data, va_list args){
@@ -477,4 +478,123 @@ libregex_result regex_build(regex_node** root, char* expression){

  return result;
 }
-libregex_result regex_match_string(regex_node* root, char* string, regex_result* result);
+
+/**
+ * Tests whether a single NFA node accepts the given character.
+ * Value nodes compare for equality, range nodes test their inclusive bounds,
+ * and "any" nodes accept every character except the string terminator.
+ * Nodes of any other type accept nothing.
+ * @param node the node to test.
+ * @param c the character to test against the node.
+ * @return nonzero if the node accepts the character, 0 otherwise.
+ */
+int _regex_node_matches(regex_node* node, char c){
+  if(node->type == REGEX_VALUE){
+    return node->data_u.value_s.value == c;
+  }
+  if(node->type == REGEX_RANGE){
+    return node->data_u.range_s.from <= c && c <= node->data_u.range_s.to;
+  }
+  if(node->type == REGEX_ANY){
+    return c != '\0';
+  }
+  return 0;
+}
+
+/**
+ * Adds a node to a list, following every transition that consumes no input.
+ * Connect and fork nodes are never stored themselves; only their targets are visited.
+ * Group nodes are stored after the node that follows them has been visited.
+ * Clear nodes are marked as visited but not stored.
+ * A node is skipped when it is NULL, when its list_id shows it was already
+ * added to this list (or a later one), or when the list is full.
+ * @param node the node to add; may be NULL.
+ * @param list the list to add the node to.
+ */
+void _regex_node_add(regex_node* node, regex_list* list){
+  if(node && node->list_id < list->id && list->size < LIBREGEX_MAX_NODE_COUNT){
+    /* Stamp the node first so cycles in the NFA terminate. */
+    node->list_id = list->id;
+    if(node->type == REGEX_CONNECT){
+      _regex_node_add(node->data_u.connect_s.next, list);
+    } else if(node->type == REGEX_FORK){
+      _regex_node_add(node->data_u.fork_s.left, list);
+      _regex_node_add(node->data_u.fork_s.right, list);
+    } else if(!(node->type == REGEX_CLEAR)){
+      if(node->type == REGEX_GROUP){
+        _regex_node_add(node->data_u.group_s.next, list);
+      }
+      /* The recursion above may have filled the list since the capacity check
+       * on entry, so check again before storing to stay inside the limit
+       * (assumes list->nodes holds LIBREGEX_MAX_NODE_COUNT entries). */
+      if(list->size < LIBREGEX_MAX_NODE_COUNT){
+        list->nodes[list->size++] = node;
+      }
+    }
+  }
+}
+
+/**
+ * Advances the simulation over the character at sim->index.
+ * Every node in the current list is examined once:
+ *  - a node that accepts the character has its successor added to the next list;
+ *  - an end node marks the result as matching;
+ *  - a group node records where the group opens or closes, and a closing
+ *    group publishes the from/to pair into the result, allocating the match
+ *    entry the first time that group closes.
+ * Afterwards the two lists are swapped and the new "next" list is emptied and
+ * given a fresh id (its old id plus two), so stale list_id stamps never collide.
+ * The caller is responsible for advancing sim->index.
+ * @param sim the simulation state.
+ * @return LIBREGEX_SUCCESS, or LIBREGEX_MALLOC if a match entry could not be allocated.
+ */
+libregex_result _regex_step(regex_sim* sim){
+  libregex_result result = LIBREGEX_SUCCESS;
+  regex_list* swap_temp;
+  int index = 0;
+  for(; index < sim->current->size; index++){
+    regex_node* current = sim->current->nodes[index];
+    if(_regex_node_matches(current, sim->string[sim->index])){
+      regex_node** next = _regex_node_get_next(current);
+      if(next){
+        _regex_node_add(*next, sim->next);
+      }
+    } else if(current->type == REGEX_END){
+      sim->result->matches = 1;
+    } else if(current->type == REGEX_GROUP &&
+              current->data_u.group_s.id >= 1 &&
+              current->data_u.group_s.id < LIBREGEX_MAX_GROUP_COUNT){
+      /* The id is validated before any group slot is touched, so an
+       * out-of-range id can no longer write outside sim->groups. */
+      int slot = current->data_u.group_s.id - 1;
+      /* An opening group starts at the current character; a closing group
+       * ends at the previous one. */
+      int position = sim->index - 1 + current->data_u.group_s.open;
+
+      /* A closing group reached before any input is consumed yields position -1.
+       * That can never be the terminator, so it is accepted without reading
+       * string[-1]. */
+      if(position < 0 || sim->string[position] != '\0'){
+        if(current->data_u.group_s.open){
+          sim->groups[slot].from = position;
+        } else {
+          regex_match* new_match = sim->result->groups[slot];
+          sim->groups[slot].to = position;
+
+          /* Reuse the entry if this group already closed earlier in the match. */
+          if(new_match == NULL){
+            new_match = malloc(sizeof(*new_match));
+          }
+
+          if(new_match){
+            new_match->from = sim->groups[slot].from;
+            new_match->to = sim->groups[slot].to;
+            sim->result->groups[slot] = new_match;
+          } else {
+            result = LIBREGEX_MALLOC;
+          }
+        }
+      }
+    }
+  }
+
+  swap_temp = sim->current;
+  sim->current = sim->next;
+  sim->next = swap_temp;
+
+  sim->next->size = 0;
+  sim->next->id += 2;
+
+  return result;
+}
+
+/**
+ * Runs the NFA over a string, recording whether it matched and which groups were found.
+ * The result is cleared before matching starts. Group entries are allocated as
+ * groups close, and may already exist when an error is returned, so the caller
+ * should pass the result to regex_result_free in either case.
+ * The nodes of the NFA are reset before returning, whether or not matching
+ * succeeded, so the same NFA can be used for another match afterwards.
+ * @param root the root, or starting node, of the NFA.
+ * @param string the NUL-terminated string to match.
+ * @param build_result the result struct to fill in.
+ * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code.
+ */
+libregex_result regex_match_string(regex_node* root, char* string, regex_result* build_result){
+  ll clear_ll;
+  libregex_result result;
+  libregex_result reset_result;
+  regex_sim sim;
+  regex_list list_a;
+  regex_list list_b;
+
+  /* The two lists start with different ids so a node stamped for one is
+   * still eligible for the other. */
+  list_a.size = 0;
+  list_a.id = 0;
+
+  list_b.size = 0;
+  list_b.id = 1;
+
+  build_result->matches = 0;
+  memset(&build_result->groups, 0, sizeof(build_result->groups));
+
+  memset(&sim.groups, 0, sizeof(sim.groups));
+  sim.current = &list_a;
+  sim.next = &list_b;
+  sim.result = build_result;
+  sim.string = string;
+  sim.index = 0;
+
+  _regex_node_add(root, &list_a);
+
+  /* The terminator itself is stepped over as well, which is when end nodes
+   * that were reached by the last character get noticed. */
+  do {
+    result = _regex_step(&sim);
+  } while(sim.string[sim.index++] != '\0' && result == LIBREGEX_SUCCESS);
+
+  /* list_id is the only per-node state changed by a match. It is reset even
+   * when a step failed; otherwise the stale stamps would keep the nodes out
+   * of the lists on the next match. */
+  ll_init(&clear_ll);
+  reset_result = _regex_find_all(root, &clear_ll, -1);
+  ll_clear(&clear_ll);
+
+  /* Report the first error encountered. */
+  if(result == LIBREGEX_SUCCESS){
+    result = reset_result;
+  }
+
+  return result;
+}
+/**
+ * Frees the group matches stored in a result struct and marks it as not matching.
+ * The struct itself is not freed. Each group slot is set to NULL afterwards,
+ * so calling this again on the same struct is harmless.
+ * @param result the result struct
+ */
+void regex_result_free(regex_result* result){
+  int index = 0;
+  for(; index < LIBREGEX_MAX_GROUP_COUNT; index++){
+    free(result->groups[index]);
+    /* Drop the stale pointer so a repeated call cannot free it twice. */
+    result->groups[index] = NULL;
+  }
+  result->matches = 0;
+}