Write initial code for matching the patterns to a single string.

The code is currently limited in two respects: (1) it is not very well tested; (2) the current "best" match is simply the one that occurred last. That is fine in terms of length, but it leaves to chance which pattern type is returned when multiple patterns of the same length match.
2017-02-04 00:28:36 -08:00 · 2017-02-04 00:28:36 -08:00 · 0b6d73ebdf
commit 0b6d73ebdf
parent d753bbe1ab
3 changed files with 306 additions and 1 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.7)
 project(liblex)

-add_library(lex STATIC src/pattern.c src/pairmap.c)
+add_library(lex STATIC src/pattern.c src/pairmap.c src/eval.c)
 add_executable(liblex src/main.c)
 add_subdirectory(external/libds)

--- a/include/eval.h
+++ b/include/eval.h
@ -0,0 +1,119 @@
+#ifndef LIBLEX_EVAL_H
+#define LIBLEX_EVAL_H
+
+#include "pattern.h"
+#include "pairmap.h"
+#include "ll.h"
+
+/**
+ * A pattern match.
+ * During lexing / evaluation, structs of this type
+ * are used to represent "matched" hypotheses.
+ */
+struct match_s {
+  /**
+   * The index at which the evaluation of the matched word began.
+   */
+  int from;
+  /**
+   * The value of the evaluation index at the moment the
+   * pattern's end node was reached.
+   */
+  int to;
+  /**
+   * The ID of the pattern whose end node produced this match.
+   */
+  int pattern;
+};
+
+/**
+ * Used in matching a single word / lexeme / etc
+ * from some input.
+ */
+struct eval_s {
+  /**
+   * The input being evaluated.
+   */
+  char* string;
+  /**
+   * The current index in the input.
+   */
+  int index;
+  /**
+   * The index where the evaluation began.
+   */
+  int begin;
+  /**
+   * The number of nodes / states matched in
+   * the last iteration.
+   */
+  int matched;
+  /**
+   * Set A of the two sets of pattern nodes / states.
+   */
+  ht set_a;
+  /**
+   * Set B of the two sets of pattern nodes / states.
+   */
+  ht set_b;
+  /**
+   * The matches that were found so far.
+   */
+  ll matches;
+
+  /**
+   * The current set of states (points at either set A or set B).
+   */
+  ht* set_current;
+  /**
+   * The next set of states (points at whichever set is not current).
+   */
+  ht* set_next;
+};
+
+/**
+ * Configuration for evaluation.
+ * This contains a list of states that should be started with when
+ * the evaluation of a single token begins.
+ */
+struct eval_config_s {
+  /**
+   * The initial list of states to be checked.
+   * Each element is a heap-allocated compiled pattern
+   * added through eval_config_add.
+   */
+  ll states;
+};
+
+/* Short aliases so the structs can be used without the struct keyword. */
+typedef struct match_s match;
+typedef struct eval_s eval;
+typedef struct eval_config_s eval_config;
+
+/**
+ * Initializes the evaluation configuration.
+ * This initializes the configuration's list of initial states.
+ * @param config the configuration to initialize.
+ */
+void eval_config_init(eval_config* config);
+/**
+ * Frees data allocated by an evaluation configuration.
+ * Every pattern stored in the configuration is freed, followed by the list itself.
+ * @param config the configuration to free.
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_config_free(eval_config* config);
+/**
+ * Adds a new pattern to the configuration, with a given pattern ID.
+ * The pattern string is compiled and the compiled pattern is stored in the configuration.
+ * @param config the configuration to add the pattern to.
+ * @param pattern the pattern to add
+ * @param pattern_id the id to associate with the pattern
+ * @return LIBLEX_SUCCESS if all goes well, LIBLEX_MALLOC if there was an allocation failure,
+ * or the error reported by pattern compilation if the pattern could not be compiled.
+ */
+liblex_result eval_config_add(eval_config* config, char* pattern, int pattern_id);
+/**
+ * Evaluates / finds a single word.
+ * If no pattern matches, the from, to and pattern fields of the match are all set to -1.
+ * @param string the string to evaluate.
+ * @param index the index to start at.
+ * @param config the configuration to use
+ * @param match pointer to where to store the newly created match.
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_word(char* string, int index, eval_config* config, match* match);
+/**
+ * Evaluates input starting at the index until it reaches the null terminator,
+ * adding the best matches to the linked list.
+ * NOTE: the implementation added in src/eval.c by this commit is still a stub
+ * that returns LIBLEX_SUCCESS without evaluating anything.
+ * @param string the string to evaluate.
+ * @param index the index to start at
+ * @param config the config to use
+ * @param matches the linked list to populate with matches
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_all(char* string, int index, eval_config* config, ll* matches);
+
+#endif
--- a/src/eval.c
+++ b/src/eval.c
@ -0,0 +1,186 @@
+#include "eval.h"
+#include <stdlib.h>
+#include <pattern.h>
+#include "pattern.h"
+#include "pairmap.h"
+
+/**
+ * Code returned by the foreach callbacks in this file
+ * when an allocation (or a free of a pattern) fails.
+ */
+#define EVAL_FOREACH_MALLOC 1
+
+/**
+ * Maps the code returned by ll_foreach / ht_foreach (used as the index)
+ * to the corresponding liblex_result.
+ */
+static liblex_result foreach_errors[2] = {
+  [0] = LIBLEX_SUCCESS,
+  [EVAL_FOREACH_MALLOC] = LIBLEX_MALLOC
+};
+
+/**
+ * Initializes the evaluation configuration by initializing its list of states.
+ * @param config the configuration to initialize.
+ */
+void eval_config_init(eval_config* config){
+  ll* initial_states = &config->states;
+  ll_init(initial_states);
+}
+/**
+ * Compiles a pattern string and appends the compiled pattern to the configuration.
+ * On any failure after the allocation, the pattern is released again with
+ * pattern_free and free, so nothing is left behind in the configuration.
+ * @param config the configuration to add the pattern to.
+ * @param ptrn the pattern string to compile.
+ * @param pattern_id the id to associate with the pattern.
+ * @return LIBLEX_SUCCESS on success, LIBLEX_MALLOC if an allocation or the append failed,
+ * or the result of pattern_compile if compilation failed.
+ */
+liblex_result eval_config_add(eval_config* config, char* ptrn, int pattern_id){
+  pattern* compiled = malloc(sizeof(*compiled));
+  if(compiled == NULL){
+    return LIBLEX_MALLOC;
+  }
+
+  liblex_result status = pattern_compile(compiled, ptrn, pattern_id);
+  if(status == LIBLEX_SUCCESS && ll_append(&config->states, compiled) != LIBDS_SUCCESS){
+    status = LIBLEX_MALLOC;
+  }
+
+  if(status != LIBLEX_SUCCESS){
+    pattern_free(compiled);
+    free(compiled);
+  }
+  return status;
+}
+
+/**
+ * Foreach callback that frees a single heap-allocated match.
+ * @param data the match to free.
+ * @param args unused.
+ * @return always 0.
+ */
+int eval_foreach_match_free(void* data, va_list args) {
+  (void) args;
+  free(data);
+  return 0;
+}
+
+/**
+ * Foreach callback that frees a single compiled pattern and its allocation.
+ * The pattern's memory is released even when pattern_free reports an error.
+ * @param data the pattern to free.
+ * @param args unused.
+ * @return 0 if pattern_free succeeded, EVAL_FOREACH_MALLOC otherwise.
+ */
+int eval_config_foreach_free(void* data, va_list args){
+  pattern* to_free = data;
+  int code = 0;
+  if(pattern_free(to_free) != LIBLEX_SUCCESS){
+    code = EVAL_FOREACH_MALLOC;
+  }
+  free(to_free);
+  return code;
+}
+
+/**
+ * Frees every pattern stored in the configuration, then the list itself.
+ * @param config the configuration to free.
+ * @return the liblex_result corresponding to the code returned by ll_foreach.
+ */
+liblex_result eval_config_free(eval_config* config){
+  ll* states = &config->states;
+  int foreach_code = ll_foreach(states, NULL, compare_always, eval_config_foreach_free);
+  ll_free(states);
+  return foreach_errors[foreach_code];
+}
+
+/**
+ * Gets the successor of a node that has a single "next" pointer.
+ * @param node the node whose successor is requested.
+ * @return the next node for value, any, range and connect nodes; NULL for every other type.
+ */
+pattern_node* eval_pattern_node_get_next(pattern_node* node){
+  switch(node->type){
+    case PNODE_VALUE:
+      return node->data_u.value_s.next;
+    case PNODE_ANY:
+      return node->data_u.any_s.next;
+    case PNODE_RANGE:
+      return node->data_u.range_s.next;
+    case PNODE_CONNECT:
+      return node->data_u.connect_s.next;
+    default:
+      return NULL;
+  }
+}
+
+/**
+ * Adds a node to a set of states, resolving nodes that do not consume input.
+ * Any, value, range and end nodes are stored in the table under the
+ * (pattern id, node id) pair. Connect nodes are replaced by their successor,
+ * and fork nodes by both of their branches.
+ * @param table the pair map to add the node(s) to.
+ * @param node the node to add.
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if an insertion failed.
+ */
+liblex_result eval_pairmap_add_node(ht* table, pattern_node* node){
+  liblex_result result = LIBLEX_SUCCESS;
+
+  if(node->type == PNODE_ANY || node->type == PNODE_VALUE || node->type == PNODE_RANGE ||
+      node->type == PNODE_END){
+    pairmap_key tmp_key;
+    result = PAIRMAP_PUT(table, &tmp_key, node->pattern_id, node->id, node) == LIBDS_SUCCESS ? LIBLEX_SUCCESS : LIBLEX_MALLOC;
+  } else if(node->type == PNODE_CONNECT){
+    result = eval_pairmap_add_node(table, eval_pattern_node_get_next(node));
+  } else if(node->type == PNODE_FORK){
+    result = eval_pairmap_add_node(table, node->data_u.fork_s.left);
+    /* result is a liblex_result here, so it must be checked against the liblex constant. */
+    if(result == LIBLEX_SUCCESS){
+      result = eval_pairmap_add_node(table, node->data_u.fork_s.right);
+    }
+  }
+  return result;
+}
+
+/**
+ * Foreach callback that adds the head node of a pattern to a set of states.
+ * @param data the pattern whose head should be added.
+ * @param args variadic arguments; the first one is the ht* to add to.
+ * @return 0 on success, EVAL_FOREACH_MALLOC if adding the node failed.
+ */
+int eval_foreach_add_node(void* data, va_list args){
+  pattern* source = data;
+  ht* target = va_arg(args, ht*);
+  if(eval_pairmap_add_node(target, source->head) != LIBLEX_SUCCESS){
+    return EVAL_FOREACH_MALLOC;
+  }
+  return 0;
+}
+
+/**
+ * Checks whether a node accepts the character at the current index of the input.
+ * @param node the node to test.
+ * @param eval the evaluation state providing the input string and the current index.
+ * @return non-zero if the node accepts the character. Any nodes accept everything
+ * except the null terminator, value nodes accept exactly their value, and range nodes
+ * accept characters within their inclusive bounds. All other node types return 0.
+ */
+int eval_node_matches(pattern_node* node, eval* eval){
+  char current = eval->string[eval->index];
+  switch(node->type){
+    case PNODE_ANY:
+      return current != '\0';
+    case PNODE_VALUE:
+      return current == node->data_u.value_s.value;
+    case PNODE_RANGE:
+      return current >= node->data_u.range_s.from && current <= node->data_u.range_s.to;
+    default:
+      return 0;
+  }
+}
+
+/**
+ * Foreach callback that advances a single state by one character.
+ * If the node accepts the current character, its successor is added to the next
+ * set of states and the matched counter is incremented. Otherwise, if the node is
+ * an end node, a match spanning from the beginning of the evaluation to the
+ * current index is recorded in the list of matches.
+ * @param data the pattern node (state) to check.
+ * @param args variadic arguments; the first one is the eval* being advanced.
+ * @return 0 on success, EVAL_FOREACH_MALLOC if an allocation failed.
+ */
+int eval_foreach_check_state(void* data, va_list args){
+  int return_code = 0;
+  pattern_node* node = data;
+  eval* evl = va_arg(args, eval*);
+  if(eval_node_matches(node, evl)){
+    return_code = eval_pairmap_add_node(evl->set_next, eval_pattern_node_get_next(node)) == LIBLEX_SUCCESS ? 0 : EVAL_FOREACH_MALLOC;
+    if(return_code == 0){
+      evl->matched++;
+    }
+  } else if(node->type == PNODE_END){
+    match* new_match = malloc(sizeof(*new_match));
+    if(new_match){
+      new_match->from = evl->begin;
+      new_match->to = evl->index;
+      new_match->pattern = node->pattern_id;
+
+      /* ll_append is a libds call, so its result is compared against the libds constant. */
+      return_code = ll_append(&evl->matches, new_match) == LIBDS_SUCCESS ? 0 : EVAL_FOREACH_MALLOC;
+      if(return_code){
+        free(new_match);
+      }
+    } else {
+      return_code = EVAL_FOREACH_MALLOC;
+    }
+  }
+  return return_code;
+}
+
+/**
+ * Performs one step of the evaluation: checks every state in the current set
+ * against the current character, then swaps the two sets, advances the index
+ * and resets the set that was just processed so it can receive the next states.
+ * @param eval the evaluation state to advance.
+ * @return the liblex_result corresponding to the code returned by ht_foreach.
+ */
+liblex_result eval_step(eval* eval){
+  eval->matched = 0;
+  int foreach_code = ht_foreach(eval->set_current, NULL, compare_always, eval_foreach_check_state, eval);
+
+  /* The set that was just processed becomes the (emptied) next set. */
+  ht* processed = eval->set_current;
+  eval->set_current = eval->set_next;
+  eval->set_next = processed;
+  eval->index++;
+
+  ht_free(processed);
+  pairmap_init_ht(processed);
+  return foreach_errors[foreach_code];
+}
+
+/**
+ * Evaluates / finds a single word starting at the given index.
+ * The current set of states is seeded with the head of every configured pattern,
+ * and the evaluation is stepped until a step matches no state. The match that was
+ * recorded last is reported. If nothing matched, or if an error occurred, the
+ * from, to and pattern fields of the output are all set to -1.
+ * @param string the string to evaluate.
+ * @param index the index to start at.
+ * @param config the configuration to use.
+ * @param mtch pointer to where to store the resulting match.
+ * @return LIBLEX_SUCCESS if all goes well, or LIBLEX_MALLOC if there was an allocation failure.
+ */
+liblex_result eval_word(char* string, int index, eval_config* config, match* mtch){
+  liblex_result result;
+  eval evl;
+  evl.index = evl.begin = index;
+  evl.string = string;
+  evl.matched = 0;
+  pairmap_init_ht(&evl.set_a);
+  pairmap_init_ht(&evl.set_b);
+  ll_init(&evl.matches);
+  evl.set_current = &evl.set_a;
+  evl.set_next = &evl.set_b;
+
+  /* Seed the current set; a failure here must not be silently ignored. */
+  result = foreach_errors[ll_foreach(&config->states, NULL, compare_always, eval_foreach_add_node, evl.set_current)];
+  if(result == LIBLEX_SUCCESS){
+    do {
+      result = eval_step(&evl);
+    } while(result == LIBLEX_SUCCESS && evl.matched);
+  }
+
+  if(result == LIBLEX_SUCCESS && evl.matches.tail){
+    match* largest_match = evl.matches.tail->data;
+    mtch->from = largest_match->from;
+    mtch->to = largest_match->to;
+    /* Copy the pattern id from the recorded match, not from the output itself. */
+    mtch->pattern = largest_match->pattern;
+  } else {
+    mtch->from = -1;
+    mtch->to = -1;
+    mtch->pattern = -1;
+  }
+
+  ll_foreach(&evl.matches, NULL, compare_always, eval_foreach_match_free);
+  ll_free(&evl.matches);
+  ht_free(&evl.set_a);
+  ht_free(&evl.set_b);
+
+  return result;
+}
+/**
+ * Placeholder for evaluating a whole input.
+ * Nothing is evaluated yet: the arguments are ignored and the list of matches is left untouched.
+ * @param string the string to evaluate (currently unused).
+ * @param index the index to start at (currently unused).
+ * @param config the config to use (currently unused).
+ * @param matches the linked list to populate with matches (currently unused).
+ * @return always LIBLEX_SUCCESS.
+ */
+liblex_result eval_all(char* string, int index, eval_config* config, ll* matches){
+  (void) string;
+  (void) index;
+  (void) config;
+  (void) matches;
+  return LIBLEX_SUCCESS;
+}