Implement string matching and group capture.
Both are done in the same pass. To reset all nodes afterwards, `result = _regex_find_all(root, &clear_ll, -1); ll_clear(&clear_ll);` is all that is necessary, since the only value that changes during a match is list_id, and that value is set as the nodes are found.
This commit is contained in:
		
							parent
							
								
									06fe67b226
								
							
						
					
					
						commit
						fbb0635b26
					
				| @ -300,6 +300,7 @@ void regex_node_clear(regex_node* node); | ||||
| /**
 | ||||
|  * Frees an entire NFA, starting from the given root node. | ||||
|  * @param root the root, or starting node, of the NFA. | ||||
|  * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code. | ||||
|  */ | ||||
| libregex_result regex_free(regex_node* root); | ||||
| /**
 | ||||
| @ -317,5 +318,11 @@ libregex_result regex_build(regex_node** root, char* expression); | ||||
|  * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code. | ||||
|  */ | ||||
| libregex_result regex_match_string(regex_node* root, char* string, regex_result* result); | ||||
| /**
 | ||||
|  * Frees data used by regex_match_string in a regex_result struct. | ||||
|  * The actual struct is not freed. | ||||
|  * @param result the result struct | ||||
|  */ | ||||
| void regex_result_free(regex_result* result); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										124
									
								
								src/libregex.c
									
									
									
									
									
								
							
							
						
						
									
										124
									
								
								src/libregex.c
									
									
									
									
									
								
							| @ -1,6 +1,7 @@ | ||||
| #include "libregex.h"#include "libregex.h" | ||||
| #include "libregex.h" | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <libregex.h> | ||||
| #include "ll.h" | ||||
| 
 | ||||
| int _regex_node_foreach_free(void* data, va_list args){ | ||||
| @ -477,4 +478,123 @@ libregex_result regex_build(regex_node** root, char* expression){ | ||||
| 
 | ||||
|   return result; | ||||
| } | ||||
| libregex_result regex_match_string(regex_node* root, char* string, regex_result* result); | ||||
| 
 | ||||
| int _regex_node_matches(regex_node* node, char c){ | ||||
|   int matches = 0; | ||||
|   if(node->type == REGEX_VALUE){ | ||||
|     matches = node->data_u.value_s.value == c; | ||||
|   } else if(node->type == REGEX_RANGE){ | ||||
|     matches = node->data_u.range_s.from <= c && c <= node->data_u.range_s.to; | ||||
|   } else if(node->type == REGEX_ANY){ | ||||
|     matches = c != '\0'; | ||||
|   } | ||||
|   return matches; | ||||
| } | ||||
| 
 | ||||
| void _regex_node_add(regex_node* node, regex_list* list){ | ||||
|   if(node && node->list_id < list->id && list->size < LIBREGEX_MAX_NODE_COUNT){ | ||||
|     node->list_id = list->id; | ||||
|     if(node->type == REGEX_CONNECT){ | ||||
|       _regex_node_add(node->data_u.connect_s.next, list); | ||||
|     } else if(node->type == REGEX_FORK){ | ||||
|       _regex_node_add(node->data_u.fork_s.left, list); | ||||
|       _regex_node_add(node->data_u.fork_s.right, list); | ||||
|     } else if(!(node->type == REGEX_CLEAR)){ | ||||
|       if(node->type == REGEX_GROUP){ | ||||
|         _regex_node_add(node->data_u.group_s.next, list); | ||||
|       } | ||||
|       list->nodes[list->size++] = node; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| libregex_result _regex_step(regex_sim* sim){ | ||||
|   libregex_result result = LIBREGEX_SUCCESS; | ||||
|   regex_list* swap_temp; | ||||
|   int index = 0; | ||||
|   for(; index < sim->current->size; index++){ | ||||
|     regex_node* current = sim->current->nodes[index]; | ||||
|     if(_regex_node_matches(current, sim->string[sim->index])){ | ||||
|       regex_node** next = _regex_node_get_next(current); | ||||
|       if(next){ | ||||
|         _regex_node_add(*next, sim->next); | ||||
|       } | ||||
|     } else if(current->type == REGEX_END){ | ||||
|       sim->result->matches = 1; | ||||
|     } else if(current->type == REGEX_GROUP && sim->string[sim->index - 1 + current->data_u.group_s.open] != '\0'){ | ||||
|       int* to_set = (current->data_u.group_s.open) ? | ||||
|                     &(sim->groups[current->data_u.group_s.id - 1].from) : | ||||
|                     &(sim->groups[current->data_u.group_s.id - 1].to); | ||||
|       *to_set = sim->index - 1 + current->data_u.group_s.open; | ||||
| 
 | ||||
|       if(current->data_u.group_s.open == 0 && current->data_u.group_s.id < LIBREGEX_MAX_GROUP_COUNT){ | ||||
|         regex_match* new_match = sim->result->groups[current->data_u.group_s.id - 1]; | ||||
|         if(new_match == NULL){ | ||||
|           new_match = malloc(sizeof(*new_match)); | ||||
|         } | ||||
| 
 | ||||
|         if(new_match){ | ||||
|           new_match->from = sim->groups[current->data_u.group_s.id - 1].from; | ||||
|           new_match->to = sim->groups[current->data_u.group_s.id - 1].to; | ||||
|           sim->result->groups[current->data_u.group_s.id - 1] = new_match; | ||||
|         } else { | ||||
|           result = LIBREGEX_MALLOC; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   swap_temp = sim->current; | ||||
|   sim->current = sim->next; | ||||
|   sim->next = swap_temp; | ||||
| 
 | ||||
|   sim->next->size = 0; | ||||
|   sim->next->id += 2; | ||||
| 
 | ||||
|   return result; | ||||
| } | ||||
| 
 | ||||
| libregex_result regex_match_string(regex_node* root, char* string, regex_result* build_result){ | ||||
|   ll clear_ll; | ||||
|   libregex_result result; | ||||
|   regex_sim sim; | ||||
|   regex_list list_a; | ||||
|   regex_list list_b; | ||||
| 
 | ||||
|   list_a.size = 0; | ||||
|   list_a.id = 0; | ||||
| 
 | ||||
|   list_b.size = 0; | ||||
|   list_b.id = 1; | ||||
| 
 | ||||
|   build_result->matches = 0; | ||||
|   memset(&build_result->groups, 0, sizeof(build_result->groups)); | ||||
| 
 | ||||
|   memset(&sim.groups, 0, sizeof(sim.groups)); | ||||
|   sim.current = &list_a; | ||||
|   sim.next = &list_b; | ||||
|   sim.result = build_result; | ||||
|   sim.string = string; | ||||
|   sim.index = 0; | ||||
| 
 | ||||
|   _regex_node_add(root, &list_a); | ||||
| 
 | ||||
|   do { | ||||
|     result = _regex_step(&sim); | ||||
|   } while(sim.string[sim.index++] != '\0' && result == LIBREGEX_SUCCESS); | ||||
| 
 | ||||
|   if(result == LIBREGEX_SUCCESS){ | ||||
|     ll_init(&clear_ll); | ||||
|     result = _regex_find_all(root, &clear_ll, -1); | ||||
|     ll_clear(&clear_ll); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
| } | ||||
| void regex_result_free(regex_result* result){ | ||||
|   int index = 0; | ||||
|   for(; index < LIBREGEX_MAX_GROUP_COUNT; index++){ | ||||
|     free(result->groups[index]); | ||||
|   } | ||||
|   result->matches = 0; | ||||
| } | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user