Implement matching strings + finding groups.
Matching and group capture are done in the same step. Calling `result = _regex_find_all(root, &clear_ll, -1); ll_clear(&clear_ll);` is all that's necessary to reset all nodes, since the only value that changes during the match is list_id, and that gets set when the nodes are found.
This commit is contained in:
		
							parent
							
								
									06fe67b226
								
							
						
					
					
						commit
						fbb0635b26
					
				| @ -300,6 +300,7 @@ void regex_node_clear(regex_node* node); | |||||||
| /**
 | /**
 | ||||||
|  * Frees an entire NFA, starting from the given root node. |  * Frees an entire NFA, starting from the given root node. | ||||||
|  * @param root the root, or starting node, of the NFA. |  * @param root the root, or starting node, of the NFA. | ||||||
|  |  * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code. | ||||||
|  */ |  */ | ||||||
| libregex_result regex_free(regex_node* root); | libregex_result regex_free(regex_node* root); | ||||||
| /**
 | /**
 | ||||||
| @ -317,5 +318,11 @@ libregex_result regex_build(regex_node** root, char* expression); | |||||||
|  * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code. |  * @return the result of the operation: LIBREGEX_SUCCESS if all goes well, or an error code. | ||||||
|  */ |  */ | ||||||
| libregex_result regex_match_string(regex_node* root, char* string, regex_result* result); | libregex_result regex_match_string(regex_node* root, char* string, regex_result* result); | ||||||
|  | /**
 | ||||||
|  |  * Frees data used by regex_match_string in a regex_result struct. | ||||||
|  |  * The actual struct is not freed. | ||||||
|  |  * @param result the result struct | ||||||
|  |  */ | ||||||
|  | void regex_result_free(regex_result* result); | ||||||
| 
 | 
 | ||||||
| #endif | #endif | ||||||
							
								
								
									
										124
									
								
								src/libregex.c
									
									
									
									
									
								
							
							
						
						
									
										124
									
								
								src/libregex.c
									
									
									
									
									
								
							| @ -1,6 +1,7 @@ | |||||||
| #include "libregex.h"#include "libregex.h" | #include "libregex.h" | ||||||
| #include <stdlib.h> | #include <stdlib.h> | ||||||
| #include <string.h> | #include <string.h> | ||||||
|  | #include <libregex.h> | ||||||
| #include "ll.h" | #include "ll.h" | ||||||
| 
 | 
 | ||||||
| int _regex_node_foreach_free(void* data, va_list args){ | int _regex_node_foreach_free(void* data, va_list args){ | ||||||
| @ -477,4 +478,123 @@ libregex_result regex_build(regex_node** root, char* expression){ | |||||||
| 
 | 
 | ||||||
|   return result; |   return result; | ||||||
| } | } | ||||||
| libregex_result regex_match_string(regex_node* root, char* string, regex_result* result); | 
 | ||||||
|  | int _regex_node_matches(regex_node* node, char c){ | ||||||
|  |   int matches = 0; | ||||||
|  |   if(node->type == REGEX_VALUE){ | ||||||
|  |     matches = node->data_u.value_s.value == c; | ||||||
|  |   } else if(node->type == REGEX_RANGE){ | ||||||
|  |     matches = node->data_u.range_s.from <= c && c <= node->data_u.range_s.to; | ||||||
|  |   } else if(node->type == REGEX_ANY){ | ||||||
|  |     matches = c != '\0'; | ||||||
|  |   } | ||||||
|  |   return matches; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void _regex_node_add(regex_node* node, regex_list* list){ | ||||||
|  |   if(node && node->list_id < list->id && list->size < LIBREGEX_MAX_NODE_COUNT){ | ||||||
|  |     node->list_id = list->id; | ||||||
|  |     if(node->type == REGEX_CONNECT){ | ||||||
|  |       _regex_node_add(node->data_u.connect_s.next, list); | ||||||
|  |     } else if(node->type == REGEX_FORK){ | ||||||
|  |       _regex_node_add(node->data_u.fork_s.left, list); | ||||||
|  |       _regex_node_add(node->data_u.fork_s.right, list); | ||||||
|  |     } else if(!(node->type == REGEX_CLEAR)){ | ||||||
|  |       if(node->type == REGEX_GROUP){ | ||||||
|  |         _regex_node_add(node->data_u.group_s.next, list); | ||||||
|  |       } | ||||||
|  |       list->nodes[list->size++] = node; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | libregex_result _regex_step(regex_sim* sim){ | ||||||
|  |   libregex_result result = LIBREGEX_SUCCESS; | ||||||
|  |   regex_list* swap_temp; | ||||||
|  |   int index = 0; | ||||||
|  |   for(; index < sim->current->size; index++){ | ||||||
|  |     regex_node* current = sim->current->nodes[index]; | ||||||
|  |     if(_regex_node_matches(current, sim->string[sim->index])){ | ||||||
|  |       regex_node** next = _regex_node_get_next(current); | ||||||
|  |       if(next){ | ||||||
|  |         _regex_node_add(*next, sim->next); | ||||||
|  |       } | ||||||
|  |     } else if(current->type == REGEX_END){ | ||||||
|  |       sim->result->matches = 1; | ||||||
|  |     } else if(current->type == REGEX_GROUP && sim->string[sim->index - 1 + current->data_u.group_s.open] != '\0'){ | ||||||
|  |       int* to_set = (current->data_u.group_s.open) ? | ||||||
|  |                     &(sim->groups[current->data_u.group_s.id - 1].from) : | ||||||
|  |                     &(sim->groups[current->data_u.group_s.id - 1].to); | ||||||
|  |       *to_set = sim->index - 1 + current->data_u.group_s.open; | ||||||
|  | 
 | ||||||
|  |       if(current->data_u.group_s.open == 0 && current->data_u.group_s.id < LIBREGEX_MAX_GROUP_COUNT){ | ||||||
|  |         regex_match* new_match = sim->result->groups[current->data_u.group_s.id - 1]; | ||||||
|  |         if(new_match == NULL){ | ||||||
|  |           new_match = malloc(sizeof(*new_match)); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if(new_match){ | ||||||
|  |           new_match->from = sim->groups[current->data_u.group_s.id - 1].from; | ||||||
|  |           new_match->to = sim->groups[current->data_u.group_s.id - 1].to; | ||||||
|  |           sim->result->groups[current->data_u.group_s.id - 1] = new_match; | ||||||
|  |         } else { | ||||||
|  |           result = LIBREGEX_MALLOC; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   swap_temp = sim->current; | ||||||
|  |   sim->current = sim->next; | ||||||
|  |   sim->next = swap_temp; | ||||||
|  | 
 | ||||||
|  |   sim->next->size = 0; | ||||||
|  |   sim->next->id += 2; | ||||||
|  | 
 | ||||||
|  |   return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | libregex_result regex_match_string(regex_node* root, char* string, regex_result* build_result){ | ||||||
|  |   ll clear_ll; | ||||||
|  |   libregex_result result; | ||||||
|  |   regex_sim sim; | ||||||
|  |   regex_list list_a; | ||||||
|  |   regex_list list_b; | ||||||
|  | 
 | ||||||
|  |   list_a.size = 0; | ||||||
|  |   list_a.id = 0; | ||||||
|  | 
 | ||||||
|  |   list_b.size = 0; | ||||||
|  |   list_b.id = 1; | ||||||
|  | 
 | ||||||
|  |   build_result->matches = 0; | ||||||
|  |   memset(&build_result->groups, 0, sizeof(build_result->groups)); | ||||||
|  | 
 | ||||||
|  |   memset(&sim.groups, 0, sizeof(sim.groups)); | ||||||
|  |   sim.current = &list_a; | ||||||
|  |   sim.next = &list_b; | ||||||
|  |   sim.result = build_result; | ||||||
|  |   sim.string = string; | ||||||
|  |   sim.index = 0; | ||||||
|  | 
 | ||||||
|  |   _regex_node_add(root, &list_a); | ||||||
|  | 
 | ||||||
|  |   do { | ||||||
|  |     result = _regex_step(&sim); | ||||||
|  |   } while(sim.string[sim.index++] != '\0' && result == LIBREGEX_SUCCESS); | ||||||
|  | 
 | ||||||
|  |   if(result == LIBREGEX_SUCCESS){ | ||||||
|  |     ll_init(&clear_ll); | ||||||
|  |     result = _regex_find_all(root, &clear_ll, -1); | ||||||
|  |     ll_clear(&clear_ll); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  |   return result; | ||||||
|  | } | ||||||
|  | void regex_result_free(regex_result* result){ | ||||||
|  |   int index = 0; | ||||||
|  |   for(; index < LIBREGEX_MAX_GROUP_COUNT; index++){ | ||||||
|  |     free(result->groups[index]); | ||||||
|  |   } | ||||||
|  |   result->matches = 0; | ||||||
|  | } | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user