Implement lexing code.
This commit is contained in:
parent
065f2fa4b9
commit
0ff71a9389
|
@ -17,6 +17,35 @@ struct lexer {
|
|||
eval_config config;
|
||||
};
|
||||
|
||||
/**
 * A single token emitted by the lexer. The position fields
 * are indices into the string that was lexed.
 */
struct lexer_match {
    /** Line on which the token was found. */
    size_t line;
    /** Index of the first character of the token's text. */
    size_t from;
    /** Index of the first character past the token (exclusive end). */
    size_t to;
    /** Index at which the token's line begins. */
    size_t line_from;
    /** The type of the token. */
    int type;
};
|
||||
|
||||
/**
|
||||
* The various tokens used by the lexer
|
||||
* in order to tag meaningful sequences
|
||||
|
@ -45,6 +74,7 @@ enum lexer_token {
|
|||
|
||||
typedef struct lexer lexer;
|
||||
typedef enum lexer_token lexer_token;
|
||||
typedef struct lexer_match lexer_match;
|
||||
|
||||
/**
|
||||
* Initializes the given lexer,
|
||||
|
@ -53,6 +83,14 @@ typedef enum lexer_token lexer_token;
|
|||
* @return the result of the operation (can be MALLOC on failed allocation.)
|
||||
*/
|
||||
libab_result lexer_init(lexer* lexer);
|
||||
/**
|
||||
* Turns the given input string into tokens.
|
||||
* @param lexer the lexer to use to turn the string into tokens.
|
||||
* @param string the string to turn into tokens.
|
||||
* @param lex_into the list which should be populated with matches.
|
||||
* @return the result of the operation.
|
||||
*/
|
||||
libab_result lexer_lex(lexer* lexer, const char* string, ll* lext_into);
|
||||
/**
|
||||
* Releases the memory associated with the given lexer,
|
||||
* removing all registered patterns from it.
|
||||
|
@ -60,5 +98,11 @@ libab_result lexer_init(lexer* lexer);
|
|||
* @return the result of the operation.
|
||||
*/
|
||||
libab_result lexer_free(lexer* lexer);
|
||||
/**
|
||||
* Function intended to be passed to "foreach" calls
|
||||
* in libds. lexer_lex allocates matches, and passing this function
|
||||
* to foreach will free the memory allocated for the matches.
|
||||
*/
|
||||
int lexer_foreach_match_free(void* data, va_list args);
|
||||
|
||||
#endif
|
||||
|
|
69
src/lexer.c
69
src/lexer.c
|
@ -1,5 +1,8 @@
|
|||
#include "lexer.h"
|
||||
#include "ll.h"
|
||||
#include "libabacus_util.h"
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
libab_result lexer_init(lexer* lexer) {
|
||||
size_t i;
|
||||
|
@ -46,6 +49,72 @@ libab_result lexer_init(lexer* lexer) {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct lexer_state {
|
||||
size_t line;
|
||||
size_t line_from;
|
||||
const char* source;
|
||||
ll* matches;
|
||||
};
|
||||
int _lexer_foreach_convert_match(void* data, va_list args) {
|
||||
libab_result result = LIBAB_SUCCESS;
|
||||
lexer_match* new_match;
|
||||
match* match = data;
|
||||
struct lexer_state* state = va_arg(args, struct lexer_state*);
|
||||
char first_char = state->source[match->from];
|
||||
if(isspace(first_char)) {
|
||||
// Skip
|
||||
} else if(first_char == '\n') {
|
||||
state->line++;
|
||||
state->line_from = match->to;
|
||||
} if((new_match = malloc(sizeof(*new_match)))) {
|
||||
new_match->from = match->from;
|
||||
new_match->to = match->to;
|
||||
new_match->line_from = state->line_from;
|
||||
new_match->line = state->line;
|
||||
result = convert_ds_result(ll_append(state->matches, new_match));
|
||||
if(result != LIBAB_SUCCESS) {
|
||||
free(new_match);
|
||||
}
|
||||
} else {
|
||||
result = LIBAB_MALLOC;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Lexes the given string, appending one lexer_match per converted
 * raw match to lex_into.
 *
 * The raw matches produced by eval_all live in a temporary list that is
 * always released before returning. If either the evaluation or the
 * conversion fails, the matches held by lex_into are freed.
 * NOTE(review): on failure the list itself is not emptied here after its
 * elements are freed; confirm callers do not reuse its contents.
 *
 * @param lexer the lexer whose configuration drives the evaluation.
 * @param string the string to turn into tokens.
 * @param lex_into the list to populate with matches.
 * @return the result of the operation.
 */
libab_result lexer_lex(lexer* lexer, const char* string, ll* lex_into) {
    struct lexer_state conversion;
    ll raw;
    libab_result result;

    /* The conversion starts on line 0, whose first character is index 0. */
    conversion.source = string;
    conversion.matches = lex_into;
    conversion.line_from = 0;
    conversion.line = 0;

    ll_init(&raw);
    result = convert_lex_result(eval_all(string, 0, &lexer->config, &raw));

    if(result == LIBAB_SUCCESS) {
        result = ll_foreach(&raw, NULL, compare_always,
                _lexer_foreach_convert_match, &conversion);
    }

    if(result != LIBAB_SUCCESS) {
        /* Drop whatever was produced before the failure. */
        ll_foreach(lex_into, NULL, compare_always, lexer_foreach_match_free);
    }

    /* The raw matches are only an intermediate; release them on every path. */
    ll_foreach(&raw, NULL, compare_always, eval_foreach_match_free);
    ll_free(&raw);

    return result;
}
|
||||
/**
 * Releases the lexer's evaluation configuration.
 * @param lexer the lexer to release.
 * @return the converted result of freeing the configuration.
 */
libab_result lexer_free(lexer* lexer) {
    libab_result result = convert_lex_result(eval_config_free(&lexer->config));
    return result;
}
|
||||
/**
 * Foreach callback that frees a single match.
 * @param data the match to free.
 * @param args unused.
 * @return always 0.
 */
int lexer_foreach_match_free(void* data, va_list args) {
    (void) args;
    free(data);
    return 0;
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user