From 0ff71a9389d0cbdde610b38758f0cd158df34408 Mon Sep 17 00:00:00 2001
From: Danila Fedorin
Date: Sat, 10 Feb 2018 17:09:35 -0800
Subject: [PATCH] Implement lexing code.

---
 include/lexer.h | 44 +++++++++++++++++++++++
 src/lexer.c     | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 138 insertions(+)

diff --git a/include/lexer.h b/include/lexer.h
index 702ba5e..529375c 100644
--- a/include/lexer.h
+++ b/include/lexer.h
@@ -17,6 +17,35 @@ struct lexer {
     eval_config config;
 };
 
+/**
+ * A token that is produced by the lexer.
+ */
+struct lexer_match {
+    /**
+     * The line that this token was found on.
+     */
+    size_t line;
+    /**
+     * The first index at which this token's string
+     * begins.
+     */
+    size_t from;
+    /**
+     * The index of the first character that is outside
+     * this token.
+     */
+    size_t to;
+    /**
+     * The index of the beginning of the line on which
+     * this token is found.
+     */
+    size_t line_from;
+    /**
+     * The type of token.
+     */
+    int type;
+};
+
 /**
  * The various tokens used by the lexer
  * in order to tag meaningful sequences
@@ -45,6 +74,7 @@ enum lexer_token {
 
 typedef struct lexer lexer;
 typedef enum lexer_token lexer_token;
+typedef struct lexer_match lexer_match;
 
 /**
  * Initializes the given lexer,
@@ -53,6 +83,14 @@ typedef enum lexer_token lexer_token;
  * @return the result of the operation (can be MALLOC on failed allocation.)
  */
 libab_result lexer_init(lexer* lexer);
+/**
+ * Turns the given input string into tokens.
+ * @param lexer the lexer to use to turn the string into tokens.
+ * @param string the string to turn into tokens.
+ * @param lex_into the list which should be populated with matches.
+ * @return the result of the operation.
+ */
+libab_result lexer_lex(lexer* lexer, const char* string, ll* lex_into);
 /**
  * Releases the memory associated with the given lexer,
  * removing all registered patterns from it.
@@ -60,5 +98,11 @@ libab_result lexer_init(lexer* lexer);
  * @return the result of the operation.
  */
 libab_result lexer_free(lexer* lexer);
+/**
+ * Function intended to be passed to "foreach" calls
+ * in libds. lexer_lex allocates matches, and passing this function
+ * to foreach will free the memory allocated for the matches.
+ */
+int lexer_foreach_match_free(void* data, va_list args);
 
 #endif
diff --git a/src/lexer.c b/src/lexer.c
index 494487e..7129fec 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,5 +1,8 @@
 #include "lexer.h"
+#include "ll.h"
 #include "libabacus_util.h"
+#include <stdlib.h>
+#include <ctype.h>
 
 libab_result lexer_init(lexer* lexer) {
     size_t i;
@@ -46,6 +49,97 @@ libab_result lexer_init(lexer* lexer) {
 
     return result;
 }
+
+/**
+ * State handed to _lexer_foreach_convert_match through ll_foreach.
+ */
+struct lexer_state {
+    /* Count of matches seen so far that begin with a newline. */
+    size_t line;
+    /* The "to" index of the most recent match that begins with a newline. */
+    size_t line_from;
+    /* The string being lexed. */
+    const char* source;
+    /* The list that converted matches are appended to. */
+    ll* matches;
+};
+
+/**
+ * Converts one raw match into a lexer_match appended to state->matches.
+ * A match that begins with a newline only advances the line bookkeeping,
+ * and a match that begins with any other whitespace is skipped; neither
+ * produces a token.
+ * @param data the raw match to convert.
+ * @param args holds a single struct lexer_state*.
+ * @return LIBAB_SUCCESS, LIBAB_MALLOC on failed allocation, or the
+ * converted result of ll_append.
+ */
+int _lexer_foreach_convert_match(void* data, va_list args) {
+    libab_result result = LIBAB_SUCCESS;
+    lexer_match* new_match;
+    match* match = data;
+    struct lexer_state* state = va_arg(args, struct lexer_state*);
+    char first_char = state->source[match->from];
+    /* '\n' also satisfies isspace, so it has to be tested first. */
+    if(first_char == '\n') {
+        state->line++;
+        state->line_from = match->to;
+    } else if(isspace((unsigned char) first_char)) {
+        /* Skip */
+    } else if((new_match = malloc(sizeof(*new_match)))) {
+        /* NOTE(review): new_match->type is never assigned here. */
+        new_match->from = match->from;
+        new_match->to = match->to;
+        new_match->line_from = state->line_from;
+        new_match->line = state->line;
+        result = convert_ds_result(ll_append(state->matches, new_match));
+        if(result != LIBAB_SUCCESS) {
+            free(new_match);
+        }
+    } else {
+        result = LIBAB_MALLOC;
+    }
+    return result;
+}
+
+libab_result lexer_lex(lexer* lexer, const char* string, ll* lex_into) {
+    libab_result result;
+    ll raw_matches;
+    struct lexer_state state;
+
+    ll_init(&raw_matches);
+
+    state.line = 0;
+    state.line_from = 0;
+    state.matches = lex_into;
+    state.source = string;
+
+    result = convert_lex_result(
+            eval_all(string, 0, &lexer->config, &raw_matches));
+
+    if(result == LIBAB_SUCCESS) {
+        result = ll_foreach(&raw_matches, NULL, compare_always,
+                _lexer_foreach_convert_match, &state);
+    }
+
+    if(result != LIBAB_SUCCESS) {
+        /*
+         * NOTE(review): this frees every match held by lex_into, but
+         * nothing here removes the freed entries from the list itself.
+         */
+        ll_foreach(lex_into, NULL, compare_always, lexer_foreach_match_free);
+    }
+
+    /* The raw matches are temporary and are released on every path. */
+    ll_foreach(&raw_matches, NULL, compare_always, eval_foreach_match_free);
+    ll_free(&raw_matches);
+
+    return result;
+}
 libab_result lexer_free(lexer* lexer) {
     return convert_lex_result(eval_config_free(&lexer->config));
 }
+int lexer_foreach_match_free(void* data, va_list args) {
+    free((lexer_match*) data);
+    return 0;
+}