Implement lexing code.

This commit is contained in:
Danila Fedorin 2018-02-10 17:09:35 -08:00
parent 065f2fa4b9
commit 0ff71a9389
2 changed files with 113 additions and 0 deletions

View File

@ -17,6 +17,35 @@ struct lexer {
eval_config config;
};
/**
* A token that is produced by the lexer.
* Positions are indices into the string that was lexed; the token's
* text occupies the half-open range [from, to).
*/
struct lexer_match {
/**
* The line that this token was found on.
*/
size_t line;
/**
* The first index at which this token's string
* begins (inclusive).
*/
size_t from;
/**
* The index of the first character that is outside
* this token (exclusive end of the token).
*/
size_t to;
/**
* The index of the beginning of the line on which
* this token is found.
*/
size_t line_from;
/**
* The type of token.
* NOTE(review): presumably one of the lexer_token values - confirm.
*/
int type;
};
/**
* The various tokens used by the lexer
* in order to tag meaningful sequences
@ -45,6 +74,7 @@ enum lexer_token {
/* Shorthand names so these types can be used without the struct / enum keyword. */
typedef struct lexer lexer;
typedef enum lexer_token lexer_token;
typedef struct lexer_match lexer_match;
/**
* Initializes the given lexer,
@ -53,6 +83,14 @@ typedef enum lexer_token lexer_token;
* @return the result of the operation (can be MALLOC on failed allocation.)
*/
libab_result lexer_init(lexer* lexer);
/**
 * Turns the given input string into tokens.
 * Every token placed into the list is a heap-allocated lexer_match;
 * once the tokens are no longer needed, they can be released by passing
 * lexer_foreach_match_free to a libds "foreach" call on the list.
 * @param lexer the lexer to use to turn the string into tokens.
 * @param string the string to turn into tokens.
 * @param lex_into the list which should be populated with matches.
 * @return the result of the operation.
 */
libab_result lexer_lex(lexer* lexer, const char* string, ll* lex_into);
/**
* Releases the memory associated with the given lexer,
* removing all registered patterns from it.
@ -60,5 +98,11 @@ libab_result lexer_init(lexer* lexer);
* @return the result of the operation.
*/
libab_result lexer_free(lexer* lexer);
/**
* Function intended to be passed to "foreach" calls
* in libds. lexer_lex allocates matches, and passing this function
* to foreach will free the memory allocated for the matches.
* @param data the match to free.
* @param args additional foreach arguments; not used by this function.
* @return 0, always.
*/
int lexer_foreach_match_free(void* data, va_list args);
#endif

View File

@ -1,5 +1,8 @@
#include "lexer.h"
#include "ll.h"
#include "libabacus_util.h"
#include <stdlib.h>
#include <ctype.h>
libab_result lexer_init(lexer* lexer) {
size_t i;
@ -46,6 +49,72 @@ libab_result lexer_init(lexer* lexer) {
return result;
}
/**
* Bookkeeping shared between lexer_lex and the per-match
* conversion callback while a string is being lexed.
*/
struct lexer_state {
/* Number of the line currently being processed; lexer_lex starts it at 0. */
size_t line;
/* Index in source at which the current line begins. */
size_t line_from;
/* The string being lexed; raw match positions index into it. */
const char* source;
/* The list that receives the converted lexer_match values. */
ll* matches;
};
/**
 * Callback for ll_foreach that converts one raw match into a lexer_match.
 * A match beginning with a newline advances the line bookkeeping in the
 * lexer_state, a match beginning with any other whitespace is dropped,
 * and every remaining match is copied into a newly allocated lexer_match
 * that is appended to state->matches.
 * @param data the raw match to convert.
 * @param args argument list holding a single struct lexer_state*.
 * @return LIBAB_SUCCESS on success, LIBAB_MALLOC if the allocation fails,
 * or the converted result of ll_append if appending fails.
 */
int _lexer_foreach_convert_match(void* data, va_list args) {
    libab_result result = LIBAB_SUCCESS;
    lexer_match* new_match;
    match* raw = data;
    struct lexer_state* state = va_arg(args, struct lexer_state*);
    char first_char = state->source[raw->from];

    if(first_char == '\n') {
        // Must be tested before isspace(), which is also true for '\n';
        // otherwise the line counters would never advance.
        state->line++;
        state->line_from = raw->to;
    } else if(isspace((unsigned char) first_char)) {
        // Skip. The cast avoids undefined behavior for negative char values.
    } else if((new_match = malloc(sizeof(*new_match)))) {
        new_match->from = raw->from;
        new_match->to = raw->to;
        new_match->line_from = state->line_from;
        new_match->line = state->line;
        // NOTE(review): new_match->type is not assigned here; confirm
        // where the token type should come from and set it.
        result = convert_ds_result(ll_append(state->matches, new_match));
        if(result != LIBAB_SUCCESS) {
            // The list did not take ownership, so release the match here.
            free(new_match);
        }
    } else {
        result = LIBAB_MALLOC;
    }

    return result;
}
/**
 * Lexes the given string and fills lex_into with the resulting tokens.
 * The string is first evaluated into a temporary list of raw matches,
 * each of which is then handed to _lexer_foreach_convert_match.
 * If either step fails, every entry of lex_into is passed to
 * lexer_foreach_match_free. The temporary raw matches are always released.
 * @param lexer the lexer whose configuration is used for evaluation.
 * @param string the string to turn into tokens.
 * @param lex_into the list which receives the converted matches.
 * @return the result of the operation.
 */
libab_result lexer_lex(lexer* lexer, const char* string, ll* lex_into) {
    struct lexer_state conversion;
    ll evaluated;
    libab_result status;

    /* Line tracking starts at the very beginning of the input. */
    conversion.source = string;
    conversion.matches = lex_into;
    conversion.line = 0;
    conversion.line_from = 0;

    ll_init(&evaluated);

    status = convert_lex_result(
            eval_all(string, 0, &lexer->config, &evaluated));

    if(status == LIBAB_SUCCESS) {
        status = ll_foreach(&evaluated, NULL, compare_always,
                _lexer_foreach_convert_match, &conversion);
    }

    if(status != LIBAB_SUCCESS) {
        /* Release whatever matches are in the output list. */
        ll_foreach(lex_into, NULL, compare_always, lexer_foreach_match_free);
    }

    /* The raw matches are only needed during conversion. */
    ll_foreach(&evaluated, NULL, compare_always, eval_foreach_match_free);
    ll_free(&evaluated);

    return status;
}
/**
 * Releases the lexer's resources by freeing its evaluation configuration.
 * @param lexer the lexer whose configuration is freed.
 * @return the converted result of eval_config_free.
 */
libab_result lexer_free(lexer* lexer) {
    libab_result outcome;

    outcome = convert_lex_result(eval_config_free(&lexer->config));
    return outcome;
}
/**
 * Foreach callback that frees a single match.
 * @param data the match to free.
 * @param args additional foreach arguments; unused.
 * @return 0, always.
 */
int lexer_foreach_match_free(void* data, va_list args) {
    (void) args;

    /* free() accepts a void*, so no cast to lexer_match* is required. */
    free(data);
    return 0;
}