Implement lexing code.
This commit is contained in:
parent
065f2fa4b9
commit
0ff71a9389
|
@ -17,6 +17,35 @@ struct lexer {
|
||||||
eval_config config;
|
eval_config config;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * A single token emitted by the lexer. Positions are indices
 * into the string that was lexed.
 */
struct lexer_match {
    /** Line on which the token was found. */
    size_t line;
    /** Index of the first character belonging to the token. */
    size_t from;
    /** Index of the first character past the end of the token. */
    size_t to;
    /** Index at which the token's line begins. */
    size_t line_from;
    /** Kind of token. */
    int type;
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The various tokens used by the lexer
|
* The various tokens used by the lexer
|
||||||
* in order to tag meaningful sequences
|
* in order to tag meaningful sequences
|
||||||
|
@ -45,6 +74,7 @@ enum lexer_token {
|
||||||
|
|
||||||
typedef struct lexer lexer;
|
typedef struct lexer lexer;
|
||||||
typedef enum lexer_token lexer_token;
|
typedef enum lexer_token lexer_token;
|
||||||
|
typedef struct lexer_match lexer_match;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the given lexer,
|
* Initializes the given lexer,
|
||||||
|
@ -53,6 +83,14 @@ typedef enum lexer_token lexer_token;
|
||||||
* @return the result of the operation (can be MALLOC on failed allocation.)
|
* @return the result of the operation (can be MALLOC on failed allocation.)
|
||||||
*/
|
*/
|
||||||
libab_result lexer_init(lexer* lexer);
|
libab_result lexer_init(lexer* lexer);
|
||||||
|
/**
|
||||||
|
* Turns the given input string into tokens.
|
||||||
|
* @param lexer the lexer to use to turn the string into tokens.
|
||||||
|
* @param string the string to turn into tokens.
|
||||||
|
* @param lex_into the list which should be populated with matches.
|
||||||
|
* @return the result of the operation.
|
||||||
|
*/
|
||||||
|
libab_result lexer_lex(lexer* lexer, const char* string, ll* lext_into);
|
||||||
/**
|
/**
|
||||||
* Releases the memory associated with the given lexer,
|
* Releases the memory associated with the given lexer,
|
||||||
* removing all registered patterns from it.
|
* removing all registered patterns from it.
|
||||||
|
@ -60,5 +98,11 @@ libab_result lexer_init(lexer* lexer);
|
||||||
* @return the result of the operation.
|
* @return the result of the operation.
|
||||||
*/
|
*/
|
||||||
libab_result lexer_free(lexer* lexer);
|
libab_result lexer_free(lexer* lexer);
|
||||||
|
/**
|
||||||
|
* Function intended to be passed to "foreach" calls
|
||||||
|
* in libds. lexer_lex allocates matches, and passing this function
|
||||||
|
* to foreach will free the memory allocated for the matches.
|
||||||
|
*/
|
||||||
|
int lexer_foreach_match_free(void* data, va_list args);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
69
src/lexer.c
69
src/lexer.c
|
@ -1,5 +1,8 @@
|
||||||
#include "lexer.h"
|
#include "lexer.h"
|
||||||
|
#include "ll.h"
|
||||||
#include "libabacus_util.h"
|
#include "libabacus_util.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
libab_result lexer_init(lexer* lexer) {
|
libab_result lexer_init(lexer* lexer) {
|
||||||
size_t i;
|
size_t i;
|
||||||
|
@ -46,6 +49,72 @@ libab_result lexer_init(lexer* lexer) {
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct lexer_state {
|
||||||
|
size_t line;
|
||||||
|
size_t line_from;
|
||||||
|
const char* source;
|
||||||
|
ll* matches;
|
||||||
|
};
|
||||||
|
int _lexer_foreach_convert_match(void* data, va_list args) {
|
||||||
|
libab_result result = LIBAB_SUCCESS;
|
||||||
|
lexer_match* new_match;
|
||||||
|
match* match = data;
|
||||||
|
struct lexer_state* state = va_arg(args, struct lexer_state*);
|
||||||
|
char first_char = state->source[match->from];
|
||||||
|
if(isspace(first_char)) {
|
||||||
|
// Skip
|
||||||
|
} else if(first_char == '\n') {
|
||||||
|
state->line++;
|
||||||
|
state->line_from = match->to;
|
||||||
|
} if((new_match = malloc(sizeof(*new_match)))) {
|
||||||
|
new_match->from = match->from;
|
||||||
|
new_match->to = match->to;
|
||||||
|
new_match->line_from = state->line_from;
|
||||||
|
new_match->line = state->line;
|
||||||
|
result = convert_ds_result(ll_append(state->matches, new_match));
|
||||||
|
if(result != LIBAB_SUCCESS) {
|
||||||
|
free(new_match);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result = LIBAB_MALLOC;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tokenizes the given string: collects the raw matches via eval_all,
 * then converts each of them into a lexer_match appended to lex_into.
 * The temporary raw matches are always released before returning.
 *
 * NOTE(review): on failure the elements of lex_into are freed, but nothing
 * visible here removes them from the list - confirm the caller does not
 * reuse lex_into afterwards without clearing it.
 */
libab_result lexer_lex(lexer* lexer, const char* string, ll* lex_into) {
    libab_result status;
    ll raw;
    struct lexer_state conversion;

    conversion.source = string;
    conversion.matches = lex_into;
    conversion.line = 0;
    conversion.line_from = 0;

    ll_init(&raw);

    status = convert_lex_result(eval_all(string, 0, &lexer->config, &raw));
    if(status == LIBAB_SUCCESS) {
        status = ll_foreach(&raw, NULL, compare_always,
                _lexer_foreach_convert_match, &conversion);
    }

    if(status != LIBAB_SUCCESS) {
        /* Conversion did not complete; drop whatever tokens were produced. */
        ll_foreach(lex_into, NULL, compare_always, lexer_foreach_match_free);
    }

    ll_foreach(&raw, NULL, compare_always, eval_foreach_match_free);
    ll_free(&raw);

    return status;
}
|
||||||
/**
 * Releases the lexer by freeing its underlying evaluation configuration.
 * @param lexer the lexer to free.
 * @return the converted result of eval_config_free.
 */
libab_result lexer_free(lexer* lexer) {
    eval_config* config = &lexer->config;
    return convert_lex_result(eval_config_free(config));
}
|
||||||
|
int lexer_foreach_match_free(void* data, va_list args) {
|
||||||
|
free((lexer_match*) data);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user