2018-02-10 13:52:33 -08:00
|
|
|
#ifndef LIBABACUS_LEXER_H
|
|
|
|
#define LIBABACUS_LEXER_H
|
|
|
|
|
|
|
|
#include "eval.h"
|
2018-02-11 22:32:42 -08:00
|
|
|
#include "result.h"
|
2018-02-10 13:52:33 -08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* The lexer used for reading
|
|
|
|
* a string and converting it into
|
|
|
|
* tokens.
|
|
|
|
*/
|
2018-02-11 21:09:41 -08:00
|
|
|
struct libab_lexer_s {
|
2018-02-10 13:52:33 -08:00
|
|
|
/**
|
|
|
|
* The liblex configuration used
|
|
|
|
* to convert the string into tokens.
|
|
|
|
*/
|
|
|
|
eval_config config;
|
|
|
|
};
|
|
|
|
|
2018-02-10 17:09:35 -08:00
|
|
|
/**
 * A single token produced by the lexer.
 */
struct libab_lexer_match_s {
    /** Line on which the token was found. */
    size_t line;
    /** Index at which the token's string begins. */
    size_t from;
    /** Index of the first character that lies outside the token. */
    size_t to;
    /** Index of the beginning of the line on which the token is found. */
    size_t line_from;
    /** The type of the token. */
    int type;
};
|
|
|
|
|
2018-02-10 14:21:04 -08:00
|
|
|
/**
 * Tags the lexer attaches to meaningful
 * sequences of characters.
 *
 * Values are assigned sequentially starting at 0, so the
 * order of the enumerators below is significant.
 */
enum libab_lexer_token_e {
    /* Basic tokens. */
    TOKEN_CHAR = 0,
    TOKEN_ID,
    TOKEN_NUM,

    /* Operator tokens. */
    TOKEN_OP,
    TOKEN_OP_INFIX,
    TOKEN_OP_PREFIX,
    TOKEN_OP_POSTFIX,
    TOKEN_OP_RESERVED,

    /* TOKEN_KW_* tokens (presumably language keywords). */
    TOKEN_KW_IF,
    TOKEN_KW_ELSE,
    TOKEN_KW_WHILE,
    TOKEN_KW_DO,
    TOKEN_KW_ARROW,
    TOKEN_KW_FUN,
    TOKEN_KW_RETURN,
    TOKEN_KW_LET,
    TOKEN_KW_BE,

    /*
     * Always the final enumerator, so its value equals the
     * number of token kinds declared above it.
     */
    TOKEN_LAST
};
|
|
|
|
|
2018-02-11 21:09:41 -08:00
|
|
|
/* Short aliases for the lexer's struct and enum tags. */
typedef struct libab_lexer_s libab_lexer;
typedef struct libab_lexer_match_s libab_lexer_match;
typedef enum libab_lexer_token_e libab_lexer_token;
|
2018-02-10 13:52:33 -08:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Initializes the given lexer,
|
|
|
|
* placing the default tokens into it.
|
|
|
|
* @param lexer the lexer to intiailize.
|
|
|
|
* @return the result of the operation (can be MALLOC on failed allocation.)
|
|
|
|
*/
|
2018-02-11 21:09:41 -08:00
|
|
|
libab_result libab_lexer_init(libab_lexer* lexer);
|
2018-02-10 17:09:35 -08:00
|
|
|
/**
|
|
|
|
* Turns the given input string into tokens.
|
|
|
|
* @param lexer the lexer to use to turn the string into tokens.
|
|
|
|
* @param string the string to turn into tokens.
|
|
|
|
* @param lex_into the list which should be populated with matches.
|
|
|
|
* @return the result of the operation.
|
|
|
|
*/
|
2018-02-11 21:09:41 -08:00
|
|
|
libab_result libab_lexer_lex(libab_lexer* lexer, const char* string, ll* lext_into);
|
2018-02-10 13:52:33 -08:00
|
|
|
/**
|
|
|
|
* Releases the memory associated with the given lexer,
|
|
|
|
* removing all registered patterns from it.
|
|
|
|
* @param lexer the lexer to free.
|
|
|
|
* @return the result of the operation.
|
|
|
|
*/
|
2018-02-11 21:09:41 -08:00
|
|
|
libab_result libab_lexer_free(libab_lexer* lexer);
|
2018-02-10 17:09:35 -08:00
|
|
|
/**
 * Callback meant to be handed to libds "foreach" calls.
 * lexer_lex allocates the matches it produces; running foreach
 * with this function frees the memory allocated for those matches.
 */
int libab_lexer_foreach_match_free(void* data, va_list args);
|
2018-02-10 13:52:33 -08:00
|
|
|
|
|
|
|
#endif
|