370 lines
11 KiB
C
370 lines
11 KiB
C
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* == Nonterminal ID Definitions == */
|
|
#define PGS_NONTERMINAL_S 0
|
|
#define PGS_NONTERMINAL_PROGRAM 1
|
|
#define PGS_NONTERMINAL_DEFINITION 2
|
|
#define PGS_NONTERMINAL_DATA_BODY 3
|
|
#define PGS_NONTERMINAL_DATA_ELEMS 4
|
|
#define PGS_NONTERMINAL_DATA_ELEM 5
|
|
#define PGS_NONTERMINAL_DATA_ELEM_TYPES 6
|
|
#define PGS_NONTERMINAL_TYPE_BODY 7
|
|
#define PGS_NONTERMINAL_PARAMS 8
|
|
#define PGS_NONTERMINAL_DEFN_BODY 9
|
|
#define PGS_NONTERMINAL_EXPR 10
|
|
#define PGS_NONTERMINAL_EXPR_ADD 11
|
|
#define PGS_NONTERMINAL_EXPR_MUL 12
|
|
#define PGS_NONTERMINAL_EXPR_APP 13
|
|
#define PGS_NONTERMINAL_EXPR_APP_BOTTOM 14
|
|
#define PGS_NONTERMINAL_EXPR_CURLY 15
|
|
#define PGS_NONTERMINAL_EXPR_LET 16
|
|
#define PGS_NONTERMINAL_EXPR_LETREC 17
|
|
#define PGS_NONTERMINAL_EXPR_CASE 18
|
|
#define PGS_NONTERMINAL_EXPR_CASE_BRANCHES 19
|
|
#define PGS_NONTERMINAL_EXPR_CASE_BRANCH 20
|
|
#define PGS_NONTERMINAL_PATTERN 21
|
|
#define PGS_NONTERMINAL_PATTERNS 22
|
|
#define PGS_NONTERMINAL_TYPE 23
|
|
|
|
#define PGS_TREE_T(tree) ((tree).tree_data.terminal.token.terminal)
|
|
#define PGS_TREE_T_FROM(tree) ((tree).tree_data.terminal.token.from)
|
|
#define PGS_TREE_T_TO(tree) ((tree).tree_data.terminal.token.to)
|
|
#define PGS_TREE_NT(tree) ((tree).tree_data.nonterminal.nonterminal)
|
|
#define PGS_TREE_NT_COUNT(tree) ((tree).tree_data.nonterminal.child_count)
|
|
#define PGS_TREE_NT_CHILD(tree, n) ((tree).tree_data.nonterminal.children[n])
|
|
#define PGS_TREE_IS_NT(tree, type) (((tree).variant == PGS_TREE_NONTERMINAL) && (PGS_TREE_NT(tree) == (type)))
|
|
|
|
/**
|
|
* Converts a nonterminal value to a string.
|
|
* @param nt the nonterminal ID.
|
|
* @return the name for the nonterminal.
|
|
*/
|
|
const char* pgs_nonterminal_name(long int nt);
|
|
|
|
/* == Generated Data Definitions == */
|
|
/**
|
|
* A grammar item. A lot of the information collected by the parser
|
|
* generate is not carried into the source code, which leaves items
|
|
* as simply a nonterminal ID and the size of the right hand side.
|
|
*/
|
|
struct pgs_item_s {
|
|
/** The nonterminal that this item is reduced to. */
|
|
long int left_id;
|
|
/**
|
|
* The size of the item body, used to pop off
|
|
* the correct number of states from the stack.
|
|
*/
|
|
size_t right_count;
|
|
};
|
|
|
|
typedef struct pgs_item_s pgs_item;
|
|
|
|
/* == General Definitions == */
|
|
#define PGS_MAX_ERROR_LENGTH 255
|
|
|
|
/**
|
|
* The types of errors that can occur while the
|
|
* entire parsing process.
|
|
*/
|
|
enum pgs_error_e {
|
|
/** No error occured. */
|
|
PGS_NONE = 0,
|
|
/** An allocation failed. */
|
|
PGS_MALLOC,
|
|
/** A token couldn't be recognized. */
|
|
PGS_BAD_CHARACTER,
|
|
/** A tree couldn't be recognized. */
|
|
PGS_BAD_TOKEN,
|
|
/** End of file reached where it was not expected */
|
|
PGS_EOF_SHIFT
|
|
};
|
|
|
|
/**
|
|
* State used to report errors and their corresponding
|
|
* messages.
|
|
*/
|
|
struct pgs_state_s {
|
|
/** The error code. */
|
|
enum pgs_error_e error;
|
|
/** The error message. */
|
|
char errbuff[PGS_MAX_ERROR_LENGTH];
|
|
};
|
|
|
|
typedef enum pgs_error_e pgs_error;
|
|
typedef struct pgs_state_s pgs_state;
|
|
|
|
/**
|
|
* Initializes a state with no error.
|
|
* @param s the state to initialize.
|
|
*/
|
|
void pgs_state_init(pgs_state* s);
|
|
/**
|
|
* Sets the state to have an error.
|
|
* @param s the state to initialize.
|
|
* @param err the error message to return.
|
|
*/
|
|
void pgs_state_error(pgs_state* s, pgs_error err, const char* message);
|
|
|
|
/* == Lexing Definitions ==*/
|
|
/**
|
|
* A token produced by lexing.
|
|
*/
|
|
struct pgs_token_s {
|
|
/** The ID of the terminal. */
|
|
long int terminal;
|
|
/** The index at which the token starts. */
|
|
size_t from;
|
|
/** The index at which the next token begins. */
|
|
size_t to;
|
|
};
|
|
|
|
/**
|
|
* A dynamic list of tokens produced while lexing.
|
|
*/
|
|
struct pgs_token_list_s {
|
|
/** The size of the currently allocated block of tokens */
|
|
size_t capacity;
|
|
/** The number of tokens in the list. */
|
|
size_t token_count;
|
|
/** The token data array. */
|
|
struct pgs_token_s* tokens;
|
|
};
|
|
|
|
typedef struct pgs_token_s pgs_token;
|
|
typedef struct pgs_token_list_s pgs_token_list;
|
|
|
|
/**
|
|
* Initializes a token list.
|
|
* @param l the list to initialize.
|
|
* @return any errors that occured while initializing the list.
|
|
*/
|
|
pgs_error pgs_token_list_init(pgs_token_list* l);
|
|
/**
|
|
* Appends a token to the list.
|
|
* @param terminal the ID of the terminal to append.
|
|
* @param from the index at which the token begins.
|
|
* @param to the index at which the next token begins.
|
|
*/
|
|
pgs_error pgs_token_list_append(pgs_token_list* l, long int terminal, size_t from, size_t to);
|
|
/**
|
|
* Returns a token at the given index.
|
|
* @param l the list to return a token from.
|
|
* @param i the index from which to return a token.
|
|
* @return a token, or NULL if the index is out of bounds.
|
|
*/
|
|
pgs_token* pgs_token_list_at(pgs_token_list* l, size_t i);
|
|
/**
|
|
* Returns a token ID at the given index.
|
|
* @param l the list to return an ID from.
|
|
* @param i the index from which to return an ID.
|
|
* @return returns an ID, or 0, which represents EOF.
|
|
*/
|
|
pgs_error pgs_token_list_at_id(pgs_token_list* l, size_t i );
|
|
/**
|
|
* Frees a list of tokens. Since the tokens are owned by the list,
|
|
* they are invalidated after this call too.
|
|
* @param l the list to free.
|
|
*/
|
|
void pgs_token_list_free(pgs_token_list* l);
|
|
/**
|
|
* Performs a lexing operation.
|
|
* @param s the state to populate with error text, if necessary.
|
|
* @param list the list of tokens to initialize and populate.
|
|
* @param source the string to lex.
|
|
* @return the error, if any, that occured during this process.
|
|
*/
|
|
pgs_error pgs_do_lex(pgs_state* s, pgs_token_list* list, const char* source);
|
|
|
|
/* == Parsing Definitions == */
|
|
/**
|
|
* Enum that represents the variant of a parse tree,
|
|
* which is either a nonterminal with chilren, or a
|
|
* terminal with a token.
|
|
*/
|
|
enum pgs_tree_variant_e {
|
|
PGS_TREE_TERMINAL,
|
|
PGS_TREE_NONTERMINAL
|
|
};
|
|
|
|
/**
|
|
* The data of a terminal tree.
|
|
*/
|
|
struct pgs_tree_terminal_s {
|
|
/** The token this tree holds. */
|
|
pgs_token token;
|
|
};
|
|
|
|
/**
|
|
* The data of a nonterminal tree.
|
|
*/
|
|
struct pgs_tree_nonterminal_s {
|
|
/**
|
|
* The nonterminal ID.
|
|
*/
|
|
long int nonterminal;
|
|
/**
|
|
* The number of children this tree has.
|
|
*/
|
|
size_t child_count;
|
|
/**
|
|
* The array of child pointers, allocated dynamically
|
|
* depending on the item that reduced to this nonterminal.
|
|
*/
|
|
struct pgs_tree_s** children;
|
|
};
|
|
|
|
/**
|
|
* A general struct for a tree, which is either a terminal
|
|
* or a nonterminal.
|
|
*/
|
|
struct pgs_tree_s {
|
|
/** The variant of the tree. */
|
|
enum pgs_tree_variant_e variant;
|
|
union {
|
|
/** The terminal variant of this tree. */
|
|
struct pgs_tree_terminal_s terminal;
|
|
/** The nonterminal variant of this tree. */
|
|
struct pgs_tree_nonterminal_s nonterminal;
|
|
} tree_data;
|
|
};
|
|
|
|
/**
|
|
* An element on the parse stack, which holds
|
|
* both a tree node and a state. In theory,
|
|
* the stack is actually items followed by states,
|
|
* but since one always comes after the other,
|
|
* and since both need to be looked up fast,
|
|
* we put them on a stack in parallel.
|
|
*/
|
|
struct pgs_parse_stack_element_s {
|
|
/** The tree on the stack */
|
|
struct pgs_tree_s* tree;
|
|
/** The state on the stack */
|
|
long int state;
|
|
};
|
|
|
|
/**
|
|
* A parse stack. The PDA automaton
|
|
* has to maintain this stack, where it gradually
|
|
* assembles a tree.
|
|
*/
|
|
struct pgs_parse_stack_s {
|
|
/** The number of stack elements currently allocated. */
|
|
size_t capacity;
|
|
/** The current number of stack elements. */
|
|
size_t size;
|
|
/** The stack element array. */
|
|
struct pgs_parse_stack_element_s* data;
|
|
};
|
|
|
|
typedef enum pgs_tree_variant_e pgs_tree_variant;
|
|
typedef struct pgs_tree_terminal_s pgs_tree_terminal;
|
|
typedef struct pgs_tree_nontermnal_s pgs_tree_nonterminal;
|
|
typedef struct pgs_tree_s pgs_tree;
|
|
typedef struct pgs_parse_stack_element_s pgs_parse_stack_element;
|
|
typedef struct pgs_parse_stack_s pgs_parse_stack;
|
|
|
|
/**
|
|
* Allocates and initialzie a parse tree node that is a nonterminal with the given
|
|
* ID and the given child count.
|
|
* @param nonterminal the nonterminal ID of this tree.
|
|
* @param chil_count the number of chilren that this tree has.
|
|
* @return the newly allocated tree, or NULL if a malloc failure occured.
|
|
*/
|
|
pgs_tree* pgs_create_tree_nonterminal(long int nonterminal, size_t child_count);
|
|
/**
|
|
* Allocates and initialize a parse tree node that is a terminal with the given token.
|
|
* @param t the token to initialize this tree with. The token need not be valid after this call.
|
|
* @return the newly allocated tree, or NULL if a malloc failure occured.
|
|
*/
|
|
pgs_tree* pgs_create_tree_terminal(pgs_token* t);
|
|
/**
|
|
* Frees a nonterminal tree.
|
|
* @tree the tree to free.
|
|
*/
|
|
void pgs_free_tree_nonterminal(pgs_tree* tree);
|
|
/**
|
|
* Frees a terminal tree.
|
|
* @tree the tree to free.
|
|
*/
|
|
void pgs_free_tree_terminal(pgs_tree* tree);
|
|
/**
|
|
* Computes the parser_action_table index for the given tree.
|
|
* @param tree the tree for which to compute the index.
|
|
* @return the index.
|
|
*/
|
|
long int pgs_tree_table_index(pgs_tree* tree);
|
|
/**
|
|
* Frees a tree.
|
|
* @param tree the tree to free.
|
|
*/
|
|
void pgs_free_tree(pgs_tree* tree);
|
|
|
|
/**
|
|
* Initialzies a parse stack.
|
|
* @param s the parse stack to initialize.
|
|
* @return the result of the initialization.
|
|
*/
|
|
pgs_error pgs_parse_stack_init(pgs_parse_stack* s);
|
|
/**
|
|
* Appends (pushes) a new tree and state to the stack.
|
|
* @param s the stack to append to.
|
|
* @param tree the tree to append.
|
|
* @param state the state to append.
|
|
* @return the result of the append.
|
|
*/
|
|
pgs_error pgs_parse_stack_append(pgs_parse_stack* s, pgs_tree* tree, long int state);
|
|
/**
|
|
* Appends a given token to the stack, by initializing a new parse tree noe.
|
|
* @param s the stack to append to.
|
|
* @param t the token for which to construct a tree and compute a new state.
|
|
* @return the result of the append.
|
|
*/
|
|
pgs_error pgs_parse_stack_append_terminal(pgs_parse_stack* s, pgs_token* t);
|
|
/**
|
|
* Appends a given item to the stack, by popping the correct number of items
|
|
* and creating a new nonterminal tree node in their place. A new state is also
|
|
* computed from the nonterminal ID.
|
|
* @param s the stack to append to.
|
|
* @param id the nonterminal ID to create.
|
|
* @param count the number of children to pop.
|
|
* @return the result of the append.
|
|
*/
|
|
pgs_error pgs_parse_stack_append_nonterminal(pgs_parse_stack* s, long int id, size_t count);
|
|
/**
|
|
* Gets the state on the top of the stack.
|
|
* @param s the stack for which to get a state.
|
|
* @return the state on the top of the stack.
|
|
*/
|
|
long int pgs_parse_stack_top_state(pgs_parse_stack* s);
|
|
/**
|
|
* Gets the tree on the top of the stack.
|
|
* @param s the stack for which to get a tree.
|
|
* @return the tree on the top of the stack.
|
|
*/
|
|
pgs_tree* pgs_parse_stack_top_tree(pgs_parse_stack* s);
|
|
/**
|
|
* Frees a parse stack, also freeing all the trees.
|
|
* @param s the stack to free.
|
|
*/
|
|
void pgs_parse_stack_free(pgs_parse_stack* s);
|
|
/**
|
|
* Takes the given tokens, and attempts to convert them into a parse tree.
|
|
* @param s the state used for storing errors.
|
|
* @param list the list of tokens, already filled.
|
|
* @param into the tree pointer pointer into which a new tree will be stored.
|
|
* @return the error, if any, that occured.
|
|
*/
|
|
pgs_error pgs_do_parse(pgs_state* s, pgs_token_list* list, pgs_tree** into);
|
|
|
|
/* == Glue == */
|
|
/**
|
|
* Attempts to parse tokens from the given string into the given tree.
|
|
* @param state the state to initialize with error information, if necessary.
|
|
* @param into the tree to build into.
|
|
* @param string the string from which to read.
|
|
* @return the error, if any, that occured.
|
|
*/
|
|
pgs_error pgs_do_all(pgs_state* state, pgs_tree** into, const char* string);
|