370 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			370 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
|  | #include <stdlib.h>
 | ||
|  | #include <string.h>
 | ||
|  | 
 | ||
|  | /* == Nonterminal ID Definitions == */ | ||
|  | #define PGS_NONTERMINAL_S 0
 | ||
|  | #define PGS_NONTERMINAL_PROGRAM 1
 | ||
|  | #define PGS_NONTERMINAL_DEFINITION 2
 | ||
|  | #define PGS_NONTERMINAL_DATA_BODY 3
 | ||
|  | #define PGS_NONTERMINAL_DATA_ELEMS 4
 | ||
|  | #define PGS_NONTERMINAL_DATA_ELEM 5
 | ||
|  | #define PGS_NONTERMINAL_DATA_ELEM_TYPES 6
 | ||
|  | #define PGS_NONTERMINAL_TYPE_BODY 7
 | ||
|  | #define PGS_NONTERMINAL_PARAMS 8
 | ||
|  | #define PGS_NONTERMINAL_DEFN_BODY 9
 | ||
|  | #define PGS_NONTERMINAL_EXPR 10
 | ||
|  | #define PGS_NONTERMINAL_EXPR_ADD 11
 | ||
|  | #define PGS_NONTERMINAL_EXPR_MUL 12
 | ||
|  | #define PGS_NONTERMINAL_EXPR_APP 13
 | ||
|  | #define PGS_NONTERMINAL_EXPR_APP_BOTTOM 14
 | ||
|  | #define PGS_NONTERMINAL_EXPR_CURLY 15
 | ||
|  | #define PGS_NONTERMINAL_EXPR_LET 16
 | ||
|  | #define PGS_NONTERMINAL_EXPR_LETREC 17
 | ||
|  | #define PGS_NONTERMINAL_EXPR_CASE 18
 | ||
|  | #define PGS_NONTERMINAL_EXPR_CASE_BRANCHES 19
 | ||
|  | #define PGS_NONTERMINAL_EXPR_CASE_BRANCH 20
 | ||
|  | #define PGS_NONTERMINAL_PATTERN 21
 | ||
|  | #define PGS_NONTERMINAL_PATTERNS 22
 | ||
|  | #define PGS_NONTERMINAL_TYPE 23
 | ||
|  | 
 | ||
|  | #define PGS_TREE_T(tree) ((tree).tree_data.terminal.token.terminal)
 | ||
|  | #define PGS_TREE_T_FROM(tree) ((tree).tree_data.terminal.token.from)
 | ||
|  | #define PGS_TREE_T_TO(tree) ((tree).tree_data.terminal.token.to)
 | ||
|  | #define PGS_TREE_NT(tree) ((tree).tree_data.nonterminal.nonterminal)
 | ||
|  | #define PGS_TREE_NT_COUNT(tree) ((tree).tree_data.nonterminal.child_count)
 | ||
|  | #define PGS_TREE_NT_CHILD(tree, n) ((tree).tree_data.nonterminal.children[n])
 | ||
|  | #define PGS_TREE_IS_NT(tree, type) (((tree).variant == PGS_TREE_NONTERMINAL) && (PGS_TREE_NT(tree) == (type)))
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Converts a nonterminal value to a string. | ||
|  |  * @param nt the nonterminal ID. | ||
|  |  * @return the name for the nonterminal. | ||
|  |  */ | ||
|  | const char* pgs_nonterminal_name(long int nt); | ||
|  | 
 | ||
|  | /* == Generated Data Definitions == */ | ||
|  | /**
 | ||
|  |  * A grammar item. A lot of the information collected by the parser | ||
|  |  * generate is not carried into the source code, which leaves items | ||
|  |  * as simply a nonterminal ID and the size of the right hand side. | ||
|  |  */ | ||
|  | struct pgs_item_s { | ||
|  |     /** The nonterminal that this item is reduced to. */ | ||
|  |     long int left_id; | ||
|  |     /**
 | ||
|  |      * The size of the item body, used to pop off | ||
|  |      * the correct number of states from the stack. | ||
|  |      */ | ||
|  |     size_t right_count; | ||
|  | }; | ||
|  | 
 | ||
|  | typedef struct pgs_item_s pgs_item; | ||
|  | 
 | ||
|  | /* == General Definitions == */ | ||
|  | #define PGS_MAX_ERROR_LENGTH 255
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * The types of errors that can occur while the | ||
|  |  * entire parsing process. | ||
|  |  */ | ||
|  | enum pgs_error_e { | ||
|  |     /** No error occured. */ | ||
|  |     PGS_NONE = 0, | ||
|  |     /** An allocation failed. */ | ||
|  |     PGS_MALLOC, | ||
|  |     /** A token couldn't be recognized. */ | ||
|  |     PGS_BAD_CHARACTER, | ||
|  |     /** A tree couldn't be recognized.  */ | ||
|  |     PGS_BAD_TOKEN, | ||
|  |     /** End of file reached where it was not expected */ | ||
|  |     PGS_EOF_SHIFT | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * State used to report errors and their corresponding | ||
|  |  * messages. | ||
|  |  */ | ||
|  | struct pgs_state_s { | ||
|  |     /** The error code. */ | ||
|  |     enum pgs_error_e error; | ||
|  |     /** The error message. */ | ||
|  |     char errbuff[PGS_MAX_ERROR_LENGTH]; | ||
|  | }; | ||
|  | 
 | ||
|  | typedef enum pgs_error_e pgs_error; | ||
|  | typedef struct pgs_state_s pgs_state; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Initializes a state with no error. | ||
|  |  * @param s the state to initialize. | ||
|  |  */ | ||
|  | void pgs_state_init(pgs_state* s); | ||
|  | /**
 | ||
|  |  * Sets the state to have an error. | ||
|  |  * @param s the state to initialize. | ||
|  |  * @param err the error message to return. | ||
|  |  */ | ||
|  | void pgs_state_error(pgs_state* s, pgs_error err, const char* message); | ||
|  | 
 | ||
|  | /* == Lexing Definitions ==*/ | ||
|  | /**
 | ||
|  |  * A token produced by lexing. | ||
|  |  */ | ||
|  | struct pgs_token_s { | ||
|  |     /** The ID of the terminal. */ | ||
|  |     long int terminal; | ||
|  |     /** The index at which the token starts. */ | ||
|  |     size_t from; | ||
|  |     /** The index at which the next token begins. */ | ||
|  |     size_t to; | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * A dynamic list of tokens produced while lexing. | ||
|  |  */ | ||
|  | struct pgs_token_list_s { | ||
|  |     /** The size of the currently allocated block of tokens */ | ||
|  |     size_t capacity; | ||
|  |     /** The number of tokens in the list. */ | ||
|  |     size_t token_count; | ||
|  |     /** The token data array. */ | ||
|  |     struct pgs_token_s* tokens; | ||
|  | }; | ||
|  | 
 | ||
|  | typedef struct pgs_token_s pgs_token; | ||
|  | typedef struct pgs_token_list_s pgs_token_list; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Initializes a token list. | ||
|  |  * @param l the list to initialize. | ||
|  |  * @return any errors that occured while initializing the list. | ||
|  |  */ | ||
|  | pgs_error pgs_token_list_init(pgs_token_list* l); | ||
|  | /**
 | ||
|  |  * Appends a token to the list. | ||
|  |  * @param terminal the ID of the terminal to append. | ||
|  |  * @param from the index at which the token begins. | ||
|  |  * @param to the index at which the next token begins. | ||
|  |  */ | ||
|  | pgs_error pgs_token_list_append(pgs_token_list* l, long int terminal, size_t from, size_t to); | ||
|  | /**
 | ||
|  |  * Returns a token at the given index. | ||
|  |  * @param l the list to return a token from. | ||
|  |  * @param i the index from which to return a token. | ||
|  |  * @return a token, or NULL if the index is out of bounds. | ||
|  |  */ | ||
|  | pgs_token* pgs_token_list_at(pgs_token_list* l, size_t i); | ||
|  | /**
 | ||
|  |  * Returns a token ID at the given index. | ||
|  |  * @param l the list to return an ID from. | ||
|  |  * @param i the index from which to return an ID. | ||
|  |  * @return returns an ID, or 0, which represents EOF. | ||
|  |  */ | ||
|  | pgs_error pgs_token_list_at_id(pgs_token_list* l, size_t i ); | ||
|  | /**
 | ||
|  |  * Frees a list of tokens. Since the tokens are owned by the list, | ||
|  |  * they are invalidated after this call too. | ||
|  |  * @param l the list to free. | ||
|  |  */ | ||
|  | void pgs_token_list_free(pgs_token_list* l); | ||
|  | /**
 | ||
|  |  * Performs a lexing operation. | ||
|  |  * @param s the state to populate with error text, if necessary. | ||
|  |  * @param list the list of tokens to initialize and populate. | ||
|  |  * @param source the string to lex. | ||
|  |  * @return the error, if any, that occured during this process. | ||
|  |  */ | ||
|  | pgs_error pgs_do_lex(pgs_state* s, pgs_token_list* list, const char* source); | ||
|  | 
 | ||
|  | /* == Parsing Definitions == */ | ||
|  | /**
 | ||
|  |  *  Enum that represents the variant of a parse tree, | ||
|  |  *  which is either a nonterminal with chilren, or a  | ||
|  |  *  terminal with a token. | ||
|  |  */ | ||
|  | enum pgs_tree_variant_e { | ||
|  |     PGS_TREE_TERMINAL, | ||
|  |     PGS_TREE_NONTERMINAL | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * The data of a terminal tree. | ||
|  |  */ | ||
|  | struct pgs_tree_terminal_s { | ||
|  |     /** The token this tree holds. */ | ||
|  |     pgs_token token; | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * The data of a nonterminal tree. | ||
|  |  */ | ||
|  | struct pgs_tree_nonterminal_s { | ||
|  |     /**
 | ||
|  |      * The nonterminal ID. | ||
|  |      */ | ||
|  |     long int nonterminal; | ||
|  |     /**
 | ||
|  |      * The number of children this tree has. | ||
|  |      */ | ||
|  |     size_t child_count; | ||
|  |     /**
 | ||
|  |      * The array of child pointers, allocated dynamically | ||
|  |      * depending on the item that reduced to this nonterminal. | ||
|  |      */ | ||
|  |     struct pgs_tree_s** children; | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * A general struct for a tree, which is either a terminal | ||
|  |  * or a nonterminal. | ||
|  |  */ | ||
|  | struct pgs_tree_s { | ||
|  |     /** The variant of the tree. */ | ||
|  |     enum pgs_tree_variant_e variant; | ||
|  |     union { | ||
|  |         /** The terminal variant of this tree. */ | ||
|  |         struct pgs_tree_terminal_s terminal; | ||
|  |         /** The nonterminal variant of this tree. */ | ||
|  |         struct pgs_tree_nonterminal_s nonterminal; | ||
|  |     } tree_data; | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * An element on the parse stack, which holds | ||
|  |  * both a tree node and a state. In theory, | ||
|  |  * the stack is actually items followed by states, | ||
|  |  * but since one always comes after the other, | ||
|  |  * and since both need to be looked up fast, | ||
|  |  * we put them on a stack in parallel. | ||
|  |  */ | ||
|  | struct pgs_parse_stack_element_s { | ||
|  |     /** The tree on the stack */ | ||
|  |     struct pgs_tree_s* tree; | ||
|  |     /** The state on the stack */ | ||
|  |     long int state; | ||
|  | }; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * A parse stack. The PDA automaton | ||
|  |  * has to maintain this stack, where it gradually | ||
|  |  * assembles a tree. | ||
|  |  */ | ||
|  | struct pgs_parse_stack_s { | ||
|  |     /** The number of stack elements currently allocated. */ | ||
|  |     size_t capacity; | ||
|  |     /** The current number of stack elements. */ | ||
|  |     size_t size; | ||
|  |     /** The stack element array. */ | ||
|  |     struct pgs_parse_stack_element_s* data; | ||
|  | }; | ||
|  | 
 | ||
|  | typedef enum pgs_tree_variant_e pgs_tree_variant; | ||
|  | typedef struct pgs_tree_terminal_s pgs_tree_terminal; | ||
|  | typedef struct pgs_tree_nontermnal_s pgs_tree_nonterminal; | ||
|  | typedef struct pgs_tree_s pgs_tree; | ||
|  | typedef struct pgs_parse_stack_element_s pgs_parse_stack_element; | ||
|  | typedef struct pgs_parse_stack_s pgs_parse_stack; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Allocates and initialzie a parse tree node that is a nonterminal with the given | ||
|  |  * ID and the given child count. | ||
|  |  * @param nonterminal the nonterminal ID of this tree. | ||
|  |  * @param chil_count the number of chilren that this tree has. | ||
|  |  * @return the newly allocated tree, or NULL if a malloc failure occured. | ||
|  |  */ | ||
|  | pgs_tree* pgs_create_tree_nonterminal(long int nonterminal, size_t child_count); | ||
|  | /**
 | ||
|  |  * Allocates and initialize a parse tree node that is a terminal with the given token. | ||
|  |  * @param t the token to initialize this tree with. The token need not be valid after this call. | ||
|  |  * @return the newly allocated tree, or NULL if a malloc failure occured. | ||
|  |  */ | ||
|  | pgs_tree* pgs_create_tree_terminal(pgs_token* t); | ||
|  | /**
 | ||
|  |  * Frees a nonterminal tree. | ||
|  |  * @tree the tree to free. | ||
|  |  */ | ||
|  | void pgs_free_tree_nonterminal(pgs_tree* tree); | ||
|  | /**
 | ||
|  |  * Frees a terminal tree. | ||
|  |  * @tree the tree to free. | ||
|  |  */ | ||
|  | void pgs_free_tree_terminal(pgs_tree* tree); | ||
|  | /**
 | ||
|  |  * Computes the parser_action_table index for the given tree. | ||
|  |  * @param tree the tree for which to compute the index. | ||
|  |  * @return the index. | ||
|  |  */ | ||
|  | long int pgs_tree_table_index(pgs_tree* tree); | ||
|  | /**
 | ||
|  |  * Frees a tree. | ||
|  |  * @param tree the tree to free. | ||
|  |  */ | ||
|  | void pgs_free_tree(pgs_tree* tree); | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Initialzies a parse stack. | ||
|  |  * @param s the parse stack to initialize. | ||
|  |  * @return the result of the initialization. | ||
|  |  */ | ||
|  | pgs_error pgs_parse_stack_init(pgs_parse_stack* s); | ||
|  | /**
 | ||
|  |  * Appends (pushes) a new tree and state to the stack. | ||
|  |  * @param s the stack to append to. | ||
|  |  * @param tree the tree to append. | ||
|  |  * @param state the state to append. | ||
|  |  * @return the result of the append. | ||
|  |  */ | ||
|  | pgs_error pgs_parse_stack_append(pgs_parse_stack* s, pgs_tree* tree, long int state); | ||
|  | /**
 | ||
|  |  * Appends a given token to the stack, by initializing a new parse tree noe. | ||
|  |  * @param s the stack to append to. | ||
|  |  * @param t the token for which to construct a tree and compute a new state. | ||
|  |  * @return the result of the append. | ||
|  |  */ | ||
|  | pgs_error pgs_parse_stack_append_terminal(pgs_parse_stack* s, pgs_token* t); | ||
|  | /**
 | ||
|  |  * Appends a given item to the stack, by popping the correct number of items | ||
|  |  * and creating a new nonterminal tree node in their place. A new state is also | ||
|  |  * computed from the nonterminal ID. | ||
|  |  * @param s the stack to append to. | ||
|  |  * @param id the nonterminal ID to create. | ||
|  |  * @param count the number of children to pop. | ||
|  |  * @return the result of the append. | ||
|  |  */ | ||
|  | pgs_error pgs_parse_stack_append_nonterminal(pgs_parse_stack* s, long int id, size_t count); | ||
|  | /**
 | ||
|  |  * Gets the state on the top of the stack. | ||
|  |  * @param s the stack for which to get a state. | ||
|  |  * @return the state on the top of the stack. | ||
|  |  */ | ||
|  | long int pgs_parse_stack_top_state(pgs_parse_stack* s); | ||
|  | /**
 | ||
|  |  * Gets the tree on the top of the stack. | ||
|  |  * @param s the stack for which to get a tree. | ||
|  |  * @return the tree on the top of the stack. | ||
|  |  */ | ||
|  | pgs_tree* pgs_parse_stack_top_tree(pgs_parse_stack* s); | ||
|  | /**
 | ||
|  |  * Frees a parse stack, also freeing all the trees. | ||
|  |  * @param s the stack to free. | ||
|  |  */ | ||
|  | void pgs_parse_stack_free(pgs_parse_stack* s); | ||
|  | /**
 | ||
|  |  * Takes the given tokens, and attempts to convert them into a parse tree. | ||
|  |  * @param s the state used for storing errors. | ||
|  |  * @param list the list of tokens, already filled. | ||
|  |  * @param into the tree pointer pointer into which a new tree will be stored. | ||
|  |  * @return the error, if any, that occured. | ||
|  |  */ | ||
|  | pgs_error pgs_do_parse(pgs_state* s, pgs_token_list* list, pgs_tree** into); | ||
|  | 
 | ||
|  | /* == Glue == */ | ||
|  | /**
 | ||
|  |  * Attempts to parse tokens from the given string into the given tree. | ||
|  |  * @param state the state to initialize with error information, if necessary. | ||
|  |  * @param into the tree to build into. | ||
|  |  * @param string the string from which to read. | ||
|  |  * @return the error, if any, that occured. | ||
|  |  */ | ||
|  | pgs_error pgs_do_all(pgs_state* state, pgs_tree** into, const char* string); |