Write first draft of Bison-based parser and lexer
This commit is contained in:
parent
43d23963e2
commit
85908ae0c4
|
@ -1,15 +1,19 @@
|
|||
%{
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "ast.hpp"
|
||||
#include "parser.hpp"
|
||||
#include "compiler_ast.hpp"
|
||||
#include "compiler_parser.hpp"
|
||||
|
||||
std::vector<definition_ptr> program;
|
||||
extern yy::parser::symbol_type yylex();
|
||||
|
||||
%}
|
||||
|
||||
%token PLUS
|
||||
%token TIMES
|
||||
%token MINUS
|
||||
%token DIVIDE
|
||||
%token INT
|
||||
%token <int> INT
|
||||
%token DEFN
|
||||
%token DATA
|
||||
%token CASE
|
||||
|
@ -20,7 +24,88 @@
|
|||
%token CPAREN
|
||||
%token COMMA
|
||||
%token ARROW
|
||||
%token EQUA
|
||||
%token LID
|
||||
%token UID
|
||||
%token EQUAL
|
||||
%token <std::string> LID
|
||||
%token <std::string> UID
|
||||
|
||||
%language "c++"
|
||||
%define api.value.type variant
|
||||
%define api.token.constructor
|
||||
|
||||
%type <std::vector<std::string>> lowercaseParams
|
||||
%type <std::vector<definition_ptr>> program definitions
|
||||
%type <std::vector<branch_ptr>> branches
|
||||
%type <ast_ptr> aAdd aMul case app appBase
|
||||
%type <definition_ptr> definition
|
||||
%type <branch_ptr> branch
|
||||
%type <pattern_ptr> pattern
|
||||
|
||||
%start program
|
||||
|
||||
%%
|
||||
|
||||
/* Start symbol (named by %start): the whole input is a list of definitions.
   The action moves that list into the global `program` vector declared in
   the prologue; it does not assign $$. */
program
|
||||
: definitions { program = std::move($1); }
|
||||
;
|
||||
|
||||
/* One or more `definition`s, gathered left-recursively into a
   std::vector<definition_ptr> in the order they appear. */
definitions
|
||||
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
|
||||
;
|
||||
|
||||
/* A DEFN definition: DEFN, an LID name, a (possibly empty) parameter list,
   EQUAL, and an aAdd body wrapped in OCURLY/CCURLY.
   The name ($2), parameter names ($3) and body ($6) are moved into a newly
   allocated definition_defn owned by the resulting definition_ptr. */
definition
|
||||
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||
{ $$ = definition_ptr(
|
||||
new definition_defn(std::move($2), std::move($3), std::move($6))); }
|
||||
;
|
||||
|
||||
/* Zero or more LID tokens, gathered left-recursively into a
   std::vector<std::string> in the order they appear. */
lowercaseParams
|
||||
: %empty { $$ = std::vector<std::string>(); }
|
||||
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||
;
|
||||
|
||||
/* Additive level: a left-recursive chain of aMul operands joined by PLUS or
   MINUS, so `a - b - c` groups as `(a - b) - c`. Each step builds an
   ast_binop from the left and right operands.
   NOTE(review): the PLUS/MINUS passed to ast_binop are presumably operator
   tags declared in compiler_ast.hpp (not visible here) - confirm. */
aAdd
|
||||
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
||||
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
||||
| aMul { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
/* Multiplicative level: a left-recursive chain of `app` operands joined by
   TIMES or DIVIDE. Because aAdd's operands are aMul, these operators group
   more tightly than PLUS/MINUS.
   NOTE(review): the TIMES/DIVIDE passed to ast_binop are presumably operator
   tags declared in compiler_ast.hpp (not visible here) - confirm. */
aMul
|
||||
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
|
||||
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
|
||||
| app { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
/* Application by juxtaposition: a left-recursive sequence of appBase terms.
   Each additional appBase wraps the expression built so far in an ast_app,
   so `f x y` groups as `(f x) y`. */
app
|
||||
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
|
||||
| appBase { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
/* Atomic expressions that can appear as an operand of application:
   an INT literal, an LID, a UID, a parenthesised aAdd (the parentheses
   produce no node of their own), or a case expression. */
appBase
|
||||
: INT { $$ = ast_ptr(new ast_int($1)); }
|
||||
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
|
||||
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
|
||||
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||
| case { $$ = std::move($1); }
|
||||
;
|
||||
|
||||
/* Case expression: CASE <aAdd> OF OCURLY <branches> CCURLY.
   The examined expression ($2) and the branch list ($5) are moved into a
   newly allocated ast_case. */
case
|
||||
: CASE aAdd OF OCURLY branches CCURLY
|
||||
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
|
||||
;
|
||||
|
||||
/* One or more `branch`es separated by COMMA, gathered left-recursively into
   a std::vector<branch_ptr> in the order they appear. */
branches
|
||||
/* Append to $$, not $1: $1 has just been moved from, so pushing onto it
   would leave the new branch out of the result. */
: branches COMMA branch { $$ = std::move($1); $$.push_back(std::move($3)); }
|
||||
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
|
||||
;
|
||||
|
||||
/* A single case branch: <pattern> ARROW OCURLY <aAdd> CCURLY.
   The pattern ($1) and the braced body expression ($4) are moved into a
   newly allocated `branch`. */
branch
|
||||
: pattern ARROW OCURLY aAdd CCURLY
|
||||
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||
;
|
||||
|
||||
/* A branch pattern, in one of two forms:
   - a lone LID, producing a pattern_var holding that name;
   - a UID followed by zero or more LIDs, producing a pattern_constr holding
     the UID text and the list of LID names. */
pattern
|
||||
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
|
||||
| UID lowercaseParams
|
||||
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
|
||||
;
|
||||
|
|
34
code/compiler_scanner_bison.l
Normal file
34
code/compiler_scanner_bison.l
Normal file
|
@ -0,0 +1,34 @@
|
|||
%option noyywrap
|
||||
|
||||
%{
|
||||
#include <iostream>
|
||||
#include "compiler_ast.hpp"
|
||||
#include "compiler_parser.hpp"
|
||||
|
||||
#define YY_DECL yy::parser::symbol_type yylex()
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
[ \n]+ {}
|
||||
\+ { return yy::parser::make_PLUS(); }
|
||||
\* { return yy::parser::make_TIMES(); }
|
||||
- { return yy::parser::make_MINUS(); }
|
||||
\/ { return yy::parser::make_DIVIDE(); }
|
||||
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
|
||||
defn { return yy::parser::make_DEFN(); }
|
||||
data { return yy::parser::make_DATA(); }
|
||||
case { return yy::parser::make_CASE(); }
|
||||
of { return yy::parser::make_OF(); }
|
||||
\{ { return yy::parser::make_OCURLY(); }
|
||||
\} { return yy::parser::make_CCURLY(); }
|
||||
\( { return yy::parser::make_OPAREN(); }
|
||||
\) { return yy::parser::make_CPAREN(); }
|
||||
, { return yy::parser::make_COMMA(); }
|
||||
-> { return yy::parser::make_ARROW(); }
|
||||
= { return yy::parser::make_EQUAL(); }
|
||||
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
|
||||
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
|
||||
|
||||
%%
|
|
@ -226,6 +226,10 @@ It's called Bison, and it is frequently used with Flex. Before we get to bison,
|
|||
incurred - the implementation of our AST. Such a tree is language-specific, so Bison doesn't generate it for us. Here's what
|
||||
I came up with:
|
||||
{{< codeblock "C++" "compiler_ast.hpp" >}}
|
||||
We create a base class for an expression tree, which we call `ast`. Then, for each possible syntactic construct in our language
|
||||
(a number, a variable, a binary operation, a case expression) we create a subclass of `ast`. The `ast_case` subclass
|
||||
is the most complex, since it must contain a list of case expression branches, which are a combination of a `pattern` and
|
||||
another expression.
|
||||
|
||||
Finally, we get to writing our Bison file, `parser.y`. Here's what I came up with:
|
||||
{{< rawblock "compiler_parser.y" >}}
|
||||
|
|
Loading…
Reference in New Issue
Block a user