Write first draft of Bison-based parser and lexer
This commit is contained in:
parent
43d23963e2
commit
85908ae0c4
@ -1,15 +1,19 @@
|
|||||||
%{
|
%{
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "ast.hpp"
|
#include "compiler_ast.hpp"
|
||||||
#include "parser.hpp"
|
#include "compiler_parser.hpp"
|
||||||
|
|
||||||
|
std::vector<definition_ptr> program;
|
||||||
|
extern yy::parser::symbol_type yylex();
|
||||||
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%token PLUS
|
%token PLUS
|
||||||
%token TIMES
|
%token TIMES
|
||||||
%token MINUS
|
%token MINUS
|
||||||
%token DIVIDE
|
%token DIVIDE
|
||||||
%token INT
|
%token <int> INT
|
||||||
%token DEFN
|
%token DEFN
|
||||||
%token DATA
|
%token DATA
|
||||||
%token CASE
|
%token CASE
|
||||||
@ -20,7 +24,88 @@
|
|||||||
%token CPAREN
|
%token CPAREN
|
||||||
%token COMMA
|
%token COMMA
|
||||||
%token ARROW
|
%token ARROW
|
||||||
%token EQUA
|
%token EQUAL
|
||||||
%token LID
|
%token <std::string> LID
|
||||||
%token UID
|
%token <std::string> UID
|
||||||
|
|
||||||
|
%language "c++"
|
||||||
|
%define api.value.type variant
|
||||||
|
%define api.token.constructor
|
||||||
|
|
||||||
|
%type <std::vector<std::string>> lowercaseParams
|
||||||
|
%type <std::vector<definition_ptr>> program definitions
|
||||||
|
%type <std::vector<branch_ptr>> branches
|
||||||
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
|
%type <definition_ptr> definition
|
||||||
|
%type <branch_ptr> branch
|
||||||
|
%type <pattern_ptr> pattern
|
||||||
|
|
||||||
|
%start program
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
program
|
||||||
|
: definitions { program = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
definitions
|
||||||
|
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
definition
|
||||||
|
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||||
|
{ $$ = definition_ptr(
|
||||||
|
new definition_defn(std::move($2), std::move($3), std::move($6))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
lowercaseParams
|
||||||
|
: %empty { $$ = std::vector<std::string>(); }
|
||||||
|
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
aAdd
|
||||||
|
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
||||||
|
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
||||||
|
| aMul { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
aMul
|
||||||
|
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
|
||||||
|
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
|
||||||
|
| app { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
app
|
||||||
|
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
|
||||||
|
| appBase { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
appBase
|
||||||
|
: INT { $$ = ast_ptr(new ast_int($1)); }
|
||||||
|
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
|
||||||
|
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
|
||||||
|
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||||
|
| case { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
case
|
||||||
|
: CASE aAdd OF OCURLY branches CCURLY
|
||||||
|
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
branches
|
||||||
|
: branches COMMA branch { /* Take over the list built so far, then append the new branch to the result (not to the moved-from left operand). */ $$ = std::move($1); $$.push_back(std::move($3)); }
|
||||||
|
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
|
||||||
|
;
|
||||||
|
|
||||||
|
branch
|
||||||
|
: pattern ARROW OCURLY aAdd CCURLY
|
||||||
|
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
pattern
|
||||||
|
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
|
||||||
|
| UID lowercaseParams
|
||||||
|
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
|
||||||
|
;
|
||||||
|
34
code/compiler_scanner_bison.l
Normal file
34
code/compiler_scanner_bison.l
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
%option noyywrap
|
||||||
|
|
||||||
|
%{
|
||||||
|
#include <iostream>
|
||||||
|
#include "compiler_ast.hpp"
|
||||||
|
#include "compiler_parser.hpp"
|
||||||
|
|
||||||
|
#define YY_DECL yy::parser::symbol_type yylex()
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
[ \t\r\n]+ { /* Skip whitespace, including tabs and carriage returns, so they never reach the default echo rule. */ }
|
||||||
|
\+ { return yy::parser::make_PLUS(); }
|
||||||
|
\* { return yy::parser::make_TIMES(); }
|
||||||
|
- { return yy::parser::make_MINUS(); }
|
||||||
|
\/ { return yy::parser::make_DIVIDE(); }
|
||||||
|
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
|
||||||
|
defn { return yy::parser::make_DEFN(); }
|
||||||
|
data { return yy::parser::make_DATA(); }
|
||||||
|
case { return yy::parser::make_CASE(); }
|
||||||
|
of { return yy::parser::make_OF(); }
|
||||||
|
\{ { return yy::parser::make_OCURLY(); }
|
||||||
|
\} { return yy::parser::make_CCURLY(); }
|
||||||
|
\( { return yy::parser::make_OPAREN(); }
|
||||||
|
\) { return yy::parser::make_CPAREN(); }
|
||||||
|
, { return yy::parser::make_COMMA(); }
|
||||||
|
-> { return yy::parser::make_ARROW(); }
|
||||||
|
= { return yy::parser::make_EQUAL(); }
|
||||||
|
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
|
||||||
|
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
|
||||||
|
|
||||||
|
%%
|
@ -226,6 +226,10 @@ It's called Bison, and it is frequently used with Flex. Before we get to bison,
|
|||||||
incurred - the implementation of our AST. Such a tree is language-specific, so Bison doesn't generate it for us. Here's what
|
incurred - the implementation of our AST. Such a tree is language-specific, so Bison doesn't generate it for us. Here's what
|
||||||
I came up with:
|
I came up with:
|
||||||
{{< codeblock "C++" "compiler_ast.hpp" >}}
|
{{< codeblock "C++" "compiler_ast.hpp" >}}
|
||||||
|
We create a base class for an expression tree, which we call `ast`. Then, for each possible syntactic construct in our language
|
||||||
|
(a number, a variable, a binary operation, a case expression) we create a subclass of `ast`. The `ast_case` subclass
|
||||||
|
is the most complex, since it must contain a list of case expression branches, each of which is a combination of a `pattern` and
|
||||||
|
another expression.
|
||||||
|
|
||||||
Finally, we get to writing our Bison file, `parser.y`. Here's what I came up with:
|
Finally, we get to writing our Bison file, `parser.y`. Here's what I came up with:
|
||||||
{{< rawblock "compiler_parser.y" >}}
|
{{< rawblock "compiler_parser.y" >}}
|
||||||
|
Loading…
Reference in New Issue
Block a user