From 85908ae0c40cf84d6f2fd08340b307ebb535aa5c Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 5 Aug 2019 00:09:59 -0700 Subject: [PATCH] Write first draft of Bison-based parser and lexer --- code/compiler_parser.y | 97 +++++++++++++++++++++++++++-- code/compiler_scanner_bison.l | 34 ++++++++++ content/blog/02_compiler_parsing.md | 4 ++ 3 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 code/compiler_scanner_bison.l diff --git a/code/compiler_parser.y b/code/compiler_parser.y index a72d14d..70950b4 100644 --- a/code/compiler_parser.y +++ b/code/compiler_parser.y @@ -1,15 +1,19 @@ %{ #include #include -#include "ast.hpp" -#include "parser.hpp" +#include "compiler_ast.hpp" +#include "compiler_parser.hpp" + +std::vector program; +extern yy::parser::symbol_type yylex(); + %} %token PLUS %token TIMES %token MINUS %token DIVIDE -%token INT +%token INT %token DEFN %token DATA %token CASE @@ -20,7 +24,88 @@ %token CPAREN %token COMMA %token ARROW -%token EQUA -%token LID -%token UID +%token EQUAL +%token LID +%token UID +%language "c++" +%define api.value.type variant +%define api.token.constructor + +%type > lowercaseParams +%type > program definitions +%type > branches +%type aAdd aMul case app appBase +%type definition +%type branch +%type pattern + +%start program + +%% + +program + : definitions { /* store the finished definition list in the global vector declared in the prologue */ program = std::move($1); } + ; + +definitions + : definitions definition { /* take over the list built so far, then append the newest definition */ $$ = std::move($1); $$.push_back(std::move($2)); } + | definition { /* first definition: start a one-element list */ $$ = std::vector(); $$.push_back(std::move($1)); } + ; + +definition + : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY + { /* name, parameter list and body expression are moved into a new definition_defn node */ $$ = definition_ptr( + new definition_defn(std::move($2), std::move($3), std::move($6))); } + ; + +lowercaseParams + : %empty { /* no parameters: begin with an empty list */ $$ = std::vector(); } + | lowercaseParams LID { /* take over the list built so far, then append the next name */ $$ = std::move($1); $$.push_back(std::move($2)); } + ; + +aAdd + : aAdd PLUS aMul { /* binary node built from the left and right operands */ $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); } + | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, 
std::move($1), std::move($3))); } + | aMul { $$ = std::move($1); } + ; + +aMul + : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); } + | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); } + | app { $$ = std::move($1); } + ; + +app + : app appBase { /* application node: function expression on the left, one argument on the right */ $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); } + | appBase { $$ = std::move($1); } + ; + +appBase + : INT { $$ = ast_ptr(new ast_int($1)); } + | LID { $$ = ast_ptr(new ast_lid(std::move($1))); } + | UID { $$ = ast_ptr(new ast_uid(std::move($1))); } + | OPAREN aAdd CPAREN { /* parentheses only group: pass the inner expression through */ $$ = std::move($2); } + | case { $$ = std::move($1); } + ; + +case + : CASE aAdd OF OCURLY branches CCURLY + { /* scrutinee expression plus the collected branch list */ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); } + ; + +branches + : branches COMMA branch { /* take over the list built so far, then append to the RESULT: the left-hand operand has just been moved from, so pushing onto it would drop the branch */ $$ = std::move($1); $$.push_back(std::move($3)); } + | branch { /* first branch: start a one-element list */ $$ = std::vector(); $$.push_back(std::move($1));} + ; + +branch + : pattern ARROW OCURLY aAdd CCURLY + { /* pattern and the body expression it guards */ $$ = branch_ptr(new branch(std::move($1), std::move($4))); } + ; + +pattern + : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); } + | UID lowercaseParams + { /* constructor name followed by zero or more lowercase parameter names */ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); } + ; diff --git a/code/compiler_scanner_bison.l b/code/compiler_scanner_bison.l new file mode 100644 index 0000000..af90ca7 --- /dev/null +++ b/code/compiler_scanner_bison.l @@ -0,0 +1,34 @@ +%option noyywrap + +%{ +#include +#include "compiler_ast.hpp" +#include "compiler_parser.hpp" + +#define YY_DECL yy::parser::symbol_type yylex() + +%} + +%% + +[ \n]+ { /* runs of spaces and newlines produce no token */ } +\+ { return yy::parser::make_PLUS(); } +\* { return yy::parser::make_TIMES(); } +- { return yy::parser::make_MINUS(); } +\/ { return yy::parser::make_DIVIDE(); } +[0-9]+ { /* digit run converted with atoi and carried as the INT token value */ return yy::parser::make_INT(atoi(yytext)); } +defn { return yy::parser::make_DEFN(); } +data { return yy::parser::make_DATA(); } +case { return yy::parser::make_CASE(); } +of { return yy::parser::make_OF(); } +\{ { return 
yy::parser::make_OCURLY(); } +\} { return yy::parser::make_CCURLY(); } +\( { return yy::parser::make_OPAREN(); } +\) { return yy::parser::make_CPAREN(); } +, { return yy::parser::make_COMMA(); } +-> { /* NOTE(review): two-character arrow; presumably chosen over the single-minus rule by flex longest-match - confirm */ return yy::parser::make_ARROW(); } += { return yy::parser::make_EQUAL(); } +[a-z][a-zA-Z]* { /* lowercase-initial name: the matched text is copied into the LID token value */ return yy::parser::make_LID(std::string(yytext)); } +[A-Z][a-zA-Z]* { /* uppercase-initial name: the matched text is copied into the UID token value */ return yy::parser::make_UID(std::string(yytext)); } + +%% diff --git a/content/blog/02_compiler_parsing.md b/content/blog/02_compiler_parsing.md index 473c753..2bdeb3d 100644 --- a/content/blog/02_compiler_parsing.md +++ b/content/blog/02_compiler_parsing.md @@ -226,6 +226,10 @@ It's called Bison, and it is frequently used with Flex. Before we get to bison, incurred - the implementation of our AST. Such a tree is language-specific, so Bison doesn't generate it for us. Here's what I came up with: {{< codeblock "C++" "compiler_ast.hpp" >}} +We create a base class for an expression tree, which we call `ast`. Then, for each possible syntactic construct in our language +(a number, a variable, a binary operation, a case expression) we create a subclass of `ast`. The `ast_case` subclass +is the most complex, since it must contain a list of case expression branches, which are a combination of a `pattern` and +another expression. Finally, we get to writing our Bison file, `parser.y`. Here's what I come up with: {{< rawblock "compiler_parser.y" >}}