diff --git a/Makefile b/Makefile index 6403429..e9b1151 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,13 @@ all: scan -scan: main.cpp scanner.cpp - g++ main.cpp scanner.cpp -o scan +scan: main.cpp scanner.cpp parser.cpp + g++ main.cpp scanner.cpp parser.cpp -o scan scanner.cpp: scanner.l flex -o scanner.cpp scanner.l -parser.tab.cpp: parser.y - bison -d parser.y +parser.cpp parser.hpp: parser.y + bison -d -o parser.cpp parser.y clean: rm -f scan scanner.cpp diff --git a/parser.y b/parser.y index 4f7dbc0..0342610 100644 --- a/parser.y +++ b/parser.y @@ -1,6 +1,8 @@ %{ #include +#include +#include "parser.hpp" %} @@ -19,6 +21,8 @@ %token RETURN %token WHILE %token BOOLEAN +%token FLOAT +%token INTEGER %token IDENTIFIER %token ASSIGN %token PLUS @@ -36,23 +40,36 @@ %token COMMA %token COLON -%define api.value.type { std::string } +%define api.value.type { const char* } %define api.pure full %define api.push-pull push +%start program + +%{ + +void yyerror(const char* s) { + std::cout << s << std::endl; +} + +%} %% program - : statements {} + : program stmt + | stmt ; -statements - : statement statements {} - | statement {} +stmt + : expr NEWLINE; + +expr + : assign + | INTEGER ; -statement - : PLUS {} +assign + : IDENTIFIER ASSIGN expr ; %% diff --git a/scanner.l b/scanner.l index 3a98a1d..8227fbe 100644 --- a/scanner.l +++ b/scanner.l @@ -14,6 +14,7 @@ #include #include #include +#include "parser.hpp" /* * We'll use this stack to keep track of indentation level, as described in @@ -22,6 +23,17 @@ * https://docs.python.org/3/reference/lexical_analysis.html#indentation */ std::stack _indent_stack; +yypstate* state = yypstate_new(); +/* PUSH_TOKEN(i, str): print token code i and text str, push the token into the push parser via yypush_parse(), and once the status is no longer YYPUSH_MORE free the parser state and return that status from the enclosing function. Expands to a bare do/while(0) so each call site supplies its own ';'. */ +#define PUSH_TOKEN(i, str) do { \ + std::cout << i << ", " << str << std::endl; \ + YYSTYPE temp = str; \ + int s = yypush_parse(state, i, &temp); \ + if (s != YYPUSH_MORE) { \ + yypstate_delete(state); \ + return s; \ + } } while(0) + %} %% @@ -59,7 +71,7 @@ std::stack _indent_stack; * then emit an INDENT and push the new indentation 
level onto * the stack. */ - std::cout << "INDENT" << std::endl; + PUSH_TOKEN(INDENT, ""); /* hand INDENT to the push parser with an empty semantic value; the macro returns from the enclosing function if the parser is done or failed */ _indent_stack.push(yyleng); } else { /* @@ -70,7 +82,7 @@ std::stack _indent_stack; */ while (!_indent_stack.empty() && _indent_stack.top() != yyleng) { _indent_stack.pop(); - std::cout << "DEDENT" << std::endl; + PUSH_TOKEN(DEDENT, ""); /* one DEDENT per indentation level popped */ } /* @@ -95,13 +107,13 @@ std::stack _indent_stack; */ while (_indent_stack.top() != 0) { _indent_stack.pop(); - std::cout << "DEDENT" << std::endl; + PUSH_TOKEN(DEDENT, ""); /* may return early, in which case REJECT below is not reached */ } REJECT; } \r?\n { - std::cout << "NEWLINE" << std::endl; + PUSH_TOKEN(NEWLINE, ""); } <> { @@ -111,58 +123,58 @@ std::stack _indent_stack; */ while(_indent_stack.top() != 0) { _indent_stack.pop(); - std::cout << "DEDENT" << std::endl; + PUSH_TOKEN(DEDENT, ""); /* NOTE(review): no end-of-input token is pushed before yyterminate() in the visible code -- confirm the parser is finalized and its state freed elsewhere */ } yyterminate(); } [ \t] { /* Ignore spaces that haven't been handled above. */ } -"and" { std::cout << "AND\t\t" << yytext << std::endl; } -"break" { std::cout << "BREAK\t\t" << yytext << std::endl; } -"def" { std::cout << "DEF\t\t" << yytext << std::endl; } -"elif" { std::cout << "ELIF\t\t" << yytext << std::endl; } -"else" { std::cout << "ELSE\t\t" << yytext << std::endl; } -"for" { std::cout << "FOR\t\t" << yytext << std::endl; } -"if" { std::cout << "IF\t\t" << yytext << std::endl; } -"not" { std::cout << "NOT\t\t" << yytext << std::endl; } -"or" { std::cout << "OR\t\t" << yytext << std::endl; } -"return" { std::cout << "RETURN\t\t" << yytext << std::endl; } -"while" { std::cout << "WHILE\t\t" << yytext << std::endl; } +"and" { PUSH_TOKEN(AND, ""); } +"break" { PUSH_TOKEN(BREAK, ""); } +"def" { PUSH_TOKEN(DEF, ""); } +"elif" { PUSH_TOKEN(ELIF, ""); } +"else" { PUSH_TOKEN(ELSE, ""); } +"for" { PUSH_TOKEN(FOR, ""); } +"if" { PUSH_TOKEN(IF, ""); } +"not" { PUSH_TOKEN(NOT, ""); } +"or" { PUSH_TOKEN(OR, ""); } +"return" { PUSH_TOKEN(RETURN, ""); } +"while" { PUSH_TOKEN(WHILE, ""); } -"True" { std::cout << "BOOLEAN\t\t" << true << std::endl; } -"False" { std::cout << "BOOLEAN\t\t" << false << std::endl; } +"True" 
{ PUSH_TOKEN(BOOLEAN, "True"); } +"False" { PUSH_TOKEN(BOOLEAN, "False"); /* the false literal carries its own text, distinct from the "True" rule */ } [a-zA-Z_][a-zA-Z0-9_]* { - std::cout << "IDENTIFIER\t" << yytext << std::endl; + PUSH_TOKEN(IDENTIFIER, ""); /* NOTE(review): the lexeme in yytext is not forwarded, so the parser sees "" (same for FLOAT and INTEGER below) -- confirm intended */ } -?[0-9]*"."[0-9]+ { - std::cout << "FLOAT\t\t" << atof(yytext) << std::endl; + PUSH_TOKEN(FLOAT, ""); } -?[0-9]+ { - std::cout << "INTEGER\t\t" << atoi(yytext) << std::endl; + PUSH_TOKEN(INTEGER, ""); } -"=" { std::cout << "ASSIGN\t\t" << yytext << std::endl; } -"+" { std::cout << "PLUS\t\t" << yytext << std::endl; } -"-" { std::cout << "MINUS\t\t" << yytext << std::endl; } -"*" { std::cout << "TIMES\t\t" << yytext << std::endl; } -"/" { std::cout << "DIVIDEDBY\t" << yytext << std::endl; } +"=" { PUSH_TOKEN(ASSIGN, ""); } +"+" { PUSH_TOKEN(PLUS, ""); } +"-" { PUSH_TOKEN(MINUS, ""); } +"*" { PUSH_TOKEN(TIMES, ""); } +"/" { PUSH_TOKEN(DIVIDEDBY, ""); } -"==" { std::cout << "EQ\t\t" << yytext << std::endl; } -"!=" { std::cout << "NEQ\t\t" << yytext << std::endl; } -">" { std::cout << "GT\t\t" << yytext << std::endl; } -">=" { std::cout << "GTE\t\t" << yytext << std::endl; } -"<" { std::cout << "LT\t\t" << yytext << std::endl; } -"<=" { std::cout << "LTE\t\t" << yytext << std::endl; } +"==" { PUSH_TOKEN(EQ, ""); } +"!=" { PUSH_TOKEN(NEQ, ""); } +">" { PUSH_TOKEN(GT, ""); } +">=" { PUSH_TOKEN(GTE, ""); } +"<" { PUSH_TOKEN(LT, ""); } +"<=" { PUSH_TOKEN(LTE, ""); } -"(" { std::cout << "LPAREN\t\t" << yytext << std::endl; } -")" { std::cout << "RPAREN\t\t" << yytext << std::endl; } +"(" { PUSH_TOKEN(LPAREN, ""); } +")" { PUSH_TOKEN(RPAREN, ""); } -"," { std::cout << "COMMA\t\t" << yytext << std::endl; } -":" { std::cout << "COLON\t\t" << yytext << std::endl; } +"," { PUSH_TOKEN(COMMA, ""); } +":" { PUSH_TOKEN(COLON, ""); } . { std::cerr << "Unrecognized token on line " << yylineno << ": "