How to use C++ with Bison, V 1.0.2 |
I assume that you already know C++ and how to use Bison, so I will talk just about the important parts.
prompt -> exp '\n' | prompt exp '\n'. exp -> exp '+' exp | exp '*' exp | ident | number | ident '='exp. ident -> 'A'|..|'Z'. number -> digit | number digit. digit -> '0'|..|'9'. |
Since the example is very simple I have chosen to declare all classes in a single file and the function members are defined in the class interface. The file is ast.h .
class Expression {
public:
virtual ~Expression () {}
// It's necessary because we need to clone objects without
// knowing the exact type.
virtual Expression *clone () = 0;
// The value represented by the expression
virtual int value () = 0;
};
// For addictive expressions
class Plus : public Expression {
Expression *m_left, *m_right;
public:
Plus (Expression *left, Expression *right): m_left (left), m_right (right) {}
// Copy constructor
Plus (const Plus &other) {
m_left = other.m_left->clone ();
m_right = other.m_right->clone ();
}
virtual ~Plus ()
{
delete m_left;
delete m_right;
}
Plus &operator = (const Plus &other) {
if (&other != this) {
delete m_left;
delete m_right;
m_left = other.m_left->clone ();
m_right = other.m_right->clone ();
}
}
virtual Expression *clone () { return new Plus (*this); }
virtual int value () { return m_left->value () + m_right->value (); }
};
// For multiplicative expressions
class Times : public Expression {
Expression *m_left, *m_right;
public:
Times (Expression *left, Expression *right): m_left (left), m_right (right) {}
// Copy constructor
Times (const Times &other) {
m_left = other.m_left->clone ();
m_right = other.m_right->clone ();
}
virtual ~Times ()
{
delete m_left;
delete m_right;
}
Times &operator = (const Times &other) {
if (&other != this) {
delete m_left;
delete m_right;
m_left = other.m_left->clone ();
m_right = other.m_right->clone ();
}
}
virtual Expression *clone () { return new Times (*this); }
virtual int value () { return m_left->value () * m_right->value (); }
};
// For numbers
class Number : public Expression {
int m_val;
public:
Number (int val): m_val (val) {}
// Copy constructor
Number (const Number &other) { m_val = other.m_val; }
Number &operator = (const Number &other) {
if (&other != this)
m_val = other.m_val;
}
virtual Expression *clone () { return new Number (*this); }
virtual int value () { return m_val; }
};
// For identifiers
class Ident : public Expression {
int *m_val;
public:
Ident (int *val): m_val (val) {}
// Copy constructor
Ident (const Ident &other) { m_val = other.m_val; }
Ident &operator = (const Ident &other) {
if (&other != this)
m_val = other.m_val;
}
virtual Expression *clone () { return new Ident (*this); }
virtual int value () { return *m_val; }
};
|
There is a subtle difference between using Bison with C and using Bison with C++, function prototypes. In the example you see the prototypes for yyerror () and for yylex () , since you have to put them in case of C++, but in C you can forget them (I don't advice you to do that, because it's bad practice).
The problem that appeared in the thread that I made mention above, was about how to cope with the restrictions that C++ makes to unions and at the same time use unions in Bison. The solution I think is more manageable is to declare the fields that are classes as pointers, in that case there isn't any problem, because they are pointers and there aren't any restrictions to pointer data members.
In the first version of this tutorial I said that I wouldn't discuss the problem of deleting the nodes that are in the parse stack when a parse error happens, but after some suggestions done to this tutorial I've decided to add the code to solve the mentionated situation.
I've opted to use a stack that contains all the nodes that were allocated until the moment that the error has occurred. In that moment the stack is cleared and its nodes deleted. When the parser reckonizes a + or a * the two top nodes are removed because they are the children that will be handled by the father node and the father is pushed in the stack.
Now for the file (it's implemented in grammar.y) :
%{
#include <iostream.h>
#include <cctype>
#include <cstring>
#include <vector>
#include <stack>
#include "ast.h"
// Bring the standard library into the
// global namespace
using namespace std;
// Prototypes to keep the compiler happy
void yyerror (const char *error);
int yylex ();
void clear_stack ();
// Variables
int vars ['Z'- 'A' + 1];
// stack class that takes care of all the nodes that were allocated
stack <Expression *> nodes;
%}
%token IDENT NUMBER
%union {
Expression *exp; /* For the expressions. Since it is a pointer, no problem. */
int value; /* For the lexical analyser. NUMBER tokens */
char ident; /* For the lexical analyser. IDENT tokens */
}
/* Lets inform Bison about the type of each terminal and non-terminal */
%type <exp> exp
%type <value> NUMBER
%type <ident> IDENT
/* Precedence information to resolve ambiguity */
%left '+'
%left '*'
%%
prompt : exp '\n' {
if ($1) {
cout << $1->value () << endl;
clear_stack ();
}
}
| prompt exp '\n' {
if ($2) {
cout << $2->value () << endl;
clear_stack ();
}
}
| error '\n' { clear_stack (); }
;
exp : exp '+' exp {
$$ = new Plus ($1, $3);
nodes.pop (); // The childreen are handled by Plus so we
nodes.pop (); // take them of the allocated nodes stack.
nodes.push ($$);
}
| exp '*' exp {
$$ = new Times ($1, $3);
nodes.pop (); // The same as above.
nodes.pop ();
nodes.push ($$);
}
| IDENT { $$ = new Ident (&vars [$1 - 'A']); nodes.push ($$); }
| NUMBER { $$ = new Number ($1); nodes.push ($$); }
| IDENT '=' exp { vars [$1 - 'A'] = $3->value (); $$ = $3; nodes.push ($$); }
;
%%
// we need to provid the following functions
int main ()
{
memset (vars, 0, sizeof (vars));
return yyparse ();
}
void yyerror (const char *error)
{
cout << error << endl;
}
int yylex ()
{
char ch;
do {
ch = cin.peek ();
if (isalpha (ch)) {
cin.get ();
yylval.ident = ch;
return IDENT;
}
else if (isdigit (ch)) {
int value = 0;
while (!cin.eof () && isdigit (ch)) {
cin.get ();
value = value * 10 + ch - '0';
ch = cin.peek ();
}
yylval.value = value;
return NUMBER;
}
else if (ch == '+' || ch == '\n' || ch == '*' || ch == '=') {
cin.get ();
return ch;
}
else
cin.get ();
} while (!cin.eof ());
return -1;
}
// Deletes all the nodes that were allocated
void clear_stack ()
{
while (!nodes.empty ()) {
delete nodes.top ();
nodes.pop ();
}
}
|
To test this example use the script create, and run the example file. Please note that this tutorial was tested using g++ 3.3.6 with the libstdc++.