A Simple Recursive Descend Parser in C/C++   Leave a comment

#ifndef __EXP_PARSER_h__
#define __EXP_PARSER_h__

#include <cstdio>
#include <tchar.h>
#include <string>
#include <iostream>
#include <vector>
#include <map>

using namespace std;

typedef struct Symbol Symbol;
typedef struct ExpTree Tree;

int __stdcall eval(Tree* tree);

typedef Symbol* LPSYMBOL;

typedef enum SYMBOL_TYPE SYMBOL_TYPE;

enum SYMBOL_TYPE
{
	NUMBER		= 0x0,
	VARIABLE	= 0x1,
	ADD			= 0x2,
	SUBSTRACT	= 0x3,
	DIVIDE		= 0x4,
	MULTIPLY	= 0x5,
	MODULUS		= 0x6,
	MAX			= 0x7,
	MIN			= 0x8,
	ASSIGN		= 0xA,

	NOOPER		= 0xF,
};

struct Symbol
{
	SYMBOL_TYPE type;
	int value;
	TCHAR* name;

	Symbol() : name(0), value(0xCCCCCCCC), type(NUMBER) {}
	~Symbol(){
		delete []name;
	}
};

struct ExpTree
{
	int op;				//the operation code;
	int value;			//value if number
	bool negate;		//whether or not is a negated expr
	Symbol* symbol;		//symbol entry if available
	Tree* left;
	Tree* right;

	ExpTree() : symbol(0), op(NOOPER), left(0), right(0), value(0xCCCCCCCC), negate(false) {}
	~ExpTree(){
		delete left;
		delete right;
	}
};

map<basic_string<TCHAR, char_traits >, LPSYMBOL> sym_tabl;

class PurgeSymbols
{
public:
	PurgeSymbols(){}
	~PurgeSymbols(){
		map<basic_string<TCHAR, char_traits >, LPSYMBOL>::iterator it_sym = sym_tabl.begin();
		while(it_sym != sym_tabl.end()){
			delete (*it_sym).second;
			it_sym++;
		}
	}
} purger;

//op functions
/*
//VERSION EVALUATOR ONLY
int __stdcall pushdata(Tree* tree)
	{ return tree->value; }	//	NUMBER
int __stdcall pushvar(Tree* tree)
	{ return tree->symbol->value;} // 	VARIABLE
int __stdcall pushconst(Tree* tree)
	{ return tree->symbol->value;} //	CONSTANT

int __stdcall addop(Tree* tree)
	{ return eval(tree->left) + eval(tree->right);} //	ADD
int __stdcall subop(Tree* tree)
	{ return eval(tree->left) - eval(tree->right);} // 	SUBSTRACT
int __stdcall divop(Tree* tree) //	DIVIDE
{
	int left, right;
	left = eval(tree->left);
	right = eval(tree->right);
	if(right==0){
		cout << _T("Division by Zero. Invalid operation!") <left) * eval(tree->right);} //	MULTIPLY
int __stdcall modop(Tree* tree)
	{ return eval(tree->left) * eval(tree->right);} //	MODULUS
int __stdcall maxfn(Tree* tree)
	{ return _MAX(eval(tree->left), eval(tree->right));} //	MAX
int __stdcall minfn(Tree* tree)
	{ return _MIN(eval(tree->left), eval(tree->right));} //	MIN
int __stdcall assignop(Tree* tree) //	ASSIGN,
{
	if(tree->left->op==VARIABLE){
		tree->left->symbol->value = eval(tree->right);
		return tree->op;
	}
	else
		cout << _T("Syntax Error: l-value must be a variable!") <op](tree);
}
*/
//the code generator function

typedef union Code
{
	void (__stdcall *op)(void);	//function if is an operator
	int value;			//value if a number	
	Symbol* symbol;		//symbol if a variable or const
} Code;

typedef struct CodeEx
{
	Code code;
	bool negate;
}CodeEx;

#define MAX_INSTRUCTIONS		1000
#define MAX_STACK_INSTRUCTIONS	1000

CodeEx code[MAX_INSTRUCTIONS];
int stack[MAX_STACK_INSTRUCTIONS];

int stackp = 0;
volatile int next_code = 0;
int left_var = 0;

LPSYMBOL curr_sym[MAX_INSTRUCTIONS];

void __stdcall pushdata()
	{ 
		bool negate = code[next_code].negate;
		stack[stackp++] = negate ? -code[next_code++].code.value : code[next_code++].code.value; }	//	NUMBER

void __stdcall pushvar()
	{
		bool negate = code[next_code].negate;
		curr_sym[left_var] = code[next_code++].code.symbol;
		stack[stackp++] =  negate ? -curr_sym[left_var++]->value : curr_sym[left_var++]->value;
	} // 	VARIABLE

void __stdcall addop()
	{
		bool negate = code[next_code - 1].negate;
		int left, right;
		right = stack[--stackp];
		left = stack[--stackp];
		stack[stackp++] = left + right;
		if(negate)
			stack[stackp - 1] *= -1;
	} //	ADD
void __stdcall subop()
	{ 
		bool negate = code[next_code - 1].negate;
		int left, right;
		right = stack[--stackp];
		left = stack[--stackp];
		stack[stackp++] = left - right;
		if(negate)
			stack[stackp - 1] *= -1;

	} // 	SUBSTRACT
void __stdcall divop() //	DIVIDE
	{
		bool negate = code[next_code - 1].negate;
		int left, right;
		right = stack[--stackp];
		left = stack[--stackp];

		if(right==0){
			cout << _T("Division by Zero. Invalid operation!") <value = left;
	}

int generate(int codep, Tree* tree)
{
	switch(tree->op)
	{
	case NUMBER://		= 0x0,
		code[codep++].code.op	= pushdata;
		code[codep].negate	= tree->negate;
		code[codep++].code.value = tree->value;
		return codep;
		break;
	case VARIABLE://	= 0x1,
		code[codep++].code.op		= pushvar;
		code[codep].negate	= tree->negate;
		code[codep++].code.symbol	= tree->symbol;
		return codep;
		break;
	case ADD://			= 0x3,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = addop;
		return codep;
		break;
	case SUBSTRACT://	= 0x4,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = subop;
		return codep;
		break;
	case DIVIDE://		= 0x5,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = divop;
		return codep;
		break;
	case MULTIPLY://	= 0x6,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = multop;
		return codep;
		break;
	case MODULUS://		= 0x7,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = modop;
		return codep;
		break;
	case MAX://			= 0x8,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = maxfn;
		return codep;
		break;
	case MIN://			= 0x9,
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep].negate	= tree->negate;
		code[codep++].code.op = minfn;
		return codep;
		break;
	case ASSIGN://		= 0xA,		
		codep = generate(codep, tree->left);
		codep = generate(codep, tree->right);
		code[codep++].code.op = assignop;
		return codep;
		break;
	default:
		break;
	};
	return codep;
}

//eval : evaluate expression from generated code
int __stdcall eval(Tree* tree)
{
	next_code = generate(0, tree);
	code[next_code].code.op = NULL;

	stackp		= 0;
	next_code	= 0;
	left_var	= 0;

	while(code[next_code].code.op!= NULL)
		(code[next_code++].code.op)();

	return stack[0];
}

#define MAX_TOKEN_SIZE 256

const TCHAR* in_ptr = NULL;

Tree* expr();
Tree* proc_fn(const TCHAR* token);

void skip_ws()
{
	while(in_ptr!=NULL && _istspace(*in_ptr))
		in_ptr++;
}

Tree* factor()
{
	volatile bool bFlagNegate = false;
	Tree* the_factor = NULL;
	if(*in_ptr!=_T(''))
	{
PARSE_FACTOR:
		skip_ws();
		unsigned int chr_pos = 0;
		TCHAR token[MAX_TOKEN_SIZE];		
		if(_istalpha(*in_ptr))
		{
			while(*in_ptr!=_T('') && (_istalpha(*in_ptr) || _istdigit(*in_ptr)))
			{
				token[chr_pos++] = *in_ptr++;
			}
			token[chr_pos++] = _T('');
			skip_ws();
			if(*in_ptr==_T('('))
			{		
				in_ptr++;
				the_factor = proc_fn(token);skip_ws();
				if(*in_ptr==_T(')')){
					in_ptr++;
				}
					else
						cout << _T("error: missing \')\'.") << endl;					
			}else{
				the_factor = new Tree;				

				map<basic_string<TCHAR, char_traits >, LPSYMBOL>::iterator it_sym;
				if((it_sym= sym_tabl.find(token))==sym_tabl.end()){
					the_factor->symbol = new Symbol;
					sym_tabl[token] = the_factor->symbol;

					the_factor->op = VARIABLE;
					the_factor->symbol->name = new TCHAR[_tcslen(token)];
					_tcscpy(the_factor->symbol->name, token);
					the_factor->symbol->value = 0xCCCCCCCC;
					the_factor->symbol->type = VARIABLE;
				}else{
					the_factor->symbol = it_sym->second;				
					the_factor->op = VARIABLE;
				}

			}
		}else if(_istdigit(*in_ptr))
		{
			while(*in_ptr!=_T('') && _istdigit(*in_ptr))
			{
				token[chr_pos++] = *in_ptr++;
			}
			token[chr_pos++] = _T('');
			skip_ws();
			the_factor = new Tree;
			the_factor->op = NUMBER;
			the_factor->value = _ttoi(token);
		}else if(*in_ptr==_T('('))
		{
			in_ptr++;
			the_factor = expr();skip_ws();			
			if(*in_ptr==_T(')'))
			{
				in_ptr++;					

			}else
				cout << _T("error: missing \')\'.") <negate = bFlagNegate;
	return the_factor;
}

Tree* term()
{
	Tree* factor_op = NULL;
	Tree* factor_left = factor();		
	skip_ws();
	for(;;)
		switch(*in_ptr)
		{
		case _T('%'):
			{
				++in_ptr;
				factor_op			= new Tree;

				factor_op->op		= MODULUS;
				factor_op->left		= factor_left;
				factor_op->right	= factor();

				factor_left			= factor_op;
			}
			break;
		case _T('/'):
			{
				++in_ptr;
				factor_op			= new Tree;

				factor_op->op = DIVIDE;
				factor_op->left = factor_left;
				factor_op->right = factor();

				factor_left			= factor_op;
			}
			break;
		case _T('*'):
			{
				++in_ptr;
				factor_op			= new Tree;

				factor_op->op = MULTIPLY;
				factor_op->left = factor_left;
				factor_op->right = factor();

				factor_left			= factor_op;
			}
			break;
		default:
			return factor_left;
			break;
		};

	return factor_left;
}

Tree* expr()
{

	Tree* term_op = NULL;
	Tree* term_left = term();	
	skip_ws();
	for(;;)
		switch(*in_ptr)
		{
		case _T('+'):
			{
				++in_ptr;
				term_op			= new Tree;

				term_op->op		= ADD;
				term_op->left	= term_left;
				term_op->right	= term();

				term_left		= term_op;
			}
			break;
		case _T('-'):
			{
				++in_ptr;
				term_op			= new Tree;

				term_op->op		= SUBSTRACT;
				term_op->left	= term_left;
				term_op->right	= term();

				term_left		= term_op;
			}
			break;
		default:
			return term_left;
			break;
		};	
	return term_left;
}

Tree* proc_fn(const TCHAR* token)
{	
	Tree* funct = new Tree;
	funct->left = NULL;				
	funct->right = NULL;
	skip_ws();

	if(_tcsicmp(token, _T("max"))==0)
		funct->op = MAX;
	else if(_tcsicmp(token, _T("min"))==0)
		funct->op = MIN;
	else
		cout << _T("error: undeclared identifier: ") << token << _T(".") <left		= expr();
	if(*in_ptr==_T(','))
		in_ptr++;
	funct->right	= expr();

	return funct;
}

Tree* prop(const TCHAR* source)
{
	TCHAR token[MAX_TOKEN_SIZE];
	unsigned int chr_pos = 0;
	in_ptr = source;
	if(in_ptr!=NULL && *in_ptr!=_T(''))
	{
		skip_ws();
		if(_istalpha(*in_ptr))
		{
			while(*in_ptr!=_T('') && _istalpha(*in_ptr) || _istdigit(*in_ptr))
			{
				token[chr_pos++] = *in_ptr++;
			}
			token[chr_pos++] = _T('');
		}
		skip_ws();
		switch(*in_ptr)
		{
		case _T('='):
			{
				++in_ptr;
				Tree* identifier = new Tree;								
				identifier->left = NULL;
				identifier->right = NULL;

				map<basic_string<TCHAR, char_traits >, LPSYMBOL>::iterator it_sym;
				if((it_sym= sym_tabl.find(token))==sym_tabl.end()){
					identifier->symbol = new Symbol;
					sym_tabl[token] = identifier->symbol;

					identifier->op = VARIABLE;
					identifier->symbol->value = 0;
					identifier->symbol->name = new TCHAR[_tcslen(token) + 1];
					_tcscpy(identifier->symbol->name, token);
					identifier->symbol->type = VARIABLE;
				}else{
					identifier->symbol = it_sym->second;
					identifier->op = VARIABLE;
				}

				Tree* assign = new Tree;
				assign->op = ASSIGN;
				 //x = expr
				assign->left = identifier;
				assign->right = expr();

				return assign;
			}
			break;
		case _T('('):
			{
				++in_ptr;
				Tree* fnp = proc_fn(token);
				if(*in_ptr==_T(')')){
						in_ptr++;
						skip_ws();
				}
				else
					cout << _T("error: missing \')\'.") << endl;				
				return fnp;
			}
			break;		
		default:
			cout << _T("Syntax Error in statement.") << endl;
			break;
		};
	}
	return NULL;	
}

Tree* parse(const TCHAR* source)
{	
	in_ptr = source;
	while(*in_ptr!=_T(''))
	{
		Tree* next_prop = prop(in_ptr);		
		if(*in_ptr==_T(';')){
			in_ptr++;	
			skip_ws();
		}
		else
			cout << _T("error: missing \';\'.") << endl;
		return next_prop;
	}

	return NULL;
}

void emit(Code inst)
{
	inst; //to byte stream...
}

int compile(const TCHAR* source)
{
	in_ptr = source;
	Tree* tree = NULL;	
	volatile int result = 0;
	while((tree = parse(in_ptr))!=NULL)
	{
		result++;
		next_code = generate(0, tree); //each call to each opfn do emit binary stream code...
		code[next_code].code.op = NULL;

		stackp		= 0;
		next_code	= 0;
		left_var	= 0;
		while(code[next_code].code.op != NULL)
		{		
			(code[next_code++].code.op)();
		}
		delete tree;		
	}
	return result;
}

int eval_expr(const TCHAR* source)
{
	in_ptr = source;
	Tree* tree = NULL;	
	volatile int retVal = 0;
	while((tree = parse(in_ptr))!=NULL)
	{		
		retVal = eval(tree);
		delete tree;		
	}
	return retVal;
}

#endif //__EXP_PARSER_h__

Posted February 2, 2013 by hmarzan

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: