Roll no - 4109
1 Title:
Intermediate Code Generation.
2 Problem Definition:
Intermediate code generation for a sample language using LEX and YACC.
3 Learning Objectives:
1. To understand Divide and Conquer strategy.
5 Theory
In the analysis-synthesis model of a compiler, the front end analyzes a source
program and creates an intermediate representation, from which the back end
generates target code. Ideally, details of the source language are confined to
the front end, and details of the target machine to the back end. The front end
translates a source program into an intermediate representation from which the
back end generates target code. With a suitably defined intermediate represen-
tation, a compiler for language i and machine j can then be built by combining
the front end for language i with the back end for machine j.
This approach to creating a suite of compilers can save a considerable amount of
effort: m × n compilers can be built by writing just m front ends and n back ends.
1
5.1 Benefits of using a machine-independent intermediate
form are:
1. Compiler for a different machine can be created by attaching a back end
for the new machine to an existing front end.
2. A machine-independent code optimizer can be applied to the intermediate
representation.
5.4 Quadruples:
A quadruple is a record structure with four fields, which are, op, arg1,
arg2 and result.
The op field contains an internal code for the operator. The three-address
statement x = y op z is represented by placing y in arg1, z in arg2 and x
in result.
2
The contents of fields arg1, arg2 and result are normally pointers to the
symbol-table entries for the names represented by these fields. If so, tem-
porary names must be entered into the symbol table as they are created.
5.5 Triples:
To avoid entering temporary names into the symbol table, we might refer
to a temporary value by the position of the statement that computes it.
If we do so, three-address statements can be represented by records with
only three fields: op, arg1 and arg2.
The fields arg1 and arg2, for the arguments of op, are either pointers to the
symbol table or pointers into the triple structure ( for temporary values ).
Since three fields are used, this intermediate code format is known as
triples.
6 Related Mathematics
Let S be the solution perspective of the given problem.
The set S is defined as:
$S = \{ s, e, X, Y, F, DD, NDD \}$
Where,
s= Start point
e= End point
F= Set of main functions
DD= set of deterministic data
NDD= set of non deterministic data
X= Input Set.
X = source program code in high level language.
$Y = \{\text{intermediate code for the sample code}\}$
3
$f_{ret}$: function that generates intermediate code using the syntax tree.
7 State Diagram
8 Program Code:
A5.l:
%{
/*
 * Scanner for the sample language.
 *
 * Token values are passed to the parser through yylval:
 *   - DTYPE, RLOP and VAR copy the lexeme into yylval.str
 *   - NUM stores the converted integer in yylval.ival
 * Single-character punctuation and arithmetic operators are returned as
 * their own character code.
 *
 * Lexemes are copied with snprintf so that a lexeme longer than the
 * fixed-size yylval.str buffer is truncated instead of overflowing it.
 */
#include "y.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
%}
%%
[ \t\n]+ ;
int|float|char|double {snprintf(yylval.str,sizeof yylval.str,"%s",yytext); return DTYPE;}
if {return IF;}
else {return ELSE;}
while {return WHILE;}
[0-9]+ {yylval.ival=atoi(yytext); return NUM;}
";"|"="|","|"("|")"|"{"|"}" {return yytext[0];}
"+"|"-"|"*"|"/" {return yytext[0];}
">"|"<"|">="|"<="|"==" {snprintf(yylval.str,sizeof yylval.str,"%s",yytext); return RLOP;}
[a-zA-Z][a-zA-Z0-9]* {snprintf(yylval.str,sizeof yylval.str,"%s",yytext); return VAR;}
%%
/*
 * End-of-input hook called by the generated scanner.
 * Always returns 1; lex treats a non-zero result as "no more input".
 */
int yywrap()
{
    return 1;
}
A5.y:
%{
#include <stdio.h>
#include <string.h>
extern FILE* yyin;

/* Number used for the next generated temporary name (see temp_name()). */
static int temp_no=1;

/* One symbol-table entry: identifier, its type name and an integer value. */
struct SymTab
{
char name[20];
char type[10];
int value;
};

/* One quadruple: operator, two operands and the result name, all as text. */
struct Quadr
{
char opn[10];
char op1[20];
char op2[20];
char res[20];
};

/* Symbol table and the number of entries currently in it. */
struct SymTab sym[30];
int cnt=0;

/* Quadruple table and the number of quadruples emitted so far. */
struct Quadr qtab[40];
int qcnt=0;

/* Operand stack used by push()/pop(); stop indexes the top entry, -1 = empty. */
char stack[50][20];
int stop=-1;

/* Not referenced in the visible part of this file. */
int ifpos=-1;

/*
 * Prototypes for the scanner entry point and for the helpers defined after
 * the grammar. The grammar actions call these before their definitions;
 * without prototypes the pointer-returning helpers (pop, temp_name,
 * ret_type) would be implicitly declared as returning int.
 */
int yylex(void);
void yyerror(char* s);
void addQuadr(char* op,char* o1, char* o2, char* r);
void push(char* str);
char* pop(void);
char* temp_name(void);
int prec(char type[10]);
int search(char name[20]);
void add(char name[20],char type[10],int val);
void disp(void);
char* ret_type(char name[20]);
%}
/* Tokens carrying a value use the %union member named in <...>. */
%token <ival> NUM
%token <str> VAR
%token <str> DTYPE
%token <str> OP
%token IF
%token ELSE
%token WHILE
%token <str>RLOP
/* Arithmetic operators arrive from the scanner as single characters, so they
   are written as quoted character literals; '*' and '/' bind tighter. */
%left '+' '-'
%left '*' '/'
%union
{
int ival;
char str[20];
}
%%
6
strcpy(op1,pop());
strcpy(op2,pop());
printf("Type: %s %s",ret_type(op1),ret_type(op2));
if(strcmp(ret_type(op1),ret_type(op2))!=0)
{ if(prec(ret_type(op2))>prec(ret_type(op1)))
{ char str1[5];
strcpy(str1,temp_name());
addQuadr(ret_type(op2),op1,"",str1);
char str[5];
strcpy(str,temp_name());
addQuadr("+",op2,str1,str);
push(str);
add(str,ret_type(op2),0);
else if(prec(ret_type(op2))<prec(ret_type(op1)))
{ char str1[5];
strcpy(str1,temp_name());
addQuadr(ret_type(op1),op2,"",str1);
char str[5];
strcpy(str,temp_name());
addQuadr("+",op1,str1,str);
push(str);
add(str,ret_type(op1),0);
}
else{
char str[5];
strcpy(str,temp_name());
addQuadr("+",op1,op2,str);
push(str);}
}}|
expr '-' expr
{
/*
 * Emit quadruples for "left - right".
 * Operands are pushed left to right, so the first pop() yields the right
 * operand. If the operand types differ, the narrower one (lower prec()) is
 * first converted into a temporary by a quadruple whose operator is the
 * wider type name. The subtraction is then emitted with the left operand in
 * op1 and the right operand in op2, for equal and for mixed types alike.
 * The result temporary is pushed and entered into the symbol table with the
 * wider type so that enclosing expressions can look its type up.
 */
char rhs[20],lhs[20];
strcpy(rhs,pop());
strcpy(lhs,pop());
char *rtype=ret_type(rhs);
char *ltype=ret_type(lhs);
if(rtype==NULL || ltype==NULL)
{
/* An operand has no symbol-table entry; its type cannot be compared. */
printf("\nUnknown operand type in '-' expression");
YYERROR;
}
/* Trace output keeps the original order: first-popped operand first. */
printf("Type: %s %s",rtype,ltype);
char *wide=ltype;
char conv[20];
if(prec(ltype)>prec(rtype))
{
strcpy(conv,temp_name());
addQuadr(ltype,rhs,"",conv);
strcpy(rhs,conv);
}
else if(prec(ltype)<prec(rtype))
{
strcpy(conv,temp_name());
addQuadr(rtype,lhs,"",conv);
strcpy(lhs,conv);
wide=rtype;
}
char res[20];
strcpy(res,temp_name());
addQuadr("-",lhs,rhs,res);
push(res);
add(res,wide,0);
}|
expr '*' expr
{
/*
 * Emit quadruples for "left * right".
 * Operands are pushed left to right, so the first pop() yields the right
 * operand. If the operand types differ, the narrower one (lower prec()) is
 * first converted into a temporary by a quadruple whose operator is the
 * wider type name. The multiplication is then emitted with the left operand
 * in op1 and the right operand in op2, for equal and for mixed types alike.
 * The result temporary is pushed and entered into the symbol table with the
 * wider type so that enclosing expressions can look its type up.
 */
char rhs[20],lhs[20];
strcpy(rhs,pop());
strcpy(lhs,pop());
char *rtype=ret_type(rhs);
char *ltype=ret_type(lhs);
if(rtype==NULL || ltype==NULL)
{
/* An operand has no symbol-table entry; its type cannot be compared. */
printf("\nUnknown operand type in '*' expression");
YYERROR;
}
/* Trace output keeps the original order: first-popped operand first. */
printf("Type: %s %s",rtype,ltype);
char *wide=ltype;
char conv[20];
if(prec(ltype)>prec(rtype))
{
strcpy(conv,temp_name());
addQuadr(ltype,rhs,"",conv);
strcpy(rhs,conv);
}
else if(prec(ltype)<prec(rtype))
{
strcpy(conv,temp_name());
addQuadr(rtype,lhs,"",conv);
strcpy(lhs,conv);
wide=rtype;
}
char res[20];
strcpy(res,temp_name());
addQuadr("*",lhs,rhs,res);
push(res);
add(res,wide,0);
}|
expr '/' expr
{
/*
 * Emit quadruples for "left / right".
 * Operands are pushed left to right, so the first pop() yields the right
 * operand (the divisor). If the operand types differ, the narrower one
 * (lower prec()) is first converted into a temporary by a quadruple whose
 * operator is the wider type name. The division is then emitted with the
 * left operand in op1 and the right operand in op2, for equal and for mixed
 * types alike. The result temporary is pushed and entered into the symbol
 * table with the wider type so that enclosing expressions can look its type
 * up.
 */
char rhs[20],lhs[20];
strcpy(rhs,pop());
strcpy(lhs,pop());
char *rtype=ret_type(rhs);
char *ltype=ret_type(lhs);
if(rtype==NULL || ltype==NULL)
{
/* An operand has no symbol-table entry; its type cannot be compared. */
printf("\nUnknown operand type in '/' expression");
YYERROR;
}
/* Trace output keeps the original order: first-popped operand first. */
printf("Type: %s %s",rtype,ltype);
char *wide=ltype;
char conv[20];
if(prec(ltype)>prec(rtype))
{
strcpy(conv,temp_name());
addQuadr(ltype,rhs,"",conv);
strcpy(rhs,conv);
}
else if(prec(ltype)<prec(rtype))
{
strcpy(conv,temp_name());
addQuadr(rtype,lhs,"",conv);
strcpy(lhs,conv);
wide=rtype;
}
char res[20];
strcpy(res,temp_name());
addQuadr("/",lhs,rhs,res);
push(res);
add(res,wide,0);
}|
VAR
{
/* A name found by search() becomes an operand on the stack;
   any other name is only reported. */
if(search($1)!=-1)
{
push($1);
}
else
{
printf("\nUndefined Variable %s",$1);
}
};
assign: VAR '=' NUM ';'
{
/*
 * Constant assignment "VAR = NUM;": emits the quadruple (=, NUM, , VAR).
 * The literal tokens are quoted because the scanner returns '=' and ';'
 * as character codes. NUM carries an int, so it is formatted as text
 * before being handed to addQuadr(), which fills all four fields.
 */
char literal[20];
printf("\nAssignment");
snprintf(literal,sizeof literal,"%d",$3);
addQuadr("=",literal,"",$1);
}
;
%%
void addQuadr(char* op,char* o1, char* o2, char* r)
{
strcpy(qtab[qcnt].opn,op);
strcpy(qtab[qcnt].op1,o1);
strcpy(qtab[qcnt].op2,o2);
strcpy(qtab[qcnt].res,r);
qcnt++;
}
/*
 * Push a copy of str onto the operand stack.
 *
 * The text is truncated to the width of one stack slot. A NULL str is
 * pushed as an empty string. When the stack is already full nothing is
 * pushed and a message is written to stderr.
 */
void push(char* str)
{
    if (stop + 1 >= (int)(sizeof stack / sizeof stack[0])) {
        fprintf(stderr, "\nOperand stack overflow\n");
        return;
    }
    ++stop;
    snprintf(stack[stop], sizeof stack[stop], "%s", str ? str : "");
}
/*
 * Pop the top entry of the operand stack.
 *
 * Returns a pointer to the slot inside the stack array; the text stays
 * valid until a later push() reuses that slot, so callers should copy it.
 * When the stack is empty a message is written to stderr and a pointer to
 * an empty string is returned instead of indexing below the array.
 */
char* pop(void)
{
    static char empty[1] = "";

    if (stop < 0) {
        fprintf(stderr, "\nOperand stack underflow\n");
        return empty;
    }
    return stack[stop--];
}
/*
 * Generate the next temporary name: "t" followed by the current value of
 * temp_no, which is then incremented. The name is also echoed to stdout.
 *
 * Returns a pointer to a static buffer (the original returned the address
 * of a local array, which is invalid once the function returns). The buffer
 * is overwritten by the next call, so callers must copy the name before
 * calling temp_name() again.
 */
char* temp_name(void)
{
    static char name[16];

    snprintf(name, sizeof name, "t%d", temp_no);
    temp_no++;
    printf("%s", name);
    return name;
}
/*
 * Rank a type name for widening decisions.
 *
 * Returns 0 for "char", 1 for "int", 2 for "float", 3 for "double".
 * Returns -1 for NULL or any other name, so every path returns a value
 * (the original fell off the end of the function for unknown names).
 */
int prec(char type[10])
{
    static const char *const order[] = { "char", "int", "float", "double" };
    int rank;

    if (type == NULL) {
        return -1;
    }
    for (rank = 0; rank < (int)(sizeof order / sizeof order[0]); rank++) {
        if (strcmp(type, order[rank]) == 0) {
            return rank;
        }
    }
    return -1;
}
/*
 * Look name up in the symbol table.
 * Returns 0 when an entry with that name exists, -1 otherwise
 * (including when the table is empty).
 */
int search(char name[20])
{
    int idx = 0;

    while (idx < cnt) {
        if (!strcmp(sym[idx].name, name)) {
            return 0;
        }
        idx++;
    }
    return -1;
}
void add(char name[20],char type[10],int val)
{
if(search(name)==0) {printf("\nRedeclaration of variable %s\n",name); return;}
strcpy(sym[cnt].name,name);
strcpy(sym[cnt].type,type);
sym[cnt].value=val;
cnt++;
}
void disp()
{
int i;
for(i=0;i<cnt;i++)
{
printf("\n Name: %s Type: %s Value: %d",sym[i].name,sym[i].type,sym[i].value);
}
}
/*
 * Return the type name recorded for a symbol.
 *
 * Returns a pointer to the type field of the matching symbol-table entry,
 * or NULL when name is NULL or no entry has that name. Callers must check
 * for NULL before using the result as a string.
 */
char* ret_type(char name[20])
{
    int i;

    if (name == NULL) {
        return NULL;
    }
    for (i = 0; i < cnt; i++) {
        if (strcmp(name, sym[i].name) == 0) {
            return sym[i].type;
        }
    }
    return NULL;
}
/*
 * Error callback invoked by the generated parser.
 *
 * s is the parser's message text, not a source position, so it is printed
 * after a colon rather than after "at". The report goes to stderr, on its
 * own line, so it does not run into the generated output on stdout.
 */
void yyerror(char* s)
{
    fprintf(stderr, "\nIncorrect Syntax: %s\n", s ? s : "");
}
/*
 * Driver: asks for the input file name, parses that file, then prints the
 * symbol table and the generated quadruples.
 *
 * Returns 0 after a run, or 1 when no file name could be read or the file
 * could not be opened.
 */
int main()
{
    char ifile[100];
    int i;

    printf("\nEnter input file name: ");
    /* Width limit keeps the name inside ifile (99 characters + terminator). */
    if (scanf("%99s", ifile) != 1) {
        fprintf(stderr, "\nNo input file name given\n");
        return 1;
    }

    yyin = fopen(ifile, "r");
    if (yyin == NULL) {
        /* Stop here rather than handing the scanner a NULL stream. */
        perror(ifile);
        return 1;
    }

    yyparse();
    fclose(yyin);

    printf("\nSymbol Table:\n");
    disp();

    printf("\nQuadruple:\n");
    for (i = 0; i < qcnt; i++) {
        printf("\n%s\t%s\t%s\t%s", qtab[i].opn, qtab[i].op1, qtab[i].op2, qtab[i].res);
    }
    return 0;
}
INPUT FILE:
int a=100,b,c=55,d=98;
float e=112,f=976;
int p=98;
char k=125;
double g,h;
a=100;
f=a;
g=b;
a=b+e;
if(e>f)
{
p=k/a;}
else{
g=12;
}
h=90;
a=80;
while(a>b)
{ g=12;}
h=100;
12
9 Output:
[be@dbsl15 ~]$ cd 4246/A5
[be@dbsl15 A5]$ lex A5.l
[be@dbsl15 A5]$ yacc -d A5.y
[be@dbsl15 A5]$ gcc -o A5 lex.yy.c y.tab.c
A5.y: In function 'temp_name':
A5.y:330:1: warning: function returns address of local variable [-Wreturn-local-addr]
return str;
^
[be@dbsl15 A5]$ ./A5
= 100 a
float a t1
13
= t1 f
double b t2
= t2 g
float b t3
+ e t3 t4
int t4 t5
= t5 a
>e f t6
if t6 goto L1
L1:
int k t7
/ a t7 t8
= t8 p
goto L2:
else:
L3:
= 12 g
L2:
= 90 h
= 80 a
>a b t9
if t9 goto L1
else: goto L2:
L1:
= 12 g
goto L1
L2:
[be@dbsl15 A5]$
14
15
16
10 Conclusion:
In this way, intermediate code was generated for a sample language using LEX
and YACC.
17