C Program LEXICAL ANALYSER

How do you write a lexical analyser in the C programming language?


Solution:
/* Implement a lexical analyser program in C */



#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>

/* Reads the next character from the stream and dispatches to a state. */
void scan(FILE *);
/* Adds a (token, class) pair to the global token list. */
void ll(char*,char *);
/* Reports an invalid token. */
void fail(FILE*);

/* Character classification helpers: letter, delimiter, decimal digit. */
int islet(int);
int isdel(int);
int isdig(int);

/*
 * State functions. Each takes the input stream and the character that was
 * read most recently.
 */
void state0(FILE *, int);   /* identifier: first character */
void state1(FILE *, int);   /* 'if' keyword and 'int' datatype */
void state6(FILE *, int);   /* numbers */
void state2(FILE *, int);   /* 'else' keyword */
void state3(FILE *, int);   /* relational operators starting with '<' */
void state4(FILE *, int);   /* relational operators starting with '>' */
void state5(FILE *, int);   /* 'char' datatype */
void state01(FILE *, int);  /* identifier: following characters */
void state02(FILE *, int);  /* identifier: accepted */

/* One recorded token; nodes are chained into a singly linked list. */
struct node
{
    char *class;        /* category of the token, e.g. "KEYWORD" or "ID" */
    char *token;        /* text of the token */
    struct node *ptr;   /* next node in the list, or NULL at the end */
};

/* First node of the token list; ll() installs each new node here. */
struct node* head;
/* Index into arr used by the state functions; scan() resets it to 0. */
int count;
/*
 * Scratch buffer the state functions write characters into, indexed by count.
 * NOTE(review): the buffer holds only 10 bytes -- confirm that every write
 * through count is bounded for long tokens.
 */
char arr[10];

/*
 * Push a new (token, class) pair onto the front of the global token list.
 *
 * ar: NUL-terminated token text. The text is copied into the node, because
 *     callers may pass the shared scratch buffer `arr`, whose contents are
 *     overwritten while later tokens are scanned. NULL is treated as "".
 * cl: NUL-terminated class name. The pointer is stored as-is; every caller in
 *     this file passes a string literal.
 *
 * If memory cannot be allocated, a message is written to stderr and the list
 * is left unchanged.
 */
void ll(char* ar,char *cl)
{
    struct node *nn;
    char *copy;
    size_t len;

    if (ar == NULL)
        ar = "";

    len = strlen(ar);
    copy = malloc(len + 1);
    nn = malloc(sizeof *nn);
    if (copy == NULL || nn == NULL) {
        fprintf(stderr, "ll: out of memory\n");
        free(copy);
        free(nn);
        return;
    }
    memcpy(copy, ar, len + 1);   /* len + 1 also copies the terminator */

    nn->token = copy;
    nn->class = cl;
    nn->ptr = head;              /* old head becomes the second node */
    head = nn;
}

/*
 * Print every token recorded in the global list, one "Token/Class" pair per
 * node. The walk starts at `head`, so tokens appear most recent first (new
 * nodes are inserted at the head of the list). Prints nothing for an empty
 * list.
 */
void display(void)
{
    struct node *cur;

    for (cur = head; cur != NULL; cur = cur->ptr)
        printf("Token is %s\nClass is %s\n\n", cur->token, cur->class);
}

/*
 * Entry point: tokenises "file.txt" from the current directory and prints the
 * recorded tokens.
 *
 * Returns 0 on success, 1 if the input file cannot be opened.
 */
int main(void)
{
    FILE *f = fopen("file.txt", "r");

    if (f == NULL) {
        printf("File open unsuccessful\n");
        return 1;   /* scanning or closing a NULL stream is undefined behaviour */
    }

    scan(f);
    fclose(f);
    display();
    return 0;
}

/*
 * Start a new token: reset the buffer index, skip blanks, newlines and tabs,
 * then hand the first significant character to the matching state function.
 * Anything that starts no known token (including end of file) prints
 * "END OF SYMBOLS" and returns.
 */
void scan(FILE *f)
{
    int ch;

    count = 0;

    do {
        ch = fgetc(f);
    } while (ch == ' ' || ch == '\n' || ch == '\t');

    switch (ch) {
    case 'i':               /* possible "if" or "int" */
        state1(f, ch);
        break;
    case 'e':               /* possible "else" */
        state2(f, ch);
        break;
    case 'c':               /* possible "char" */
        state5(f, ch);
        break;
    case '<':
        state3(f, ch);
        break;
    case '>':
        state4(f, ch);
        break;
    default:
        if (islet(ch) || ch == '_')
            state0(f, ch);
        else if (isdig(ch))
            state6(f, ch);
        else
            printf("END OF SYMBOLS\n");
        break;
    }
}

/* Store c at arr[count] when that index lies inside the scratch buffer. */
static void state6_store(int c)
{
    if (count >= 0 && (size_t)count < sizeof arr)
        arr[count] = (char)c;
}

/*
 * Number state.
 *
 * f: input stream, positioned just after c.
 * c: the digit that was read most recently.
 *
 * Consumes digits, where a '.' or an 'E' may appear if it is immediately
 * followed by another digit. When the digits end at a delimiter or at end of
 * file, the token "INTEGER" with class "NUMBER" is recorded and scanning
 * resumes with the next token. Any other ending is treated as a malformed
 * number: the rest of the word is discarded up to the next delimiter and
 * scanning resumes without recording anything.
 *
 * Characters are mirrored into the scratch buffer only while they fit; the
 * recorded token does not depend on the buffer contents.
 */
void state6(FILE *f, int c)
{
    for (;;) {
        printf("STATE6 OF NUMBERS\n");
        state6_store(c);
        count++;
        c = fgetc(f);
        state6_store(c);

        if (isdig(c))
            continue;

        /* End of file ends a number just like a delimiter does. */
        if (isdel(c) || c == EOF) {
            printf("NUMBER  DETECTED\n");
            ll("INTEGER", "NUMBER");
            scan(f);
            return;
        }

        if (c == '.' || c == 'E') {
            count++;
            c = fgetc(f);
            state6_store(c);
            if (isdig(c))
                continue;
        }
        break;
    }

    /*
     * Malformed number. The EOF test keeps this loop from spinning forever on
     * input that ends without a delimiter. The delimiter itself is consumed
     * here, so the next token starts at the very next character and no
     * further seek is needed.
     */
    while (c != EOF && !isdel(c))
        c = fgetc(f);
    scan(f);
}
/*
 * Give up on the keyword: move the stream back to just after the word's first
 * character and rescan the word as an identifier. That is the stream position
 * scan() provides when it enters the identifier state directly.
 */
static void state5_as_identifier(FILE *f, long resume, int first)
{
    if (resume < 0 || fseek(f, resume, SEEK_SET) != 0) {
        fail(f);
        return;
    }
    count = 0;
    state0(f, first);
}

/*
 * Datatype state for "char".
 *
 * f: input stream, positioned just after c.
 * c: first character of the word (scan() dispatches here on 'c').
 *
 * If the word is exactly "char" followed by a delimiter or end of file, the
 * token "CHAR" with class "DATATYPE" is recorded and scanning resumes.
 * Otherwise the word is rescanned as an identifier.
 *
 * The resume position is taken with ftell() rather than a relative seek, so
 * the rewind stays correct when a read hits end of file (which does not
 * advance the stream).
 */
void state5(FILE *f, int c)
{
    static const char rest[] = "har";
    const long resume = ftell(f);
    const int first = c;
    size_t i;

    printf("STATE5 OF DATATYPE CHAR\n");

    for (i = 0; rest[i] != '\0'; i++) {
        c = fgetc(f);
        if (c != rest[i]) {
            printf("DATATYPE CHAR UNDETECTED\n");
            state5_as_identifier(f, resume, first);
            return;
        }
    }

    c = fgetc(f);
    if (isdel(c) || c == EOF) {
        printf("Datatype CHAR  DETECTED\n");
        ll("CHAR", "DATATYPE");
        scan(f);
        return;
    }

    /* "char" is only a prefix of a longer word. */
    printf("DATATYPE CHAR NOT DETECTED\n");
    state5_as_identifier(f, resume, first);
}

/*
 * Relational-operator state for '>'.
 *
 * Recognises ">" and ">=" when the operator is followed by a delimiter,
 * records it with class "RELOP" and resumes scanning. If the operator is not
 * followed by a delimiter, nothing is recorded and scan() is not called
 * again. The closing trace line is printed on every path.
 */
void state4(FILE *f, int c)
{
    char *op = NULL;    /* operator text once one has been recognised */

    printf("STATE4 OF RELATIONAL OP >\n");

    if (c == '>') {
        int next;

        count++;
        next = fgetc(f);

        if (isdel(next)) {
            printf("REL OPER > DETECTED\n");
            op = ">";
        } else if (next == '=') {
            count++;
            next = fgetc(f);
            if (isdel(next)) {
                printf("REL OPER >= DETECTED\n");
                op = ">=";
            }
        }

        if (op != NULL) {
            count++;
            fseek(f, 0, SEEK_CUR);
            ll(op, "RELOP");
            scan(f);
        }
    }

    printf("OUT OF STATE4 OF RELATIONAL OPERATOR >\n");
}

/*
 * Relational-operator state for '<'.
 *
 * Recognises "<", "<=" and "<>" when the operator is followed by a
 * delimiter, records it with class "RELOP" and resumes scanning. If the
 * operator is not followed by a delimiter, nothing is recorded and scan() is
 * not called again. The closing trace line is printed on every path.
 */
void state3(FILE *f, int c)
{
    char *op = NULL;    /* operator text once one has been recognised */

    printf("STATE3 OF RELATIONAL OP <\n");

    if (c == '<') {
        int next;

        count++;
        next = fgetc(f);

        if (isdel(next)) {
            printf("REL OPER < DETECTED\n");
            op = "<";
        } else if (next == '=' || next == '>') {
            const int second = next;

            count++;
            next = fgetc(f);
            if (isdel(next)) {
                if (second == '=') {
                    printf("REL OPER <= DETECTED\n");
                    op = "<=";
                } else {
                    printf("REL OPER <> DETECTED\n");
                    op = "<>";
                }
            }
        }

        if (op != NULL) {
            count++;
            fseek(f, 0, SEEK_CUR);
            ll(op, "RELOP");
            scan(f);
        }
    }

    printf("OUT OF STATE3 OF RELATIONAL OPERATOR <\n");
}

/*
 * Give up on the keyword: move the stream back to just after the word's first
 * character and rescan the word as an identifier. That is the stream position
 * scan() provides when it enters the identifier state directly.
 */
static void state2_as_identifier(FILE *f, long resume, int first)
{
    if (resume < 0 || fseek(f, resume, SEEK_SET) != 0) {
        fail(f);
        return;
    }
    count = 0;
    state0(f, first);
}

/*
 * Keyword state for "else".
 *
 * f: input stream, positioned just after c.
 * c: first character of the word (scan() dispatches here on 'e').
 *
 * If the word is exactly "else" followed by a delimiter or end of file, the
 * token "ELSE" with class "KEYWORD" is recorded and scanning resumes. Any
 * other word starting with 'e' is rescanned as an identifier instead of
 * silently ending the scan.
 *
 * The resume position is taken with ftell() rather than a relative seek, so
 * the rewind stays correct when a read hits end of file.
 */
void state2(FILE *f,int c)
{
    static const char rest[] = "lse";
    const long resume = ftell(f);
    const int first = c;
    size_t i;

    printf("STATE2 for 'ELSE' keyword\n");

    if (c != 'e')
        return;

    for (i = 0; rest[i] != '\0'; i++) {
        c = fgetc(f);
        if (c != rest[i]) {
            printf("KEYWORD 'ELSE' is not detected\n");
            state2_as_identifier(f, resume, first);
            return;
        }
    }

    c = fgetc(f);
    if (isdel(c) || c == EOF) {
        printf("Keyword DETECTED\n");
        ll("ELSE", "KEYWORD");
        scan(f);
        return;
    }

    /* "else" is only a prefix of a longer word. */
    printf("KEYWORD 'ELSE' is not detected\n");
    state2_as_identifier(f, resume, first);
}



/*
 * Give up on the keyword: move the stream back to just after the word's first
 * character and rescan the word as an identifier. That is the stream position
 * scan() provides when it enters the identifier state directly.
 */
static void state1_as_identifier(FILE *f, long resume, int first)
{
    if (resume < 0 || fseek(f, resume, SEEK_SET) != 0) {
        fail(f);
        return;
    }
    count = 0;
    state0(f, first);
}

/*
 * Keyword/datatype state for "if" and "int".
 *
 * f: input stream, positioned just after c.
 * c: first character of the word (scan() dispatches here on 'i').
 *
 * "if" followed by a delimiter or end of file records token "IF" with class
 * "KEYWORD"; "int" followed by a delimiter or end of file records token "INT"
 * with class "DATATYPE". In both cases scanning then resumes. Any other word
 * starting with 'i' is rescanned as an identifier instead of silently ending
 * the scan.
 */
void state1(FILE *f, int c)
{
    const long resume = ftell(f);
    const int first = c;

    printf("STATE1 for 'IF' keyword and 'INT' DATATYPE\n");

    if (c != 'i')
        return;

    c = fgetc(f);
    if (c == 'f') {
        /* Read the character AFTER the 'f'; testing the 'f' itself for a
         * delimiter can never succeed. */
        c = fgetc(f);
        if (isdel(c) || c == EOF) {
            printf("Keyword DETECTED\n");
            ll("IF", "KEYWORD");
            scan(f);
            return;
        }
        printf("KEYWORD 'IF' is not detected\n");
    } else if (c == 'n') {
        c = fgetc(f);
        if (c == 't') {
            c = fgetc(f);
            if (isdel(c) || c == EOF) {
                printf("Datatype INT DETECTED\n");
                ll("INT", "DATATYPE");
                scan(f);
                return;
            }
        }
        printf("Datatype 'INT' is not detected\n");
    }

    state1_as_identifier(f, resume, first);
}

/* Return 1 if c is an ASCII letter ('a'-'z' or 'A'-'Z'), otherwise 0. */
int islet(int c)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    return 0;
}

/*
 * Identifier start state: stores c at arr[count], then continues with
 * state01() when c is a letter or '_' and reports failure otherwise.
 */
void state0(FILE *f, int c)
{
    printf("STATE0 of Identifier\n");
    arr[count] = c;

    if (!islet(c) && c != '_') {
        fail(f);
        return;
    }
    state01(f, c);
}

/* Report an invalid token on standard output; the stream is not touched. */
void fail(FILE *f)
{
    (void)f;
    fputs("Invalid\n", stdout);
}

/*
 * Identifier body state.
 *
 * f: input stream, positioned at the identifier's next unread character.
 * c: the identifier character read most recently (already stored in arr).
 *
 * Consumes letters, digits and underscores. When the identifier ends at a
 * delimiter or at end of file, the text in arr is NUL-terminated at
 * arr[count] and state02() is entered; any other character is reported
 * through fail().
 *
 * Stored text is capped so that both arr[count] and arr[count + 1] stay
 * inside the buffer. Characters beyond the cap are consumed but not stored,
 * so longer identifiers are truncated rather than overrunning arr.
 */
void state01(FILE* f, int c)
{
    const int limit = (int)sizeof arr - 2;

    if (count > limit)
        count = limit;

    for (;;) {
        printf("STATE01 of Identifier\n");
        if (count < limit)
            count++;
        c = fgetc(f);

        if (islet(c) || isdig(c) || c == '_') {
            /* Slot `limit` is reserved for the terminator. */
            if (count < limit)
                arr[count] = (char)c;
            continue;
        }

        if (isdel(c) || c == EOF) {
            arr[count] = '\0';
            state02(f, c);
        } else {
            fail(f);
        }
        return;
    }
}

/*
 * Identifier accept state.
 *
 * f: input stream, positioned just after the character that ended the
 *    identifier.
 * c: the character that ended the identifier; it is not part of the token.
 *
 * On entry arr[0 .. count-1] holds the identifier text. The slot at
 * arr[count] is not part of the identifier, so it is overwritten with the
 * string terminator (clamped to the buffer) before arr is printed and handed
 * to ll() with class "ID". Scanning then resumes with the next token.
 */
void state02(FILE *f, int c)
{
    int end = count;

    (void)c;

    printf("STATE02 of Identifier\n");
    printf("IDENTIFIER DETECTED\n");

    if (end < 0)
        end = 0;
    if (end > (int)sizeof arr - 1)
        end = (int)sizeof arr - 1;
    arr[end] = '\0';

    printf("Array is %s\n", arr);
    ll(arr, "ID");
    scan(f);
}

/*
 * Return 1 if c ends a token, otherwise 0.
 *
 * Token terminators are ',', ';', space, newline, tab and EOF. Tab is
 * included because scan() skips it as whitespace between tokens; EOF is
 * included so that the last token of a file without a trailing delimiter
 * still terminates, and so that loops of the form `while (!isdel(c))` cannot
 * run past the end of the input.
 */
int isdel(int c)
{
    switch (c) {
    case ',':
    case ';':
    case ' ':
    case '\n':
    case '\t':
    case EOF:
        return 1;
    default:
        return 0;
    }
}

/* Return 1 if c is a decimal digit ('0'-'9'), otherwise 0. */
int isdig(int c)
{
    if (c < '0' || c > '9')
        return 0;
    return 1;
}


Learn More :