词法分析器--DFA(c++实现)

时间:2022-02-08 15:43:54

语言名为TINY

实例程序:

begin
var x,y:interger;
x:=0;
read(x);
if y<0 then x:=x-y;
x:=x+y;
write(x);
end

TINY语言扫描程序的DFA:(此处似为DFA状态转换图,抓取后仅余图片标题文字“词法分析器--DFA(c++实现)”)

代码

//ExplLexicalAnalyzer.h
#ifndef EXPLLEXICALANALYZER_H
#define EXPLLEXICALANALYZER_H

// Capacity of the scanner's token text buffer, not counting the terminating NUL.
#define MAXTOKENLEN 40

// Number of entries in the scanner's reserved-word table.
#define MAXRESERVED 13

// Every kind of token the scanner can report.
typedef enum {
    // Bookkeeping tokens.
    ENDFILE, ERROR,
    // Reserved words.
    IF, THEN, ELSE, END, REPEAT, UNTIL, READ, WRITE, VAR, BEGIN, INTEGER, DOUBLE, STRING,
    // Multi-character tokens.
    ID, NUM,
    // Special symbols.
    ASSIGN, EQ, LT, PLUS, MINUS, TIMES, OVER, LPAREN, RPAREN, SEMI, COMMA, DEFINE
} TokenType;

//typedef struct {
//    TokenType kind;
//    int row = -1;
//    int column = -1;
//    double value;
//    std::string ID;
//} Token;

// Scans the input and returns the kind of the next token.
TokenType getToken(void);

#endif //LEARN_2_EXPLLEXICALANALYZER_H
 //ExplLexicalAnalyzer.cpp
#include <cctype>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include "ExplLexicalAnalyzer.h"

using namespace std;

// States of the scanner automaton that getToken() walks through.
typedef enum {
    START, INASSIGN, INCOMMENT, INNUM, INID, DONE
} StateType;

// Text of the token being assembled; the extra slot holds the terminating NUL.
char tokenString[MAXTOKENLEN + 1];

// Size of the buffer that holds one source line.
#define BUFLEN 256

static char lineBuf[BUFLEN];   // current source line
static int linepos = 0;        // index of the next unread character in lineBuf
static int bufsize = 0;        // number of valid characters in lineBuf
static int EOF_flag = false;   // once set, ungetNextChar() no longer moves linepos
static string filename;        // path of the source file to scan
static fstream get;            // stream the source lines are read from
// Counter printed in front of every traced token; reset for each new line.
// NOTE(review): the initial value was lost when the article was extracted; 0 assumed.
static int lineno = 0;
// Despite its name this is the number echoed in front of every source line.
// NOTE(review): the initial value was lost when the article was extracted; 0 assumed.
static int columnpos = 0;
bool TraceScan = true;         // when true, getToken() prints every token it returns
StateType state;               // current automaton state, shared with getNextChar()

// Reserved words and the token each one maps to.
static struct {
    const char *str;
    TokenType tok;
} reservedWords[MAXRESERVED]
        = {{"if",       IF},
           {"then",     THEN},
           {"else",     ELSE},
           {"end",      END},
           {"repeat",   REPEAT},
           {"until",    UNTIL},
           {"read",     READ},
           {"write",    WRITE},
           {"begin",    BEGIN},
           {"var",      VAR},
           {"interger", INTEGER},  // spelling matches the article's sample program
           {"double",   DOUBLE},
           {"string",   STRING}};

/**
 * Returns the next character of the source, reading a new line into lineBuf
 * whenever the current one is used up.
 *
 * Every complete line is handed out with a trailing '\n'. That way a token at
 * the end of a line is terminated by a character that really sits in the
 * buffer, so pushing it back with ungetNextChar() is always safe, blank lines
 * are plain whitespace, and a comment may continue on the following line.
 *
 * @return the next character, or EOF (narrowed to char) once the input is
 *         exhausted; EOF_flag is set in that case.
 *         NOTE(review): where plain char is unsigned, a caller that stores the
 *         result in a char will not see it compare equal to EOF.
 */
static char
getNextChar() {
    if (linepos < bufsize)
        return lineBuf[linepos++];

    bool lineComplete = true;
    if (!get.getline(lineBuf, BUFLEN - 1)) {
        // getline() also fails when a line does not fit into the buffer; in that
        // case the characters read so far are valid and scanning can continue.
        const bool overlong = !get.eof() && !get.bad() && get.gcount() > 0;
        if (!overlong) {
            EOF_flag = true;
            return static_cast<char>(EOF);
        }
        get.clear();
        lineComplete = false;  // the rest of this line arrives with the next read
    }

    lineno = 0;
    printf("%d: %s\n", columnpos++, lineBuf);
    bufsize = (int) strlen(lineBuf);
    // getline() stores at most BUFLEN - 2 characters, so this slot is always free.
    if (lineComplete)
        lineBuf[bufsize++] = '\n';
    linepos = 0;
    return lineBuf[linepos++];
}

/**
 * Looks an identifier up in the reserved-word table.
 *
 * @param s NUL-terminated identifier text
 * @return the reserved word's token, or ID when s is not a reserved word
 */
static TokenType reservedLookup(char *s) {
    for (const auto &entry : reservedWords)
        if (strcmp(s, entry.str) == 0)
            return entry.tok;
    return ID;
}
// Push one character back.
static void ungetNextChar(void) {
    // Leave the read position alone while EOF_flag is set.
    if (EOF_flag)
        return;
    --linepos;
}
// Print the analysis result.
/**
 * Writes a one-line description of a token to standard output.
 *
 * @param token       kind of the token
 * @param tokenString token text; printed for reserved words, NUM, ID and ERROR
 */
void printToken(TokenType token, const char *tokenString) {
    // Fixed output for operator and punctuation tokens.
    const char *fixedText = nullptr;

    switch (token) {
        case IF:
        case THEN:
        case ELSE:
        case END:
        case REPEAT:
        case UNTIL:
        case READ:
        case WRITE:
        case BEGIN:
        case VAR:
        case INTEGER:
        case DOUBLE:
        case STRING:
            printf("reserved word: %s\n", tokenString);
            return;
        case NUM:
            printf("NUM, val= %s\n", tokenString);
            return;
        case ID:
            printf("ID, name= %s\n", tokenString);
            return;
        case ERROR:
            printf("ERROR: %s\n", tokenString);
            return;
        case ENDFILE:
            // Nothing is printed for the end of the input.
            return;
        case DEFINE: fixedText = ":\n"; break;
        case COMMA:  fixedText = ",\n"; break;
        case ASSIGN: fixedText = ":=\n"; break;
        case LT:     fixedText = "<\n"; break;
        case EQ:     fixedText = "=\n"; break;
        case LPAREN: fixedText = "(\n"; break;
        case RPAREN: fixedText = ")\n"; break;
        case SEMI:   fixedText = ";\n"; break;
        case PLUS:   fixedText = "+\n"; break;
        case MINUS:  fixedText = "-\n"; break;
        case TIMES:  fixedText = "*\n"; break;
        case OVER:   fixedText = "/\n"; break;
        default:
            printf("Unknown token: %d\n", token);
            return;
    }
    fputs(fixedText, stdout);
}
// Lexical analysis.
/**
 * Scans the input and returns the next token.
 *
 * The token text is left in the global tokenString (truncated to MAXTOKENLEN
 * characters). Blanks, tabs, line ends and { ... } comments are skipped.
 * When TraceScan is set the token is also printed, except for the ENDFILE
 * that is returned directly from the START state.
 *
 * @return the kind of the token found; ENDFILE once the input is exhausted
 */
TokenType getToken(void) {
    int tokenStringIndex = 0;
    TokenType currentToken = ERROR;
    state = START;

    while (state != DONE) {
        const int posBeforeRead = linepos;
        const char c = getNextChar();
        // Only a character that was really taken from the line buffer may be pushed
        // back. If getNextChar() supplies a separator without advancing linepos,
        // ungetNextChar() would re-deliver the last character of the token.
        const bool consumed = (linepos != posBeforeRead);
        const bool atEof = (c == EOF);
        // <cctype> functions require a value representable as unsigned char.
        const int uc = static_cast<unsigned char>(c);
        bool save = true;

        switch (state) {
            case START:
                if (atEof) {
                    return ENDFILE;
                } else if (std::isdigit(uc)) {
                    state = INNUM;
                } else if (std::isalpha(uc)) {
                    state = INID;
                } else if (c == ':') {
                    state = INASSIGN;
                } else if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                    save = false;
                } else if (c == '{') {
                    save = false;
                    state = INCOMMENT;
                } else {
                    state = DONE;
                    switch (c) {
                        case ',': currentToken = COMMA;  break;
                        case '=': currentToken = EQ;     break;
                        case '<': currentToken = LT;     break;
                        case '+': currentToken = PLUS;   break;
                        case '-': currentToken = MINUS;  break;
                        case '*': currentToken = TIMES;  break;
                        case '/': currentToken = OVER;   break;
                        case '(': currentToken = LPAREN; break;
                        case ')': currentToken = RPAREN; break;
                        case ';': currentToken = SEMI;   break;
                        default:  currentToken = ERROR;  break;
                    }
                }
                break;
            case INCOMMENT:
                save = false;
                if (atEof) {
                    state = DONE;
                    currentToken = ENDFILE;
                } else if (c == '}') {
                    state = START;
                }
                break;
            case INASSIGN:
                state = DONE;
                if (c == '=') {
                    currentToken = ASSIGN;
                } else {
                    // A lone ':'; the character just read belongs to the next token.
                    currentToken = DEFINE;
                    save = false;
                    if (consumed)
                        ungetNextChar();
                }
                break;
            case INNUM:
                if (atEof || !std::isdigit(uc)) {
                    if (consumed)
                        ungetNextChar();
                    save = false;
                    state = DONE;
                    currentToken = NUM;
                }
                break;
            case INID:
                if (atEof || !std::isalpha(uc)) {
                    if (consumed)
                        ungetNextChar();
                    save = false;
                    state = DONE;
                    currentToken = ID;
                }
                break;
            case DONE:
                break;
        }

        // Stop at MAXTOKENLEN characters so the terminating NUL below always fits.
        if (save && tokenStringIndex < MAXTOKENLEN && state != START && !std::isspace(uc))
            tokenString[tokenStringIndex++] = c;

        if (state == DONE) {
            tokenString[tokenStringIndex] = '\0';
            if (currentToken == ID)
                currentToken = reservedLookup(tokenString);
        }
    }

    if (TraceScan) {
        printf("\t%d: ", lineno++);
        printToken(currentToken, tokenString);
    }
    return currentToken;
}

/**
 * Reads the source file name from standard input and prints every token of
 * that file.
 *
 * @return 0 after the whole file was scanned, 1 when the file cannot be opened
 */
int
main() {
    // NOTE(review): "......" looks like a redacted default path that "q" selects;
    // replace it with a real file before relying on the shortcut.
    if (std::cin >> filename && filename == "q") {
        filename = "......";
    }
    get.open(filename, std::ios::in);
    if (!get.is_open()) {
        fprintf(stderr, "cannot open source file: %s\n", filename.c_str());
        return 1;
    }
    while (getToken() != ENDFILE) {
    }
    return 0;
}

运行结果:

(此处似为运行结果截图,抓取后仅余图片标题文字“词法分析器--DFA(c++实现)”)