词法分析器--DFA（c++实现）

语言名为TINY

实例程序：

begin

     var x,y:interger;

     x:=;

     read(x);

     if y< then x:=x-y;

     x:=x+y;

     write(x);

end

TINY语言扫描程序的DFA：词法分析器--DFA（c++实现）

代码

//ExplLexicalAnalyzer.h

#ifndef EXPLLEXICALANALYZER_H

#define EXPLLEXICALANALYZER_H

// Maximum number of characters kept for the text of one token.
#define MAXTOKENLEN 40

// Number of entries in the scanner's reserved-word table.
#define MAXRESERVED 13

// Every kind of token the TINY scanner can hand back to its caller.
// Enumerator order (and therefore numeric value) is significant and
// must not be changed.
typedef enum {
    // book-keeping tokens
    ENDFILE,
    ERROR,

    // reserved words
    IF,
    THEN,
    ELSE,
    END,
    REPEAT,
    UNTIL,
    READ,
    WRITE,
    VAR,
    BEGIN,
    INTEGER,
    DOUBLE,
    STRING,

    // multi-character tokens
    ID,
    NUM,

    // special symbols:  :=  =  <  +  -  *  /  (  )  ;  ,  :
    ASSIGN,
    EQ,
    LT,
    PLUS,
    MINUS,
    TIMES,
    OVER,
    LPAREN,
    RPAREN,
    SEMI,
    COMMA,
    DEFINE
} TokenType;

//typedef struct {

//    TokenType kind;

//    int row = -1;

//    int column = -1;

//    double value;

//    std::string ID;

//} Token;

// Scans the input and returns the type of the next token;
// ENDFILE is returned once the input is exhausted.
TokenType getToken(void);

#endif // EXPLLEXICALANALYZER_H

 //ExplLexicalAnalyzer.cpp

 #include <cctype>
 #include <cstdio>
 #include <cstring>
 #include <fstream>
 #include <iostream>
 #include <string>

 #include  "ExplLexicalAnalyzer.h"

 using namespace std;

 // States of the scanner's DFA as driven by getToken().
 typedef enum {
     START,      // between tokens, nothing recognised yet
     INASSIGN,   // a ':' has been read; '=' would complete ":="
     INCOMMENT,  // inside a { ... } comment
     INNUM,      // reading the digits of a number
     INID,       // reading the letters of an identifier or keyword
     DONE        // a complete token has been recognised
 } StateType;

 // Text of the token most recently scanned by getToken(), NUL-terminated.
 // One extra byte beyond MAXTOKENLEN is reserved for the terminator.
 char tokenString[MAXTOKENLEN + 1];

 // Capacity of the line buffer in bytes.
 #define BUFLEN 256

 // The source line currently being scanned.
 static char lineBuf[BUFLEN];

 // Index in lineBuf of the next character to hand out.
 static int linepos = 0;

 // Number of valid characters currently held in lineBuf.
 static int bufsize = 0;

 // Checked by ungetNextChar(): no character is pushed back while it is set.
 static int EOF_flag = false;

 // Path of the source file; filled in by main().
 static string filename;

 // Input stream the scanner reads its lines from.
 static fstream get;

 // Despite the name, this is the counter printed in front of every traced
 // token; it is reset when the scanner moves on to a new line.
 // NOTE(review): the initial value was lost in the published listing; 0 is
 // assumed - confirm.
 static int lineno = 0;

 // Despite the name, this is the counter printed in front of every echoed
 // source line.
 // NOTE(review): the initial value was lost in the published listing; 0 is
 // assumed - confirm.
 static int columnpos = 0;

 // When true, getToken() prints every token it recognises.
 bool TraceScan = true;

 // Current DFA state; set by getToken().
 StateType state;

 // Keyword table searched by reservedLookup(): each entry pairs the
 // spelling of a reserved word with the token type reported for it.
 static struct {
     const char *str;  // spelling as it appears in TINY source text
     TokenType tok;    // token type returned for that spelling
 } reservedWords[MAXRESERVED] = {
     {"if", IF},
     {"then", THEN},
     {"else", ELSE},
     {"end", END},
     {"repeat", REPEAT},
     {"until", UNTIL},
     {"read", READ},
     {"write", WRITE},
     {"begin", BEGIN},
     {"var", VAR},
     // NOTE(review): spelled "interger", matching the sample program
     // published with this scanner - confirm the spelling is intended.
     {"interger", INTEGER},
     {"double", DOUBLE},
     {"string", STRING},
 };

 static char

 getNextChar() {

     if (linepos >= bufsize) {

         lineno = ;

         if (state != START)

             return ' ';

         if (get.getline(lineBuf, BUFLEN - )) {

             printf("%d: %s\n", columnpos++, lineBuf);

             bufsize = (int) strlen(lineBuf);

             linepos = ;

             return lineBuf[linepos++];

         } else {

             return EOF;

         }

     } else return lineBuf[linepos++];

 }

 // Classifies an identifier spelling.
 //
 // s: NUL-terminated text of the identifier just scanned.
 // Returns the token type of the matching reservedWords entry, or ID when
 // the spelling is not a reserved word. The table is searched linearly from
 // its first entry.
 static TokenType reservedLookup(const char *s) {
     for (int i = 0; i < MAXRESERVED; i++) {
         if (strcmp(s, reservedWords[i].str) == 0)
             return reservedWords[i].tok;
     }
     return ID;
 }

 // Push one character back: step the read position in the line buffer back
 // by one. Does nothing while EOF_flag is set.
 static void ungetNextChar(void) {
     if (EOF_flag)
         return;
     --linepos;
 }

 // Print the analysis result for one token on stdout.
 //
 // token:       type of the token to report.
 // tokenString: text of the token; printed for reserved words, numbers,
 //              identifiers and errors, ignored for every other type.
 //
 // Special symbols are printed as their fixed spelling, ENDFILE prints
 // nothing, and a value outside the known token types is reported as
 // "Unknown token" together with its numeric value.
 void printToken(TokenType token, const char *tokenString) {
     const char *symbolLine = "";

     switch (token) {
         // Tokens whose report includes the scanned text.
         case IF:
         case THEN:
         case ELSE:
         case END:
         case REPEAT:
         case UNTIL:
         case READ:
         case WRITE:
         case BEGIN:
         case VAR:
         case INTEGER:
         case DOUBLE:
         case STRING:
             printf("reserved word: %s\n", tokenString);
             return;
         case NUM:
             printf("NUM, val= %s\n", tokenString);
             return;
         case ID:
             printf("ID, name= %s\n", tokenString);
             return;
         case ERROR:
             printf("ERROR: %s\n", tokenString);
             return;

         // End of input is not reported.
         case ENDFILE:
             return;

         // Special symbols: pick the fixed line, print it once below.
         case DEFINE:
             symbolLine = ":\n";
             break;
         case COMMA:
             symbolLine = ",\n";
             break;
         case ASSIGN:
             symbolLine = ":=\n";
             break;
         case LT:
             symbolLine = "<\n";
             break;
         case EQ:
             symbolLine = "=\n";
             break;
         case LPAREN:
             symbolLine = "(\n";
             break;
         case RPAREN:
             symbolLine = ")\n";
             break;
         case SEMI:
             symbolLine = ";\n";
             break;
         case PLUS:
             symbolLine = "+\n";
             break;
         case MINUS:
             symbolLine = "-\n";
             break;
         case TIMES:
             symbolLine = "*\n";
             break;
         case OVER:
             symbolLine = "/\n";
             break;

         default:
             printf("Unknown token: %d\n", token);
             return;
     }

     fputs(symbolLine, stdout);
 }

 // Lexical analysis: runs the scanner DFA over the input and returns the
 // type of the next token.
 //
 // The token's text is left in the global tokenString (NUL-terminated,
 // truncated to MAXTOKENLEN characters). Identifiers are looked up in the
 // reserved-word table before being returned. When TraceScan is set, the
 // token is printed with its per-line counter. ENDFILE is returned at end of
 // input; on the direct end-of-input path from the START state nothing is
 // traced and tokenString is left untouched.
 TokenType getToken(void) {
     int tokenStringIndex = 0;
     // Initialised so that no path can return an indeterminate value.
     TokenType currentToken = ERROR;
     state = START;

     while (state != DONE) {
         // When the line buffer is already exhausted in the middle of a
         // token, getNextChar() may answer with a filler blank instead of
         // consuming input. There is then nothing to push back, and stepping
         // linepos back would re-deliver an earlier character.
         const bool nothingConsumed = (state != START) && (linepos >= bufsize);

         char c = getNextChar();
         // <cctype> classifiers require a value representable as
         // unsigned char.
         const unsigned char uc = (unsigned char) c;
         bool save = true;

         switch (state) {
             case START:
                 if (isdigit(uc))
                     state = INNUM;
                 else if (isalpha(uc))
                     state = INID;
                 else if (c == ':')
                     state = INASSIGN;
                 else if ((c == ' ') || (c == '\t') || (c == '\n'))
                     save = false;
                 else if (c == '{') {
                     save = false;
                     state = INCOMMENT;
                 } else {
                     // Every remaining character is a token on its own.
                     state = DONE;
                     switch (c) {
                         case EOF:
                             return ENDFILE;
                         case ',':
                             currentToken = COMMA;
                             break;
                         case '=':
                             currentToken = EQ;
                             break;
                         case '<':
                             currentToken = LT;
                             break;
                         case '+':
                             currentToken = PLUS;
                             break;
                         case '-':
                             currentToken = MINUS;
                             break;
                         case '*':
                             currentToken = TIMES;
                             break;
                         case '/':
                             currentToken = OVER;
                             break;
                         case '(':
                             currentToken = LPAREN;
                             break;
                         case ')':
                             currentToken = RPAREN;
                             break;
                         case ';':
                             currentToken = SEMI;
                             break;
                         default:
                             currentToken = ERROR;
                             break;
                     }
                 }
                 break;

             case INCOMMENT:
                 // Comment text is never part of a token.
                 save = false;
                 if (c == EOF) {
                     state = DONE;
                     currentToken = ENDFILE;
                 } else if (c == '}') {
                     state = START;
                 }
                 break;

             case INASSIGN:
                 state = DONE;
                 if (c == '=') {
                     currentToken = ASSIGN;
                 } else {
                     // A lone ':'. The character just read belongs to the
                     // next token: give it back and keep it out of
                     // tokenString.
                     currentToken = DEFINE;
                     save = false;
                     if (!nothingConsumed)
                         ungetNextChar();
                 }
                 break;

             case INNUM:
                 if (!isdigit(uc)) {
                     if (!nothingConsumed)
                         ungetNextChar();
                     save = false;
                     state = DONE;
                     currentToken = NUM;
                 }
                 break;

             case INID:
                 if (!isalpha(uc)) {
                     if (!nothingConsumed)
                         ungetNextChar();
                     save = false;
                     state = DONE;
                     currentToken = ID;
                 }
                 break;

             case DONE:
                 break;
         }

         // Strictly below MAXTOKENLEN so that the terminator written when
         // the token is complete always fits into tokenString.
         if (save && tokenStringIndex < MAXTOKENLEN)
             tokenString[tokenStringIndex++] = c;

         if (state == DONE) {
             tokenString[tokenStringIndex] = '\0';
             if (currentToken == ID)
                 currentToken = reservedLookup(tokenString);
         }
     }

     if (TraceScan) {
         printf("\t%d: ", lineno++);
         printToken(currentToken, tokenString);
     }

     return currentToken;
 }

 int

 main() {

     if (cin >> filename && filename == "q") {

         filename = "......";

     }

     get.open(filename, ios::in);

     while (getToken() != ENDFILE);

 }

运行结果：

词法分析器--DFA（c++实现）

秒客网

词法分析器--DFA（c++实现）

相关文章