简单的LR核心项集和Goto表填充演示程序

 /*

  * 该程序用于计算语言的核心项集

  * RexfieldVon

  * 2013年8月24日21:19:25

  */

 #include <stdio.h>

 #include <stdlib.h>

 #include <string.h>

 #include <assert.h>

 #ifndef bool

 #    define bool char

 #endif

 #ifndef true

 #    define true

 #endif

 #ifndef false

 #    define false

 #endif

 #define NEXTSIZE 256

 struct TrieTreeNode

 {

     struct TrieTreeNode *Next[NEXTSIZE];

     bool Accepted;

 };

 struct TrieTreeRoot

 {

     int NodeCount;

     struct TrieTreeNode *Tree;

 };

 struct Collection

 {

     char *Expression;                // 产生式

     struct Collection *next;

 };

 struct CoreCollection

 {

     struct Collection *S;            // 项集

     int id;                            // 项集序号

     bool marked;                    // 是否被处理

     unsigned char *FeatureString;    // 特征字串

     int FeatureStringLength;        // 特征字串长度

     unsigned int FeatureHash;        // 特征哈希

     struct CoreCollection *next;

 };

 struct Record

 {

     int RecordRow;                // 当前最大项位

     int RecordRowMax;            // 最大分配项数

     int **Record;                // 记录指针

 };

 /* 三级指针

  * 第一级指向整个产生式组

  * 第二级指向单个产生式

  * 第三级指向产生式符号单元

  * 约定：①所有的大写字母为非终结符②所有小写字母为终结符③'\377'为eof④'\0'为ε⑤'\376'为占位符·

  */

 char*** GrammerRule;

 /*

  * 文法书写约定：

  * 每个字符串表示一个单独的产生式

  * 第一个字符为产生式左边的非终结符，由初始化引擎进行产生式归并

  * 整个文法以 null 结束

  */

 char *Grammer[] =

 {

     "GL",

     "LLP", "LP",

     "P(P)", "P()",

     "\0"

 };

 /*

  * 构建 Trie 树并初始化

  * 返回一个新的 Trie 根节点

  */

 struct TrieTreeRoot *BuildTrieTree()

 {

     struct TrieTreeRoot *Root = (struct TrieTreeRoot *)malloc(sizeof(struct TrieTreeRoot));

     Root->NodeCount = ;

     Root->Tree = (struct TrieTreeNode *)malloc(sizeof(struct TrieTreeNode));

     memset(Root->Tree, '\0', sizeof(struct TrieTreeNode));

     return Root;

 }

 /*

  * 插入新的字符串

  * Root : struct TrieTreeRoot* 要操作的 Trie 树根节点

  * Item : char* 要插入的字符串

  */

 void InsertItem(struct TrieTreeRoot *Root, char *Item)

 {

     struct TrieTreeNode *Ptr = Root->Tree;

     int index = ;

     unsigned char Charactor;

     while ((Charactor = Item[index]) != '\0')

     {

         if (Ptr->Next[Charactor] == NULL)

         {

             Ptr->Next[Charactor] = (struct TrieTreeNode *)malloc(sizeof(struct TrieTreeNode));

             memset(Ptr->Next[Charactor], '\0', sizeof(struct TrieTreeNode));

             Root->NodeCount++;

         }

         Ptr = Ptr->Next[Charactor];

         index++;

     }

     Ptr->Accepted = true;

 }

 /*

  * 递归序列化 Trie 树

  * Node : struct TrieTreeNode* 当前操作的 Trie 节点

  * WritePtr : unsigned char* 特征串写入指针

  */

 unsigned char *DoFeature(struct TrieTreeNode *Node, unsigned char *WritePtr)

 {

     int i, count = ;

     unsigned char *ErgodicPtr;

     *WritePtr = (unsigned char)Node->Accepted;    // 写入节点是否接受

     WritePtr++;

     ErgodicPtr = WritePtr;                // 记录集合起始地址

     for (i = ; i < NEXTSIZE; i++)        // 将该组记录写入特征串

     {

         if (Node->Next[i] != NULL)

         {

             *WritePtr = (char)i;

             WritePtr++;

             count++;

         }

     }

     *WritePtr = '\0';                    // 写入组分隔符

     WritePtr++;

     for (i = ; i < count; i++)            // 递归调用处理所有边

     {

         WritePtr = DoFeature(Node->Next[ErgodicPtr[i]], WritePtr);

     }

     return WritePtr;

 }

 /*

  * 取得 Trie 的特征串，即序列化 Trie 树

  * Root : struct TrieTreeRoot* 要操作的 Trie 树根节点

  * StringLength : int* 长度指针（为了返回二进制串而设置）

  */

 unsigned char *GetFeatureString(struct TrieTreeRoot *Root, int *StringLength)

 {

     struct TrieTreeNode *Ptr = Root->Tree;

     // 假设最坏情况下，每个节点只有一条边，那么存储该节点就需要三个单元（Accepted、边、分隔符）

     // 但实际上真正用到的只有 3N-1 个字节

     unsigned char *FeatureString = (unsigned char *)malloc(Root->NodeCount * );

     unsigned char *WritePtr = FeatureString;

     WritePtr = DoFeature(Ptr, WritePtr);

     *StringLength = WritePtr - FeatureString;

     return FeatureString;

 }

 /*

  * 初始化文法序列

  */

 void InitizationGrammerRule()

 {

     // 分配表头空间

     GrammerRule = (char***)malloc(sizeof(int) * );

     memset(GrammerRule, '\0', sizeof(int) * );

     // 扫描整个文法记录每个非终结符产生式的个数

     int UnterminalOp[], index;

     unsigned char Unterminal;

     memset(UnterminalOp, '\0',  * );

     for (index = ; (Unterminal = Grammer[index][]) != '\0'; index++)

     {

         UnterminalOp[Unterminal]++;

     }

     // 写入产生式

     for (index = ; (Unterminal = Grammer[index][]) != '\0'; index++)

     {

         if(GrammerRule[Unterminal] == NULL)

         {

             GrammerRule[Unterminal] = (char**)malloc(sizeof(int) * (UnterminalOp[Unterminal] + ));

             memset(GrammerRule[Unterminal], '\0', sizeof(int) * (UnterminalOp[Unterminal] + ));

         }

         // 找到空位

         int blank = ;

         while (GrammerRule[Unterminal][blank] != '\0') {blank++;}

         GrammerRule[Unterminal][blank] = &Grammer[index][];

     }

 }

 /*

  * 取得终结符数量

  * return 终结符的数量

  */

 int GetTerminalCount()

 {

     int i, TerminalCount = ;

     for (i = ; i < ; i++)

     {

         if (GrammerRule[i] != NULL)

         {

             int k = ;

             while (GrammerRule[i][k] != NULL)

             {

                 int n = ;

                 while (GrammerRule[i][k][n] != '\0')

                 {

                     char c = GrammerRule[i][k][n];

                     if (c < 'A' || c > 'Z')

                     {

                         TerminalCount++;

                     }

                     n++;

                 }

                 k++;

             }

         }

     }

     return TerminalCount;

 }

 /*

  * 递归取得 FIRST 集

  * Token : unsigned char 需要打印的符号

  * FIRST : char* FIRST集

  * Ptr : int* FIRST集的位置指针

  */

 void GetFIRST(unsigned char Token, char *FIRST, int *Ptr)

 {

     if (Token >= 'A' && Token <= 'Z' && GrammerRule[Token] != NULL)

     {

         int i = ;

         while (GrammerRule[Token][i] != NULL)

         {

             GetFIRST(GrammerRule[Token][i++][], FIRST, Ptr);

         }

     }

     else if (Token < 'A' || Token > 'Z')

     {

         FIRST[*Ptr] = Token;

         *Ptr = *Ptr + ;

     }

 }

 /*

  * 打印 LR(1) 项

  * Item : struct Collection* 需要打印的项

  */

 void PrintItem(struct Collection *Item)

 {

     printf("[%c ->", Item->Expression[]);

     int i = ;

     for(; Item->Expression[i + ] != '\0'; i++)

     {

         printf(" ");

         switch (Item->Expression[i])

         {

             case '\377':

                 printf("<eof>");

                 break;

             case '\376':

                 printf("<@>");

                 break;

             default:

                 printf("%c", Item->Expression[i]);

                 break;

         }

     }

     if (Item->Expression[i] == '\377')

     {

         printf(", <eof>]");

     }

     else

     {

         printf(", %c]", Item->Expression[i]);

     }

 }

 /*

  * 打印项集

  * Item : struct Collection* 需要打印的项集

  */

 void PrintCollections(struct Collection *S)

 {

     printf("-------- Collection ---------\n");

     for (; S != NULL; S = S->next)

     {

         PrintItem(S);

         printf("\n");

     }

     printf("-----------------------------\n");

 }

 /*

  * 添加项到集合

  * S : struct Collection* 项集

  * Tail : struct Collection* 尾部指针

  * LeftUnterminal : char 左非终结符

  * Expression : char* 产生式

  * PreviewSymbol : char 前瞻符号

  */

 void AddItem(struct Collection *S, struct Collection **Tail, char *Expression)

 {

     if (Tail == NULL) {Tail = (struct Collection **)malloc(sizeof(struct Collection **)); (*Tail) = NULL;}

     if ((*Tail) == NULL) {(*Tail) = S;}

     while ((*Tail)->next != NULL) {(*Tail) = (*Tail)->next;}

     // 检查是否重复

     struct Collection *SPtr = S;

     for (; SPtr != NULL; SPtr = SPtr->next)

     {

         if (SPtr->Expression != NULL &&

             Expression != NULL &&

             strcmp(SPtr->Expression, Expression) == )

         {

             return;

         }

     }

     struct Collection *NewItem = (struct Collection*)malloc(sizeof(struct Collection));

     NewItem->Expression = strdup(Expression);

     NewItem->next = NULL;

     (*Tail)->next = NewItem;

     (*Tail) = (*Tail)->next;

 }

 /*

  * 闭包运算

  * S : struct Collection* 项集

  * TerminalCount : int 终结符个数

  */

 void Closure(struct Collection *S, int TerminalCount)

 {

     bool CollectChanged;

     struct Collection *Ptr = S, *Tail = S;

     do        // while (S is still changing)

     {

         CollectChanged = false;

         while (Ptr != NULL)        // for each item [A->β·Cζ，α]∈S

         {

             char *Placeholder = strchr(Ptr->Expression, '\376');

             if (Placeholder != NULL &&

                 *(Placeholder + ) != '\0' &&

                 *(Placeholder + ) != '\0')        // 占位符不能在产生式尾(= =)更不能在前瞻符号的位置上(= =#)！

             {

                 unsigned char Unterminal = *(Placeholder + );

                 if (Unterminal >= 'A' && Unterminal <= 'Z')

                 {

                     int ProductionIndex;

                     for (ProductionIndex = ; GrammerRule[Unterminal][ProductionIndex] != NULL; ProductionIndex++)    // for each production C->γ∈P

                     {

                         char *FIRST = (char*)malloc(TerminalCount + ), FirstSymbol = *(Placeholder + );

                         memset(FIRST, '\0', TerminalCount + );

                         int FIRSTCount = , i;

                         GetFIRST(FirstSymbol, FIRST, &FIRSTCount);

                         for (i = ; i < FIRSTCount; i++)        // for each b∈FIRST(ζα)

                         {

                             if (FIRST[i] != '\0')               // S <- S∪{[C->·γ,b]}

                             {

                                 char *Expr, *GRExpr = GrammerRule[Unterminal][ProductionIndex];

                                 int GRExprLength = strlen(GRExpr);

                                 Expr = (char*)malloc( + GRExprLength +  + );

                                 Expr[] = Unterminal;

                                 Expr[] = '\376';

                                 memcpy(Expr + , GRExpr, GRExprLength);

                                 Expr[ + GRExprLength +  - ] = FIRST[i];

                                 Expr[ + GRExprLength +  +  - ] = '\0';

                                 AddItem(S, &Tail, Expr);

                                 CollectChanged = true;

                             }

                         }

                     }

                 }

             }

             Ptr = Ptr->next;

         }

     }

     while (CollectChanged == true);

 }

 /*

  * Goto 运算

  * S : struct Collection* 项集

  * Symbol : char 前瞻符号

  * TerminalCount : int 终结符个数

  */

 struct Collection *Goto(struct Collection *S, char Symbol, int TerminalCount)

 {

     // moved <- 空集

     struct Collection *Moved = (struct Collection*)malloc(sizeof(struct Collection));

     memset(Moved, '\0', sizeof(struct Collection));

     struct Collection *Tail = Moved;

     while (S != NULL)    // for each item i∈S

     {

         char *Placeholder = strchr(S->Expression, '\376');

         if (Placeholder != NULL && *(Placeholder + ) == Symbol)    // if the form of i is [α->β·xζ,a] then

         {

             char *Expr = strdup(S->Expression);

             Placeholder = strchr(Expr, '\376');

             *Placeholder = Symbol;

             *(Placeholder + ) = '\376';

             AddItem(Moved, &Tail, Expr);    // moved <- moved∪{[α->βx·ζ,a]}

         }

         S = S->next;

     }

     struct Collection *FreeNode = Moved;

     Moved = Moved->next;

     free(FreeNode);

     Closure(Moved, TerminalCount);    // return closure(moved)

     return Moved;

 }

 /*

  * 可以计算字串的 ELFHash

  * str : unsigned char* 字串

  * length : int 字串长度

  */

 unsigned int ELFHash_Bin(unsigned char *str, int length)

 {

     int i = ;

     unsigned int hash = , x = ;

     while (i < length)

     {

         hash = (hash << ) + (str[i++]);

         if ((x = hash & 0xF0000000L) != )

         {

             hash ^= (x >> );

             hash &= ~x;

         }

     }

     return (hash & 0x7FFFFFFF);

 }

 /*

  * 完成特征值计算

  * CC : struct CoreCollection* 要计算特征值的核心项集

  */

 void CompleteFeature(struct CoreCollection *CC)

 {

     struct TrieTreeRoot *TrieRoot = BuildTrieTree();

     struct Collection *SPtr;

     for (SPtr = CC->S; SPtr != NULL; SPtr = SPtr->next)

     {

         InsertItem(TrieRoot, SPtr->Expression);

     }

     CC->FeatureString = GetFeatureString(TrieRoot, &CC->FeatureStringLength);

     CC->FeatureHash = ELFHash_Bin(CC->FeatureString, CC->FeatureStringLength);

 }

 /*

  * 检查核心项集是否存在，并返回项集 ID

  * CC : struct CoreCollection* 核心项集

  * S : struct CoreCollection* 待检测的项集

  */

 int CollectionExist(struct CoreCollection *CC, struct CoreCollection *S)

 {

     // 计算集合 S 的特征码

     CompleteFeature(S);

     // 开始逐个比较特征

     struct CoreCollection *CCPtr = CC;

     for (; CCPtr != NULL; CCPtr = CCPtr->next)

     {

         if (CCPtr->FeatureString == NULL ||

             CCPtr->FeatureHash ==  ||

             CCPtr->FeatureStringLength == )

         {

             CompleteFeature(CCPtr);

         }

         if (CCPtr->FeatureHash == S->FeatureHash &&

             CCPtr->FeatureStringLength == S->FeatureStringLength &&

             memcmp(CCPtr->FeatureString, S->FeatureString, S->FeatureStringLength) == )

         {

             return CCPtr->id;

         }

     }

     return -;

 }

 /*

  * 添加项集到核心项集

  * CC : struct CoreCollection* 核心项集

  * Tail : struct CoreCollection** 核心项集的尾部指针

  * S : struct Collection* 待添加的项集

  * CCid : int 上一个核心项集的 ID

  */

 int AddCoreCollection(struct CoreCollection *CC, struct CoreCollection **Tail, struct Collection *S, int CCid)

 {

     if (Tail == NULL) {Tail = (struct CoreCollection **)malloc(sizeof(struct CoreCollection **)); (*Tail) = NULL;}

     if ((*Tail) == NULL) {(*Tail) = CC;}

     while ((*Tail)->next != NULL) {(*Tail) = (*Tail)->next;}

     struct CoreCollection *CCItem = (struct CoreCollection*)malloc(sizeof(struct CoreCollection));

     CCItem->id = CCid + ;

     CCItem->marked = false;

     CCItem->S = S;

     CCItem->next = NULL;

     int id = CollectionExist(CC, CCItem);

     if (id == -)        // if temp!∈CC

     {

         id = CCItem->id;

         (*Tail)->next = CCItem;                        // CC <- {CC0}

         (*Tail) = (*Tail)->next;

     }

     return id;

 }

 /*

  * 记录 Goto[CCi, symbol]->CCj

  * RecordTable : struct Record* 记录表

  * CCi : int 当前项集 ID

  * Symbol : unsigned char 转移符号

  * CCj : int 转移目的项集 ID

  */

 void Record(struct Record *RecordTable, int CCi, unsigned char Symbol, int CCj)

 {

     // [CCi, Symbol] -> CCj

     if (RecordTable->RecordRow < CCi)        // 新请求的位置大于最大项位，需要更新项位

     {

         // 一次分配 32 条记录空间

         if (RecordTable->RecordRowMax <= CCi)    // 新请求的位置超过最大可使用项数，追加新的项表空间

         {

             RecordTable->RecordRowMax = ((int)(CCi / ) + ) * ;

             RecordTable->Record = (int **)realloc(RecordTable->Record, RecordTable->RecordRowMax);

         }

         RecordTable->RecordRow = CCi;

         int *tmp_spc = (int*)malloc(sizeof(int) * );

         memset(tmp_spc, '\0', sizeof(int) * );

         RecordTable->Record[CCi] = tmp_spc;

     }

     if (RecordTable->Record[CCi][Symbol] == CCj)

     {

         // printf("Find Repeat.\n");

     }

     else if (RecordTable->Record[CCi][Symbol] != )

     {

         printf("Find Conflict.\n");

     }

     else

     {

         RecordTable->Record[CCi][Symbol] = CCj;

         printf("[CC%d, %c] -> CC%d\n", CCi, Symbol, CCj);

     }

 }

 /*

  * 计算 LR 核心项集以及 Goto 表

  */

 void LRCollection()

 {

     int TerminalCount = GetTerminalCount(), CCid = ;

     struct Record *RecordTable = (struct Record *)malloc(sizeof(struct Record));

     memset(RecordTable, '\0', sizeof(struct Record));

     RecordTable->RecordRow = -;

     RecordTable->RecordRowMax = ;

     RecordTable->Record = (int **)malloc(sizeof(int) * );

     struct Collection *S = (struct Collection*)malloc(sizeof(struct Collection));

     memset(S, '\0', sizeof(struct Collection));

     S->Expression = strdup("G\376L\377");

     S->next = NULL;

     Closure(S, TerminalCount);        // CC0 <- closure({[S -> · S', eof]})

     struct CoreCollection *CC = (struct CoreCollection*)malloc(sizeof(struct CoreCollection)), *CCPtr, *CCTail;

     CC->id = ;

     CC->marked = false;

     CC->S = S;                        // CC <- {CC0}

     CC->next = NULL;

     CompleteFeature(CC);

     CCTail = CC;

     for (CCPtr = CC; CCPtr != NULL; CCPtr = CCPtr->next)        // while (new sets are still being added to CC)

     {

         if (CCPtr->marked == false)        // for each unmarked set CCi∈CC

         {

             CCPtr->marked = true;        // mark CCi as processed

             struct Collection *ExprPtr = NULL;

             for (ExprPtr = CCPtr->S; ExprPtr != NULL; ExprPtr = ExprPtr->next)        // for each x following a · in an item in CCi

             {

                 char *Placeholder = strchr(ExprPtr->Expression, '\376');

                 if (Placeholder != NULL && *(Placeholder + ) != '\0' && *(Placeholder + ) != '\0')

                 {

                     unsigned char PrevSym = *(Placeholder + );

                     struct Collection *temp = Goto(CCPtr->S, PrevSym, TerminalCount);    // temp <- goto(CCi, x)

                     int temp_id = AddCoreCollection(CC, &CCTail, temp, CCid);

                     if (temp_id > CCid)

                     {

                         printf("Goto(CC%d, %c):\n", CCPtr->id, PrevSym);

                         PrintCollections(temp);

                         CCid++;        // 意味着新的 CCID 被分配

                     }

                     // record transition form CCi to temp on X

                     Record(RecordTable, CCPtr->id, PrevSym, temp_id);

                     printf("\n");

                 }

             }

         }

     }

 }

 int main(int argc, char **argv)

 {

     InitizationGrammerRule();    // 初始化文法

     LRCollection();

     return ;

 }