/*
* 该程序用于计算语言的核心项集
* RexfieldVon
* 2013年8月24日21:19:25
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h> #ifndef bool
# define bool char
#endif #ifndef true
# define true
#endif #ifndef false
# define false
#endif #define NEXTSIZE 256 struct TrieTreeNode
{
struct TrieTreeNode *Next[NEXTSIZE];
bool Accepted;
}; struct TrieTreeRoot
{
int NodeCount;
struct TrieTreeNode *Tree;
}; struct Collection
{
char *Expression; // 产生式
struct Collection *next;
};
struct CoreCollection
{
struct Collection *S; // 项集
int id; // 项集序号
bool marked; // 是否被处理
unsigned char *FeatureString; // 特征字串
int FeatureStringLength; // 特征字串长度
unsigned int FeatureHash; // 特征哈希
struct CoreCollection *next;
};
struct Record
{
int RecordRow; // 当前最大项位
int RecordRowMax; // 最大分配项数
int **Record; // 记录指针
};
/* 三级指针
* 第一级指向整个产生式组
* 第二级指向单个产生式
* 第三级指向产生式符号单元
* 约定:①所有的大写字母为非终结符②所有小写字母为终结符③'\377'为eof④'\0'为ε⑤'\376'为占位符·
*/
char*** GrammerRule;
/*
* 文法书写约定:
* 每个字符串表示一个单独的产生式
* 第一个字符为产生式左边的非终结符,由初始化引擎进行产生式归并
* 整个文法以 null 结束
*/
char *Grammer[] =
{
"GL",
"LLP", "LP",
"P(P)", "P()",
"\0"
}; /*
* 构建 Trie 树并初始化
* 返回一个新的 Trie 根节点
*/
struct TrieTreeRoot *BuildTrieTree()
{
struct TrieTreeRoot *Root = (struct TrieTreeRoot *)malloc(sizeof(struct TrieTreeRoot));
Root->NodeCount = ;
Root->Tree = (struct TrieTreeNode *)malloc(sizeof(struct TrieTreeNode));
memset(Root->Tree, '\0', sizeof(struct TrieTreeNode));
return Root;
} /*
* 插入新的字符串
* Root : struct TrieTreeRoot* 要操作的 Trie 树根节点
* Item : char* 要插入的字符串
*/
void InsertItem(struct TrieTreeRoot *Root, char *Item)
{
struct TrieTreeNode *Ptr = Root->Tree;
int index = ;
unsigned char Charactor; while ((Charactor = Item[index]) != '\0')
{
if (Ptr->Next[Charactor] == NULL)
{
Ptr->Next[Charactor] = (struct TrieTreeNode *)malloc(sizeof(struct TrieTreeNode));
memset(Ptr->Next[Charactor], '\0', sizeof(struct TrieTreeNode));
Root->NodeCount++;
}
Ptr = Ptr->Next[Charactor];
index++;
} Ptr->Accepted = true;
} /*
* 递归序列化 Trie 树
* Node : struct TrieTreeNode* 当前操作的 Trie 节点
* WritePtr : unsigned char* 特征串写入指针
*/
unsigned char *DoFeature(struct TrieTreeNode *Node, unsigned char *WritePtr)
{
int i, count = ;
unsigned char *ErgodicPtr; *WritePtr = (unsigned char)Node->Accepted; // 写入节点是否接受
WritePtr++; ErgodicPtr = WritePtr; // 记录集合起始地址 for (i = ; i < NEXTSIZE; i++) // 将该组记录写入特征串
{
if (Node->Next[i] != NULL)
{
*WritePtr = (char)i;
WritePtr++;
count++;
}
} *WritePtr = '\0'; // 写入组分隔符
WritePtr++; for (i = ; i < count; i++) // 递归调用处理所有边
{
WritePtr = DoFeature(Node->Next[ErgodicPtr[i]], WritePtr);
} return WritePtr;
} /*
* 取得 Trie 的特征串,即序列化 Trie 树
* Root : struct TrieTreeRoot* 要操作的 Trie 树根节点
* StringLength : int* 长度指针(为了返回二进制串而设置)
*/
unsigned char *GetFeatureString(struct TrieTreeRoot *Root, int *StringLength)
{
struct TrieTreeNode *Ptr = Root->Tree;
// 假设最坏情况下,每个节点只有一条边,那么存储该节点就需要三个单元(Accepted、边、分隔符)
// 但实际上真正用到的只有 3N-1 个字节
unsigned char *FeatureString = (unsigned char *)malloc(Root->NodeCount * );
unsigned char *WritePtr = FeatureString; WritePtr = DoFeature(Ptr, WritePtr); *StringLength = WritePtr - FeatureString;
return FeatureString;
} /*
* 初始化文法序列
*/
void InitizationGrammerRule()
{
// 分配表头空间
GrammerRule = (char***)malloc(sizeof(int) * );
memset(GrammerRule, '\0', sizeof(int) * );
// 扫描整个文法记录每个非终结符产生式的个数
int UnterminalOp[], index;
unsigned char Unterminal;
memset(UnterminalOp, '\0', * );
for (index = ; (Unterminal = Grammer[index][]) != '\0'; index++)
{
UnterminalOp[Unterminal]++;
}
// 写入产生式
for (index = ; (Unterminal = Grammer[index][]) != '\0'; index++)
{
if(GrammerRule[Unterminal] == NULL)
{
GrammerRule[Unterminal] = (char**)malloc(sizeof(int) * (UnterminalOp[Unterminal] + ));
memset(GrammerRule[Unterminal], '\0', sizeof(int) * (UnterminalOp[Unterminal] + ));
}
// 找到空位
int blank = ;
while (GrammerRule[Unterminal][blank] != '\0') {blank++;}
GrammerRule[Unterminal][blank] = &Grammer[index][];
}
} /*
* 取得终结符数量
* return 终结符的数量
*/
int GetTerminalCount()
{
int i, TerminalCount = ;
for (i = ; i < ; i++)
{
if (GrammerRule[i] != NULL)
{
int k = ;
while (GrammerRule[i][k] != NULL)
{
int n = ;
while (GrammerRule[i][k][n] != '\0')
{
char c = GrammerRule[i][k][n];
if (c < 'A' || c > 'Z')
{
TerminalCount++;
}
n++;
}
k++;
}
}
}
return TerminalCount;
} /*
* 递归取得 FIRST 集
* Token : unsigned char 需要打印的符号
* FIRST : char* FIRST集
* Ptr : int* FIRST集的位置指针
*/
void GetFIRST(unsigned char Token, char *FIRST, int *Ptr)
{
if (Token >= 'A' && Token <= 'Z' && GrammerRule[Token] != NULL)
{
int i = ;
while (GrammerRule[Token][i] != NULL)
{
GetFIRST(GrammerRule[Token][i++][], FIRST, Ptr);
}
}
else if (Token < 'A' || Token > 'Z')
{
FIRST[*Ptr] = Token;
*Ptr = *Ptr + ;
}
} /*
* 打印 LR(1) 项
* Item : struct Collection* 需要打印的项
*/
void PrintItem(struct Collection *Item)
{
printf("[%c ->", Item->Expression[]);
int i = ;
for(; Item->Expression[i + ] != '\0'; i++)
{
printf(" ");
switch (Item->Expression[i])
{
case '\377':
printf("<eof>");
break;
case '\376':
printf("<@>");
break;
default:
printf("%c", Item->Expression[i]);
break;
}
}
if (Item->Expression[i] == '\377')
{
printf(", <eof>]");
}
else
{
printf(", %c]", Item->Expression[i]);
}
} /*
* 打印项集
* Item : struct Collection* 需要打印的项集
*/
void PrintCollections(struct Collection *S)
{
printf("-------- Collection ---------\n");
for (; S != NULL; S = S->next)
{
PrintItem(S);
printf("\n");
}
printf("-----------------------------\n");
} /*
* 添加项到集合
* S : struct Collection* 项集
* Tail : struct Collection* 尾部指针
* LeftUnterminal : char 左非终结符
* Expression : char* 产生式
* PreviewSymbol : char 前瞻符号
*/
void AddItem(struct Collection *S, struct Collection **Tail, char *Expression)
{
if (Tail == NULL) {Tail = (struct Collection **)malloc(sizeof(struct Collection **)); (*Tail) = NULL;}
if ((*Tail) == NULL) {(*Tail) = S;}
while ((*Tail)->next != NULL) {(*Tail) = (*Tail)->next;}
// 检查是否重复
struct Collection *SPtr = S;
for (; SPtr != NULL; SPtr = SPtr->next)
{
if (SPtr->Expression != NULL &&
Expression != NULL &&
strcmp(SPtr->Expression, Expression) == )
{
return;
}
}
struct Collection *NewItem = (struct Collection*)malloc(sizeof(struct Collection));
NewItem->Expression = strdup(Expression);
NewItem->next = NULL;
(*Tail)->next = NewItem;
(*Tail) = (*Tail)->next;
} /*
* 闭包运算
* S : struct Collection* 项集
* TerminalCount : int 终结符个数
*/
void Closure(struct Collection *S, int TerminalCount)
{
bool CollectChanged;
struct Collection *Ptr = S, *Tail = S;
do // while (S is still changing)
{
CollectChanged = false;
while (Ptr != NULL) // for each item [A->β·Cζ,α]∈S
{
char *Placeholder = strchr(Ptr->Expression, '\376');
if (Placeholder != NULL &&
*(Placeholder + ) != '\0' &&
*(Placeholder + ) != '\0') // 占位符不能在产生式尾(= =)更不能在前瞻符号的位置上(= =#)!
{
unsigned char Unterminal = *(Placeholder + );
if (Unterminal >= 'A' && Unterminal <= 'Z')
{
int ProductionIndex;
for (ProductionIndex = ; GrammerRule[Unterminal][ProductionIndex] != NULL; ProductionIndex++) // for each production C->γ∈P
{
char *FIRST = (char*)malloc(TerminalCount + ), FirstSymbol = *(Placeholder + );
memset(FIRST, '\0', TerminalCount + );
int FIRSTCount = , i;
GetFIRST(FirstSymbol, FIRST, &FIRSTCount);
for (i = ; i < FIRSTCount; i++) // for each b∈FIRST(ζα)
{
if (FIRST[i] != '\0') // S <- S∪{[C->·γ,b]}
{
char *Expr, *GRExpr = GrammerRule[Unterminal][ProductionIndex];
int GRExprLength = strlen(GRExpr);
Expr = (char*)malloc( + GRExprLength + + );
Expr[] = Unterminal;
Expr[] = '\376';
memcpy(Expr + , GRExpr, GRExprLength);
Expr[ + GRExprLength + - ] = FIRST[i];
Expr[ + GRExprLength + + - ] = '\0';
AddItem(S, &Tail, Expr);
CollectChanged = true;
}
}
}
}
}
Ptr = Ptr->next;
}
}
while (CollectChanged == true);
} /*
* Goto 运算
* S : struct Collection* 项集
* Symbol : char 前瞻符号
* TerminalCount : int 终结符个数
*/
struct Collection *Goto(struct Collection *S, char Symbol, int TerminalCount)
{
// moved <- 空集
struct Collection *Moved = (struct Collection*)malloc(sizeof(struct Collection));
memset(Moved, '\0', sizeof(struct Collection));
struct Collection *Tail = Moved;
while (S != NULL) // for each item i∈S
{
char *Placeholder = strchr(S->Expression, '\376');
if (Placeholder != NULL && *(Placeholder + ) == Symbol) // if the form of i is [α->β·xζ,a] then
{
char *Expr = strdup(S->Expression);
Placeholder = strchr(Expr, '\376');
*Placeholder = Symbol;
*(Placeholder + ) = '\376';
AddItem(Moved, &Tail, Expr); // moved <- moved∪{[α->βx·ζ,a]}
}
S = S->next;
}
struct Collection *FreeNode = Moved;
Moved = Moved->next;
free(FreeNode);
Closure(Moved, TerminalCount); // return closure(moved)
return Moved;
} /*
* 可以计算字串的 ELFHash
* str : unsigned char* 字串
* length : int 字串长度
*/
unsigned int ELFHash_Bin(unsigned char *str, int length)
{
int i = ;
unsigned int hash = , x = ;
while (i < length)
{
hash = (hash << ) + (str[i++]);
if ((x = hash & 0xF0000000L) != )
{
hash ^= (x >> );
hash &= ~x;
}
}
return (hash & 0x7FFFFFFF);
} /*
* 完成特征值计算
* CC : struct CoreCollection* 要计算特征值的核心项集
*/
void CompleteFeature(struct CoreCollection *CC)
{
struct TrieTreeRoot *TrieRoot = BuildTrieTree();
struct Collection *SPtr;
for (SPtr = CC->S; SPtr != NULL; SPtr = SPtr->next)
{
InsertItem(TrieRoot, SPtr->Expression);
}
CC->FeatureString = GetFeatureString(TrieRoot, &CC->FeatureStringLength);
CC->FeatureHash = ELFHash_Bin(CC->FeatureString, CC->FeatureStringLength);
} /*
* 检查核心项集是否存在,并返回项集 ID
* CC : struct CoreCollection* 核心项集
* S : struct CoreCollection* 待检测的项集
*/
int CollectionExist(struct CoreCollection *CC, struct CoreCollection *S)
{
// 计算集合 S 的特征码
CompleteFeature(S);
// 开始逐个比较特征
struct CoreCollection *CCPtr = CC;
for (; CCPtr != NULL; CCPtr = CCPtr->next)
{
if (CCPtr->FeatureString == NULL ||
CCPtr->FeatureHash == ||
CCPtr->FeatureStringLength == )
{
CompleteFeature(CCPtr);
}
if (CCPtr->FeatureHash == S->FeatureHash &&
CCPtr->FeatureStringLength == S->FeatureStringLength &&
memcmp(CCPtr->FeatureString, S->FeatureString, S->FeatureStringLength) == )
{
return CCPtr->id;
}
}
return -;
} /*
* 添加项集到核心项集
* CC : struct CoreCollection* 核心项集
* Tail : struct CoreCollection** 核心项集的尾部指针
* S : struct Collection* 待添加的项集
* CCid : int 上一个核心项集的 ID
*/
int AddCoreCollection(struct CoreCollection *CC, struct CoreCollection **Tail, struct Collection *S, int CCid)
{
if (Tail == NULL) {Tail = (struct CoreCollection **)malloc(sizeof(struct CoreCollection **)); (*Tail) = NULL;}
if ((*Tail) == NULL) {(*Tail) = CC;}
while ((*Tail)->next != NULL) {(*Tail) = (*Tail)->next;} struct CoreCollection *CCItem = (struct CoreCollection*)malloc(sizeof(struct CoreCollection));
CCItem->id = CCid + ;
CCItem->marked = false;
CCItem->S = S;
CCItem->next = NULL; int id = CollectionExist(CC, CCItem);
if (id == -) // if temp!∈CC
{
id = CCItem->id;
(*Tail)->next = CCItem; // CC <- {CC0}
(*Tail) = (*Tail)->next;
}
return id;
} /*
* 记录 Goto[CCi, symbol]->CCj
* RecordTable : struct Record* 记录表
* CCi : int 当前项集 ID
* Symbol : unsigned char 转移符号
* CCj : int 转移目的项集 ID
*/
void Record(struct Record *RecordTable, int CCi, unsigned char Symbol, int CCj)
{
// [CCi, Symbol] -> CCj
if (RecordTable->RecordRow < CCi) // 新请求的位置大于最大项位,需要更新项位
{
// 一次分配 32 条记录空间
if (RecordTable->RecordRowMax <= CCi) // 新请求的位置超过最大可使用项数,追加新的项表空间
{
RecordTable->RecordRowMax = ((int)(CCi / ) + ) * ;
RecordTable->Record = (int **)realloc(RecordTable->Record, RecordTable->RecordRowMax);
}
RecordTable->RecordRow = CCi; int *tmp_spc = (int*)malloc(sizeof(int) * );
memset(tmp_spc, '\0', sizeof(int) * );
RecordTable->Record[CCi] = tmp_spc;
}
if (RecordTable->Record[CCi][Symbol] == CCj)
{
// printf("Find Repeat.\n");
}
else if (RecordTable->Record[CCi][Symbol] != )
{
printf("Find Conflict.\n");
}
else
{
RecordTable->Record[CCi][Symbol] = CCj;
printf("[CC%d, %c] -> CC%d\n", CCi, Symbol, CCj);
}
} /*
* 计算 LR 核心项集以及 Goto 表
*/
void LRCollection()
{
int TerminalCount = GetTerminalCount(), CCid = ; struct Record *RecordTable = (struct Record *)malloc(sizeof(struct Record));
memset(RecordTable, '\0', sizeof(struct Record));
RecordTable->RecordRow = -;
RecordTable->RecordRowMax = ;
RecordTable->Record = (int **)malloc(sizeof(int) * ); struct Collection *S = (struct Collection*)malloc(sizeof(struct Collection));
memset(S, '\0', sizeof(struct Collection));
S->Expression = strdup("G\376L\377");
S->next = NULL;
Closure(S, TerminalCount); // CC0 <- closure({[S -> · S', eof]}) struct CoreCollection *CC = (struct CoreCollection*)malloc(sizeof(struct CoreCollection)), *CCPtr, *CCTail;
CC->id = ;
CC->marked = false;
CC->S = S; // CC <- {CC0}
CC->next = NULL;
CompleteFeature(CC);
CCTail = CC; for (CCPtr = CC; CCPtr != NULL; CCPtr = CCPtr->next) // while (new sets are still being added to CC)
{
if (CCPtr->marked == false) // for each unmarked set CCi∈CC
{
CCPtr->marked = true; // mark CCi as processed
struct Collection *ExprPtr = NULL;
for (ExprPtr = CCPtr->S; ExprPtr != NULL; ExprPtr = ExprPtr->next) // for each x following a · in an item in CCi
{
char *Placeholder = strchr(ExprPtr->Expression, '\376');
if (Placeholder != NULL && *(Placeholder + ) != '\0' && *(Placeholder + ) != '\0')
{
unsigned char PrevSym = *(Placeholder + );
struct Collection *temp = Goto(CCPtr->S, PrevSym, TerminalCount); // temp <- goto(CCi, x)
int temp_id = AddCoreCollection(CC, &CCTail, temp, CCid);
if (temp_id > CCid)
{
printf("Goto(CC%d, %c):\n", CCPtr->id, PrevSym);
PrintCollections(temp);
CCid++; // 意味着新的 CCID 被分配
}
// record transition form CCi to temp on X
Record(RecordTable, CCPtr->id, PrevSym, temp_id);
printf("\n");
}
}
}
}
} int main(int argc, char **argv)
{
InitizationGrammerRule(); // 初始化文法 LRCollection();
return ;
}