[置顶] NASM源代码分析--预处理过程

时间:2022-07-30 04:50:54

 

预处理过程   switch ( operating_mode ) {     case op_depend:         ...     break;       case op_preprocess:     {         char *line;                 // getline()返回值         char *file_name = NULL;     // 用于函数src_get()         long prior_linnum=0;       // prior_linnum->以前已经处理的行数;         int   lineinc=0;            // 本次处理的行数           if (*outname)               // outname输出文件,由参数 -o 指定         {             ofile = fopen(outname, "w");        // 打开输出文件,权限为“写”             if (!ofile)                        // 不能打开输出文件,报错并退出                 report_error (ERR_FATAL | ERR_NOFILE,"unable to open output file `%s'", outname);           }         else             ofile = NULL;                // 当前汇编代码的位置,包括代码所在段和段偏移,known表示是否清楚当前代码的段和偏移         location.known = FALSE;       // 为预处理初始化各个变量,实际运行的函数是pp_reset()(具体讨论)         preproc->reset (inname, 2, report_error, evaluate, &nasmlist);                while ( (line = preproc->getline()) )       // 从源程序获取一行代码并处理(具体讨论)         { // linnum->当前已经处理的行数; prior_linnum->以前已经处理的行数; lineinc->本次处理的行数             long linnum = prior_linnum += lineinc;              int altline = src_get(&linnum, &file_name);             if (altline)             {                 if (altline==1 && lineinc==1)                         nasm_fputs("", ofile);                 else                 {                     lineinc = (altline != -1 || lineinc!=1);                     fprintf(ofile ? ofile : stdout, "%%line %ld+%d %s\n",                             linnum, lineinc, file_name);                 }                 prior_linnum = linnum;             }             nasm_fputs(line, ofile);                // 将预处理后的该行代码输出到输出文件中。             nasm_free (line);         }         nasm_free(file_name);         preproc->cleanup();                         // 预处理完成,清理内存         if (ofile)             fclose(ofile);         if (ofile && terminate_after_phase)             remove(outname);     }         break;   case op_normal:     ...     
break; }       static void pp_reset (char *file, int apass, efunc errfunc, evalfunc eval,               ListGen *listgen) {     int h;     error = errfunc;                        // 设置报错函数     cstk = NULL;                            // 初始上下文堆栈     istk = nasm_malloc(sizeof(Include));    // 初始化文件包含堆栈     istk->next = NULL;     istk->conds = NULL;     istk->expansion = NULL;     istk->mstk = NULL;     istk->fp = fopen(file, "r");            // 初始化打开的是第一个输入文件(源程序文件)     istk->fname = NULL;     src_set_fname(nasm_strdup(file));     src_set_linnum(0);     istk->lineinc = 1;       if (!istk->fp)         error (ERR_FATAL|ERR_NOFILE, "unable to open input file `%s'", file); defining = NULL;                // defining->当前正在进行的多行宏或正在读取的%rep块   for (h=0; h<NHASH; h++)         // 初始化单行和多行宏空间(用哈希算法保存)     {         mmacros[h] = NULL;         smacros[h] = NULL;     }     unique = 0;     stdmacpos = stdmac;             // 赋值系统宏定义     any_extrastdmac = (extrastdmac != NULL);     list = listgen;                 // 设置列表     evaluate = eval;                    pass = apass;                   // 只产生Makefile依赖关系时(参数-M),pass=0; 预处理时(参数-e),pass=2; }       // pp_getline:读取代码并处理 // 读取的代码分两类:1.多行宏,每执行一次,读取整个mmacro(分2种,多行宏和%rep块) //                   2.普通代码,每执行一次,读取一行代码 static char *pp_getline (void) {     char *line;     Token *tline;     int ret;       while (1)     {         /*          * Fetch a tokenised line, either from the macro-expansion          * buffer or from the input file.          
*/         tline = NULL;         while (istk->expansion && istk->expansion->finishes)                        {   // 读取mmacro(在NASM中,分为多行宏和%rep...%endrep块)             Line *l = istk->expansion;             if (!l->finishes->name && l->finishes->in_progress > 1)             {   // 读取%rep...%endrep块(%rep块的名字,即l->finishes->name为NULL)                 Line *ll;                   l->finishes->in_progress--;     // in_progress为%rep块重复遍数                                 // 注解4 for (l = l->finishes->expansion; l; l = l->next)                 {                     Token *t, *tt, **tail;                       ll = nasm_malloc(sizeof(Line));                     ll->next = istk->expansion;                     ll->finishes = NULL;                     ll->first = NULL;                     tail = &ll->first;                       for (t = l->first; t; t = t->next)                     {                         if (t->text)                         {                             tt = *tail = nasm_malloc(sizeof(Token));                             tt->next = NULL;                             tail = &tt->next;                             tt->type = t->type;                             tt->text = nasm_strdup(t->text);                             tt->mac = NULL;                         }                     }                       istk->expansion = ll;                 }             }             else             {   // 在多行宏中的%rep...%endrep块                 if (defining)                 {                     if (defining->name)                         error (ERR_PANIC,"defining with name in expansion");                     else if (istk->mstk->name)                         error (ERR_FATAL, "`%%rep' without `%%endrep' within"                            " expansion of macro `%s'", istk->mstk->name);                 }                   /*                  * FIXME: investigate the relationship at this point between                  * istk->mstk and l->finishes                  */                 { 
                    MMacro *m = istk->mstk;                     istk->mstk = m->next_active;                     if (m->name)                     {                         /*                          * This was a real macro call, not a %rep, and                          * therefore the parameter information needs to                          * be freed.                          */                         nasm_free(m->params);                         free_tlist(m->iline);                         nasm_free(m->paramlen);                         l->finishes->in_progress = FALSE;                     }                 else                     free_mmacro(m);                 }                 istk->expansion = l->next;                 nasm_free (l);                 list->downlevel (LIST_MACRO);             }         }           while (1)                       /* until we get a line we can use */         {             if (istk->expansion)        /* from a macro expansion */             { // 处理多行宏中的代码,将之从Token转化为源代码                 char *p;                 Line *l = istk->expansion;                 tline = l->first;                 istk->expansion = l->next;                 nasm_free (l);                 p = detoken(tline);                 list->line (LIST_MACRO, p);                 nasm_free(p);                 break;             }             line = read_line();             // 获取一行并处理,重点!!!             
if (line)             {                   line = prepreproc(line);        // 格式翻译,将表示行信息的GNU CPP格式(# line “filename”)翻译成NASM预处理格式(%line lineno “filename”),若该行不是行信息,则什么都不做                 tline = tokenise(line);         // token                 nasm_free (line);                 break;             }             {                 // 如果程序能运行到这里,说明上面的函数read_line()不能从当前包含文件中读取源码,即已经从该文件中读完代码,                 // 则关闭当前包含文件并指向下一个包含文件直到处理完所有源代码文件                 Include *i = istk;                 fclose(i->fp);                      if (i->conds)                         error(ERR_FATAL, "expected `%%endif' before end of file");                 istk = i->next;                         // 指向下一个包含文件                 list->downlevel (LIST_INCLUDE);         // 从list方面取出一个包含文件,与此对应,添加一个包含文件则为list->uplevel(LIST_INCLUDE),可参见函数do_directive()的case:PP_INCLUDE项                 src_set_linnum(i->lineno);              // 见注解1                 nasm_free ( src_set_fname(i->fname) );                 nasm_free (i);                 if (!istk)                     return NULL;             }         }           if (!defining && !(istk->conds && !emitting(istk->conds->state)))       // 如果不是多行宏定义内代码,且当前不处于被条件汇编屏蔽的代码中,则展开多行宏参数             tline = expand_mmac_params(tline);           ret = do_directive(tline);      // 处理操作符,返回值:bit0位设置了1,表示该行的操作符是已定义,若不合法,则bit0置为0         if (ret & 1)         {                               // 符合操作,下一个循环             continue;         }         else if (defining)              {   // 如果不是预处理符,则判断该行代码是否在多行宏定义内             // 注解2             Line *l = nasm_malloc(sizeof(Line));             l->next = defining->expansion;             l->first = tline;             l->finishes = FALSE;             defining->expansion = l;             continue;         }         else if (istk->conds && !emitting(istk->conds->state))         {             // 注解3             free_tlist(tline);             continue;         }         else if (istk->mstk && !istk->mstk->in_progress)         {   // 多行宏/%rep块             
free_tlist(tline);             continue;         }         else         {             tline = expand_smacro(tline);             ret = expand_mmacro(tline);             if (!ret)             {                 /*                  * De-tokenise the line again, and emit it.                  */                 line = detoken(tline);                 free_tlist (tline);                 break;             }             else             {                 continue;          /* expand_mmacro calls free_tlist */             }         }     }     return line; }     注解: 1.src_set_linnum(i->lineno); 设置包含文件读取行信息, 例如代码文件file0.c:         ...     语句1     %include “file1.c”     语句2     ...       当运行完file0.c的语句1,读取到%include时,nasm将文件file1.c加入到包含文件堆栈中,并将istk指向这个包含文件,在下一个循环读取代码时,将从文件file1.c第一行开始处理, 处理完file1.c后从堆栈中删除包含文件file1.c,即将istk指向包含文件file0.c,代码为             istk = i->next;                                     list->downlevel (LIST_INCLUDE);         并运行             src_set_linnum(i->lineno);              从file0.c的语句2开始继续处理。 (istk是指向包含文件堆栈栈顶的指针)     2.如果当前读取的代码不是预处理符号时,则有多种情况,其中一种情况就是该代码在多行宏定义中。     多行宏的处理过程: a)     为多行宏建立一个保存信息的空间defining,这是在NASM读到%macro或%imacro时做的,代码见函数do_directive(Token *tline)的case PP_MACRO b)     将宏定义中的每一行代码都保存到defining->expansion链表中,注解2的代码就是这个功能 c)     NASM读到代码%endmacro时,结束当前宏定义,并将defining信息保存到mmacros中,同时清defining, 代码见函数do_directive(Token *tline)的case PP_ENDMACRO     3.对于类似 %if FALSE     ......     ...... %endif  这种预处理中不需要使用的代码,可以不用处理,用continue直接跳过。可以参考代码preproc.c中函数do_directive()的case PP_IF或case PP_ELIF     4.在NASM中,%rep的语法是     %rep    重复遍数(数字)         语句     %endrep  例如:     %rep 4         inc eax         (1)     %endrep  则相当于     inc eax         (2)     inc eax     inc eax     inc eax  在该部分后面的for循环就是将代码(1)转换成代码(2)并将语句加入到istk->expansion链表中。