uvm_regex——DPI在UVM中的实现(三)

时间:2022-06-12 12:02:00

  UVM的正则表达式是在uvm_regex.cc和uvm_regex.svh中实现的,其中uvm_regex.svh的源代码如下:

`ifndef UVM_REGEX_NO_DPI
import "DPI-C" context function int uvm_re_match(string re, string str);
import "DPI-C" context function void uvm_dump_re_cache();
import "DPI-C" context function string uvm_glob_to_re(string glob);

`else

// Pure-SystemVerilog fallback used when UVM_REGEX_NO_DPI is defined.
// It does not understand regular expressions; it performs glob-style
// matching only, where "*" matches any run of characters (including
// none) and "?" matches exactly one character.
//
// Returns 0 when str matches re and 1 otherwise.  An empty re
// returns 0 for any str.
function int uvm_re_match(string re, string str);
  // NOTE: declarations are deliberately kept separate from the
  // assignments below so the indices are reset on every call.
  int  pat_idx, pat_resume;
  int  str_idx, str_resume;
  int  str_len;
  byte pat_ch;

  if (re.len() == 0)
    return 0;

  // A leading ^ once disabled an implicit wildcard.  There is no implicit
  // wildcard any more, so the character is simply discarded.
  if (re[0] == "^")
    re = re.substr(1, re.len()-1);

  pat_idx    = 0;
  pat_resume = 0;
  str_idx    = 0;
  str_resume = 0;
  str_len    = str.len();

  // Literal prefix: consume characters one-for-one until the first "*"
  // in the pattern (or the end of the string).
  while (str_idx != str_len && re.getc(pat_idx) != "*") begin
    if (re.getc(pat_idx) != "?" && re.getc(pat_idx) != str.getc(str_idx))
      return 1;
    pat_idx++;
    str_idx++;
  end

  // Wildcard phase with backtracking to the most recent "*".
  while (str_idx != str_len) begin
    pat_ch = re.getc(pat_idx);
    if (pat_ch == "*") begin
      pat_idx++;
      // A trailing "*" swallows whatever is left of the string.
      if (pat_idx == re.len())
        return 0;
      // Remember where to resume if the text after this "*" fails to match.
      pat_resume = pat_idx;
      str_resume = str_idx + 1;
    end
    else if (pat_ch == "?" || pat_ch == str.getc(str_idx)) begin
      pat_idx++;
      str_idx++;
    end
    else begin
      // Mismatch: let the last "*" absorb one more character and retry.
      pat_idx = pat_resume;
      str_idx = str_resume;
      str_resume++;
    end
  end

  // The string is exhausted; any "*" left in the pattern may match nothing.
  while (pat_idx < re.len() && re.getc(pat_idx) == "*")
    pat_idx++;

  return (pat_idx == re.len()) ? 0 : 1;
endfunction

// Stub for the UVM_REGEX_NO_DPI build: intentionally does nothing.  It
// exists so that callers of uvm_dump_re_cache() still compile when the
// DPI import of the same name is not available.
function void uvm_dump_re_cache();
endfunction

// Stub for the UVM_REGEX_NO_DPI build: returns the glob unchanged.  The
// SystemVerilog-only uvm_re_match above matches glob syntax directly, so
// no conversion to a regular expression is performed here.
function string uvm_glob_to_re(string glob);
  return glob;
endfunction

`endif

然后,再看看uvm_regex.cc的源代码:

uvm_regex——DPI在UVM中的实现(三)uvm_regex——DPI在UVM中的实现(三)
#include "uvm_dpi.h"
#include <sys/types.h>


// Delimiter placed at both ends of a string ("/.../") to mark it as a
// regular expression rather than a glob.
const char uvm_re_bracket_char = '/';
// Longest regular expression, in characters, that uvm_re_match accepts.
#define UVM_REGEX_MAX_LENGTH 2048
// Shared scratch buffer.  uvm_re_match copies the expression into it (and
// reuses it for the regerror() text); uvm_glob_to_re builds its result in
// it and returns a pointer to it, so the result is only valid until the
// next call to either function.
static char uvm_re[UVM_REGEX_MAX_LENGTH+4];


//--------------------------------------------------------------------
// uvm_re_match
//
// Match a string against a POSIX extended regular expression.
//
// The expression may optionally be wrapped in uvm_re_bracket_char
// ("/re/"); the brackets are stripped before compilation.  The
// expression is compiled with regcomp() on every call (there is no
// cache) and matched with regexec().
//
// Returns 0 when str matches.  Returns 1 when either argument is NULL
// or the expression is longer than UVM_REGEX_MAX_LENGTH.  Otherwise
// returns the non-zero regcomp()/regexec() error code (for example
// REG_NOMATCH when the string does not match).  Length and compile
// errors are also reported through m_uvm_report_dpi.
//--------------------------------------------------------------------
int uvm_re_match(const char * re, const char *str)
{
  regex_t rexp;   // lives on the stack: no allocation, no allocation failure
  int err;

  // safety check.  Args should never be ~null~ since this is called
  // from DPI.  But we'll check anyway.
  if (re == NULL)
    return 1;
  if (str == NULL)
    return 1;

  size_t len = strlen(re);
  char * rex = &uvm_re[0];

  if (len > UVM_REGEX_MAX_LENGTH) {
    const char* err_str = "uvm_re_match : regular expression greater than max %0d: |%s|";
    size_t buf_size = strlen(err_str) + int_str_max(10) + len + 1;
    char buffer[buf_size];
    snprintf(buffer, buf_size, err_str, UVM_REGEX_MAX_LENGTH, re);
    m_uvm_report_dpi(M_UVM_ERROR,
                     (char*) "UVM/DPI/REGEX_MAX",
                     &buffer[0],
                     M_UVM_NONE,
                     (char*)__FILE__,
                     __LINE__);
    return 1;
  }

  // We copy the regexp because we need to remove any brackets around it.
  // len <= UVM_REGEX_MAX_LENGTH here, so len + 1 bytes always fit and the
  // copy is always NUL-terminated (strncpy would leave an expression of
  // exactly UVM_REGEX_MAX_LENGTH characters unterminated, exposing stale
  // bytes left in the shared buffer by an earlier call).
  memcpy(uvm_re, re, len + 1);
  if (len > 1 && (re[0] == uvm_re_bracket_char) && re[len-1] == uvm_re_bracket_char) {
    uvm_re[len-1] = '\0';   // drop the closing bracket
    rex++;                  // skip the opening bracket
  }

  err = regcomp(&rexp, rex, REG_EXTENDED);

  if (err != 0) {
    // uvm_re is reused to hold the human-readable regerror() text.
    regerror(err, &rexp, uvm_re, UVM_REGEX_MAX_LENGTH-1);
    const char * err_str = "uvm_re_match : invalid glob or regular expression: |%s||%s|";
    size_t buf_size = strlen(err_str) + len + strlen(uvm_re) + 1;
    char buffer[buf_size];
    snprintf(buffer, buf_size, err_str, re, uvm_re);
    m_uvm_report_dpi(M_UVM_ERROR,
                     (char*) "UVM/DPI/REGEX_INV",
                     &buffer[0],
                     M_UVM_NONE,
                     (char*)__FILE__,
                     __LINE__);
    // No regfree() here: after a failed regcomp() the contents of the
    // regex_t are undefined, so there is nothing valid to release.
    return err;
  }

  err = regexec(&rexp, str, 0, NULL, 0);
  regfree(&rexp);

  return err;
}


//--------------------------------------------------------------------
// uvm_glob_to_re
//
// Convert a glob expression to a bracketed POSIX regular expression
// of the form "/^...$/".
//
// Returns:
//   - NULL when glob is NULL;
//   - glob itself (unconverted), after reporting an error through
//     m_uvm_report_dpi, when glob is longer than 2040 characters or
//     its converted form would not fit in the uvm_re buffer;
//   - otherwise a pointer to the file-level uvm_re buffer holding the
//     result.  That buffer is overwritten by the next call.
//
// An empty glob, or a glob consisting of a single bracket character,
// yields an empty string.  A glob that is already bracketed ("/.../")
// is treated as a regular expression and copied through unchanged.
//--------------------------------------------------------------------

// Formats err_str (which must contain exactly one "%s", filled with glob)
// and reports it as a UVM/DPI/REGEX_MAX error.
static void uvm_glob_report_too_long(const char *err_str, const char *glob)
{
  size_t buf_size = strlen(err_str) + strlen(glob) + 1;
  char buffer[buf_size];
  snprintf(buffer, buf_size, err_str, glob);
  m_uvm_report_dpi(M_UVM_ERROR,
                   (char*) "UVM/DPI/REGEX_MAX",
                   &buffer[0],
                   M_UVM_NONE,
                   (char*)__FILE__,
                   __LINE__);
}

const char * uvm_glob_to_re(const char *glob)
{
  const char *p;
  size_t len;

  // safety check.  Glob should never be ~null~ since this is called
  // from DPI.  But we'll check anyway.
  if (glob == NULL)
    return NULL;

  len = strlen(glob);

  if (len > 2040) {
    uvm_glob_report_too_long(
      "uvm_glob_to_re : glob expression greater than max 2040: |%s|", glob);
    return glob;
  }

  // If either of the following cases appear then return an empty string
  //
  //  1.  The glob string is empty (it has zero characters)
  //  2.  The glob string has a single character that is the
  //      uvm_re_bracket_char  (i.e. "/")
  if (len == 0 || (len == 1 && *glob == uvm_re_bracket_char))
  {
    uvm_re[0] = '\0';
    return &uvm_re[0];  // return an empty string
  }

  // If bracketed with the /glob/, then it's already a regex.
  // len <= 2040 here, so the copy fits in uvm_re.
  if (glob[0] == uvm_re_bracket_char && glob[len-1] == uvm_re_bracket_char)
  {
    strcpy(uvm_re, glob);
    return &uvm_re[0];
  }

  // Convert the glob to a true regular expression (Posix syntax)
  len = 0;

  uvm_re[len++] = uvm_re_bracket_char;

  // ^ goes at the beginning, unless the glob already supplies one
  if (*glob != '^')
    uvm_re[len++] = '^';

  for (p = glob; *p; p++)
  {
    // A glob character expands to at most two bytes, and three more bytes
    // are needed afterwards for '$', the closing bracket and the NUL.
    // Without this check a glob made mostly of metacharacters (up to 2040
    // of them, two bytes each) would overrun uvm_re.
    if (len + 5 > sizeof(uvm_re)) {
      uvm_glob_report_too_long(
        "uvm_glob_to_re : glob expression too long after conversion to a regular expression: |%s|",
        glob);
      return glob;
    }

    // Replace the glob metacharacters with corresponding regular
    // expression metacharacters.
    switch (*p)
    {
    case '*':
    case '+':
      // "any characters": '.' followed by the same quantifier
      uvm_re[len++] = '.';
      uvm_re[len++] = *p;
      break;

    case '?':
      // "exactly one character"
      uvm_re[len++] = '.';
      break;

    case '.':
    case '[':
    case ']':
    case '(':
    case ')':
      // literal in a glob, special in a regex: escape it
      uvm_re[len++] = '\\';
      uvm_re[len++] = *p;
      break;

    default:
      uvm_re[len++] = *p;
      break;
    }
  }

  // Anchor the end of the expression with $ unless the glob already
  // ended with one.
  if (uvm_re[len-1] != '$')
    uvm_re[len++] = '$';

  uvm_re[len++] = uvm_re_bracket_char;

  uvm_re[len++] = '\0';

  return &uvm_re[0];
}


//--------------------------------------------------------------------
// uvm_dump_re_cache
//
// Placeholder for dumping a regular-expression cache.  No cache exists
// in this implementation, so the function only emits an informational
// report saying so.
//--------------------------------------------------------------------

void uvm_dump_re_cache()
{
    char *report_id  = (char*) "UVM/DPI/REGEX_MAX";
    char *report_msg = (char*) "uvm_dump_re_cache: cache not implemented";

    m_uvm_report_dpi(M_UVM_INFO, report_id, report_msg, M_UVM_LOW,
                     (char*)__FILE__, __LINE__);
}
View Code