Windows下ANSI、Unicode、UTF8字符编码转换

时间:2021-10-30 22:45:48

注意:输入字符串必须以'\0'结尾;如果输入字符串没有以'\0'结尾,请先手动补上,否则转换结果会出错。

// Converts a NUL-terminated string in the system ANSI code page (CP_ACP)
// into a newly allocated, NUL-terminated wide-character string.
//
// pAnsi  - input string; a null pointer is treated as an empty string.
// pUcs2  - receives a buffer allocated with new[]; the caller must release
//          it with delete[]. It is always set and always NUL-terminated.
// Returns the number of wide characters written, excluding the terminator
// (0 when the input is empty or the conversion fails).
unsigned int EncodeUtil::AnsiToUcs2( char* pAnsi, wchar_t*& pUcs2 )
{
  // Measure the input once; the Win32 API takes the length as an int.
  const int srcLen = (pAnsi != NULL) ? static_cast<int>(strlen(pAnsi)) : 0;

  // Sizing pass: with a zero-sized output buffer the API only reports how
  // many wide characters are required. Skipped for empty input, which the
  // API would reject.
  int wcsLen = 0;
  if (srcLen > 0)
  {
    wcsLen = ::MultiByteToWideChar(CP_ACP, 0, pAnsi, srcLen, NULL, 0);
  }

  // The API does not append a terminator when given an explicit length,
  // so reserve one extra element for it.
  pUcs2 = new wchar_t[wcsLen + 1];

  // Conversion pass. Its return value (0 on failure) decides where the
  // terminator goes, so a failed call yields an empty string instead of
  // exposing uninitialised memory.
  int written = 0;
  if (wcsLen > 0)
  {
    written = ::MultiByteToWideChar(CP_ACP, 0, pAnsi, srcLen, pUcs2, wcsLen);
  }

  pUcs2[written] = L'\0';
  return static_cast<unsigned int>(written);
}

// Converts a NUL-terminated wide-character string into a newly allocated,
// NUL-terminated string in the system ANSI code page (CP_ACP).
//
// pUcs2  - input string; a null pointer is treated as an empty string.
// pAnsi  - receives a buffer allocated with new[]; the caller must release
//          it with delete[]. It is always set and always NUL-terminated.
// Returns the number of bytes written, excluding the terminator
// (0 when the input is empty or the conversion fails).
unsigned int EncodeUtil::Ucs2ToAnsi( wchar_t* pUcs2, char*& pAnsi )
{
  // wcslen is the wide-character counterpart of strlen; measure once and
  // pass the length as the int the Win32 API expects.
  const int srcLen = (pUcs2 != NULL) ? static_cast<int>(wcslen(pUcs2)) : 0;

  // Sizing pass: a zero-sized output buffer makes the API report the
  // number of bytes required. Skipped for empty input, which the API
  // would reject.
  int ansiLen = 0;
  if (srcLen > 0)
  {
    ansiLen = ::WideCharToMultiByte(CP_ACP, 0, pUcs2, srcLen, NULL, 0, NULL, NULL);
  }

  // Reserve one extra byte for the terminator, which the API does not
  // write when given an explicit input length.
  pAnsi = new char[ansiLen + 1];

  // Conversion pass. Terminate at the count actually written so that a
  // failed call (return value 0) produces an empty string.
  int written = 0;
  if (ansiLen > 0)
  {
    written = ::WideCharToMultiByte(CP_ACP, 0, pUcs2, srcLen, pAnsi, ansiLen, NULL, NULL);
  }

  pAnsi[written] = '\0';
  return static_cast<unsigned int>(written);
}

// Converts a NUL-terminated wide-character string into a newly allocated,
// NUL-terminated UTF-8 string.
//
// pUcs2  - input string; a null pointer is treated as an empty string.
// pUtf8  - receives a buffer allocated with new[]; the caller must release
//          it with delete[]. It is always set and always NUL-terminated.
//          UTF-8 is a multi-byte encoding, so it is stored as plain char.
// Returns the number of bytes written, excluding the terminator
// (0 when the input is empty or the conversion fails).
unsigned int EncodeUtil::Ucs2ToUtf8( wchar_t* pUcs2, char*& pUtf8 )
{
  // Measure the input once; the Win32 API takes the length as an int.
  const int srcLen = (pUcs2 != NULL) ? static_cast<int>(wcslen(pUcs2)) : 0;

  // Sizing pass: a zero-sized output buffer makes the API report the
  // number of bytes required. Skipped for empty input, which the API
  // would reject. For CP_UTF8 the two trailing default-character
  // arguments must be NULL.
  int u8Len = 0;
  if (srcLen > 0)
  {
    u8Len = ::WideCharToMultiByte(CP_UTF8, 0, pUcs2, srcLen, NULL, 0, NULL, NULL);
  }

  // Reserve one extra byte for the terminator, which the API does not
  // write when given an explicit input length.
  pUtf8 = new char[u8Len + 1];

  // Conversion pass. Terminate at the count actually written so that a
  // failed call (return value 0) produces an empty string.
  int written = 0;
  if (u8Len > 0)
  {
    written = ::WideCharToMultiByte(CP_UTF8, 0, pUcs2, srcLen, pUtf8, u8Len, NULL, NULL);
  }

  pUtf8[written] = '\0';
  return static_cast<unsigned int>(written);
}

// Converts a NUL-terminated UTF-8 string into a newly allocated,
// NUL-terminated wide-character string.
//
// pUtf8  - input string; a null pointer is treated as an empty string.
// pUcs2  - receives a buffer allocated with new[]; the caller must release
//          it with delete[]. It is always set and always NUL-terminated.
// Returns the number of wide characters written, excluding the terminator
// (0 when the input is empty or the conversion fails).
unsigned int EncodeUtil::Utf8ToUcs2( char* pUtf8, wchar_t*& pUcs2 )
{
  // Measure the input once; the Win32 API takes the length as an int.
  const int srcLen = (pUtf8 != NULL) ? static_cast<int>(strlen(pUtf8)) : 0;

  // Sizing pass: with a zero-sized output buffer the API only reports how
  // many wide characters are required. Skipped for empty input, which the
  // API would reject.
  int wcsLen = 0;
  if (srcLen > 0)
  {
    wcsLen = ::MultiByteToWideChar(CP_UTF8, 0, pUtf8, srcLen, NULL, 0);
  }

  // The API does not append a terminator when given an explicit length,
  // so reserve one extra element for it.
  pUcs2 = new wchar_t[wcsLen + 1];

  // Conversion pass. Terminate at the count actually written so that a
  // failed call (return value 0) produces an empty string.
  int written = 0;
  if (wcsLen > 0)
  {
    written = ::MultiByteToWideChar(CP_UTF8, 0, pUtf8, srcLen, pUcs2, wcsLen);
  }

  pUcs2[written] = L'\0';
  return static_cast<unsigned int>(written);
}

// Converts a NUL-terminated ANSI string to UTF-8 by going through an
// intermediate wide-character string (ANSI -> wide -> UTF-8).
//
// pAnsi  - input string, forwarded to AnsiToUcs2.
// pUtf8  - receives the buffer allocated by Ucs2ToUtf8 with new[]; the
//          caller must release it with delete[].
// Returns the length reported by Ucs2ToUtf8.
unsigned int EncodeUtil::AnsiToUtf8( char* pAnsi, char*& pUtf8 )
{
  // Scope guard that owns the intermediate wide buffer, so it is released
  // on every exit path, including when the second conversion throws
  // (its new[] can raise std::bad_alloc).
  struct WideBufferGuard
  {
    wchar_t* ptr;
    ~WideBufferGuard() { delete[] ptr; }
  };

  WideBufferGuard wide = { NULL };
  AnsiToUcs2(pAnsi, wide.ptr);
  return Ucs2ToUtf8(wide.ptr, pUtf8);
}