unicode字符和多字节字符的相互转换接口

时间:2023-03-10 03:43:47
unicode字符和多字节字符的相互转换接口

作者:朱金灿

来源:http://blog.****.net/clever101

发现开源代码的可利用资源真多,从sqlite3的源码中抠出了几个字符转换接口,稍微改造下了发现还挺好用的。下面是实现代码:

/*
** Convert a UTF-8 string to microsoft unicode (UTF-16?).
**
** Space to hold the returned string is obtained from malloc.
*/
static WCHAR *utf8ToUnicode(const char *zFilename){ int nChar;
WCHAR *zWideFilename; nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);
zWideFilename = static_cast<WCHAR *>(malloc(nChar*sizeof(zWideFilename[0])));
if( zWideFilename==0 ){ return 0; }
nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nChar);
if( nChar==0 ){ free(zWideFilename);
zWideFilename = 0; }
return zWideFilename; } /*
** Convert microsoft unicode to UTF-8. Space to hold the returned string is
** obtained from malloc().
*/
static char *unicodeToUtf8(const WCHAR *zWideFilename){ int nByte;
char *zFilename; nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);
zFilename = static_cast<char*>(malloc( nByte ));
if( zFilename==0 ){ return 0; }
nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte,
0, 0);
if( nByte == 0 )
{
free(zFilename);
zFilename = 0;
}
return zFilename; } /*
** Convert an ansi string to microsoft unicode, based on the
** current codepage settings for file apis.
**
** Space to hold the returned string is obtained
** from malloc.
*/
static WCHAR *mbcsToUnicode(const char *zFilename){ int nByte;
WCHAR *zMbcsFilename;
int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL,0)*sizeof(WCHAR);
zMbcsFilename = static_cast<WCHAR*>(malloc( nByte*sizeof(zMbcsFilename[0])));
if( zMbcsFilename==0 ){ return 0; }
nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nByte);
if( nByte==0 )
{
free(zMbcsFilename);
zMbcsFilename = 0;
}
return zMbcsFilename; } /*
** Convert microsoft unicode to multibyte character string, based on the
** user's Ansi codepage.
**
** Space to hold the returned string is obtained from
** malloc().
*/
static char* unicodeToMbcs(const WCHAR* zWideFilename){ int nByte;
char *zFilename;
int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0);
zFilename = static_cast<char*>(malloc(nByte ));
if( zFilename==0 ){ return 0; }
nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, nByte,
0, 0);
if( nByte == 0 ){ free(zFilename);
zFilename = 0;
}
return zFilename; } /*
** Convert multibyte character string to UTF-8. Space to hold the
** returned string is obtained from malloc().
*/
static char* mbcsToUtf8(const char *zFilename){ char *zFilenameUtf8;
WCHAR *zTmpWide; zTmpWide = mbcsToUnicode(zFilename);
if( zTmpWide==0 ){ return 0; }
zFilenameUtf8 = unicodeToUtf8(zTmpWide);
free(zTmpWide);
return zFilenameUtf8;
} /*
** Convert UTF-8 to multibyte character string. Space to hold the
** returned string is obtained from malloc().
*/
static char* utf8ToMbcs(const char *zFilename){ char *zFilenameMbcs;
WCHAR* zTmpWide; zTmpWide = utf8ToUnicode(zFilename);
if( zTmpWide==0 ){ return 0; }
zFilenameMbcs = unicodeToMbcs(zTmpWide);
free(zTmpWide);
return zFilenameMbcs;
} std::string MbcsToUtf8( const char* pszMbcs )
{
std::string str;
WCHAR *pwchar=0;
CHAR *pchar=0;
int len=0;
int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
len=MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL,0);
pwchar=new WCHAR[len];
if(pwchar!=0)
{
len = MultiByteToWideChar(codepage, 0, pszMbcs, -1, pwchar, len);
if( len!=0 )
{
len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, 0, 0, 0, 0);
pchar=new CHAR[len];
if(pchar!=0)
{
len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, pchar, len,0, 0);
if(len!=0)
{
str = pchar;
}
delete pchar;
}
delete pwchar;
}
}
return str;
}

要测试这些接口,为此我写了一个测试工程,是读取一个xml文件把里面的字符进行转换的,测试工程的代码下载地址如下:

unicode字符和多字节字符的相互转换接口及测试工程