unicode字符和多字节字符的相互转换接口

作者：朱金灿

开源代码中可利用的资源真多。我从sqlite3的源码中抠出了几个字符转换接口，稍微改造了一下，发现还挺好用的。下面是实现代码：

/*
** Convert a NUL-terminated UTF-8 string to a Windows wide-character
** (UTF-16) string.
**
** zFilename: NUL-terminated UTF-8 input.
**
** Returns a NUL-terminated WCHAR string held in memory obtained from
** malloc(); the caller must release it with free().  Returns 0 if the
** conversion fails or if memory cannot be allocated.
*/
static WCHAR *utf8ToUnicode(const char *zFilename){
	int nChar;
	WCHAR *zWideFilename;

	/* First pass: ask how many WCHARs (terminator included) are required. */
	nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);
	if( nChar<=0 ){
		/* The size query failed; bail out rather than calling malloc(0). */
		return 0;
	}
	zWideFilename = static_cast<WCHAR *>(malloc(nChar*sizeof(zWideFilename[0])));
	if( zWideFilename==0 ){
		return 0;
	}
	/* Second pass: perform the conversion into the allocated buffer. */
	nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nChar);
	if( nChar==0 ){
		free(zWideFilename);
		zWideFilename = 0;
	}
	return zWideFilename;
}

/*
** Convert a NUL-terminated Windows wide-character string to UTF-8.
**
** zWideFilename: NUL-terminated WCHAR input.
**
** Returns a NUL-terminated UTF-8 string held in memory obtained from
** malloc(); the caller must release it with free().  Returns 0 if the
** conversion fails or if memory cannot be allocated.
*/
static char *unicodeToUtf8(const WCHAR *zWideFilename){
	int nByte;
	char *zFilename;

	/* First pass: ask how many bytes (terminator included) are required. */
	nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);
	if( nByte<=0 ){
		/* The size query failed; bail out rather than calling malloc(0). */
		return 0;
	}
	zFilename = static_cast<char*>(malloc( nByte ));
	if( zFilename==0 ){
		return 0;
	}
	/* Second pass: perform the conversion into the allocated buffer. */
	nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte,
		0, 0);
	if( nByte==0 ){
		free(zFilename);
		zFilename = 0;
	}
	return zFilename;
}

/*
** Convert a NUL-terminated multibyte string to a Windows wide-character
** string.  The source code page is CP_ACP when AreFileApisANSI() reports
** true and CP_OEMCP otherwise.
**
** zFilename: NUL-terminated multibyte input.
**
** Returns a NUL-terminated WCHAR string held in memory obtained from
** malloc(); the caller must release it with free().  Returns 0 if the
** conversion fails or if memory cannot be allocated.
*/
static WCHAR *mbcsToUnicode(const char *zFilename){
	int nChar;
	WCHAR *zMbcsFilename;
	int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

	/*
	** First pass: the API reports the required size in WCHARs (terminator
	** included).  Keep it as a character count: the same value is later
	** handed back to the API as the buffer capacity, also in WCHARs.
	*/
	nChar = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL, 0);
	if( nChar<=0 ){
		/* The size query failed; bail out rather than calling malloc(0). */
		return 0;
	}
	/* Scale to bytes exactly once, at the allocation site. */
	zMbcsFilename = static_cast<WCHAR*>(malloc( nChar*sizeof(zMbcsFilename[0]) ));
	if( zMbcsFilename==0 ){
		return 0;
	}
	/* Second pass: perform the conversion into the allocated buffer. */
	nChar = MultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nChar);
	if( nChar==0 ){
		free(zMbcsFilename);
		zMbcsFilename = 0;
	}
	return zMbcsFilename;
}

/*
** Convert a NUL-terminated Windows wide-character string to a multibyte
** string.  The target code page is CP_ACP when AreFileApisANSI() reports
** true and CP_OEMCP otherwise.
**
** zWideFilename: NUL-terminated WCHAR input.
**
** Returns a NUL-terminated multibyte string held in memory obtained from
** malloc(); the caller must release it with free().  Returns 0 if the
** conversion fails or if memory cannot be allocated.
*/
static char* unicodeToMbcs(const WCHAR* zWideFilename){
	int nByte;
	char *zFilename;
	int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

	/* First pass: ask how many bytes (terminator included) are required. */
	nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0);
	if( nByte<=0 ){
		/* The size query failed; bail out rather than calling malloc(0). */
		return 0;
	}
	zFilename = static_cast<char*>(malloc(nByte ));
	if( zFilename==0 ){
		return 0;
	}
	/* Second pass: perform the conversion into the allocated buffer. */
	nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, nByte,
		0, 0);
	if( nByte==0 ){
		free(zFilename);
		zFilename = 0;
	}
	return zFilename;
}

/*

** Convert multibyte character string to UTF-8.  Space to hold the

** returned string is obtained from malloc().

*/

static char* mbcsToUtf8(const char *zFilename){

	char *zFilenameUtf8;

	WCHAR *zTmpWide;

	zTmpWide = mbcsToUnicode(zFilename);

	if( zTmpWide==0 ){

		return 0;

	}

	zFilenameUtf8 = unicodeToUtf8(zTmpWide);

	free(zTmpWide);

	return zFilenameUtf8;

}

/*

** Convert UTF-8 to multibyte character string.  Space to hold the

** returned string is obtained from malloc().

*/

static char* utf8ToMbcs(const char *zFilename){

	char *zFilenameMbcs;

	WCHAR* zTmpWide;

	zTmpWide = utf8ToUnicode(zFilename);

	if( zTmpWide==0 ){

		return 0;

	}

	zFilenameMbcs = unicodeToMbcs(zTmpWide);

	free(zTmpWide);

	return zFilenameMbcs;

}

/*
** Convert a NUL-terminated multibyte string to UTF-8 and return it as a
** std::string.  The source code page is CP_ACP when AreFileApisANSI()
** reports true and CP_OEMCP otherwise.
**
** pszMbcs: NUL-terminated multibyte input.
**
** Returns the UTF-8 text, or an empty string if either conversion step
** fails.  All temporary buffers are owned by string objects, so nothing
** needs to be released manually on any path.
*/
std::string MbcsToUtf8( const char* pszMbcs )
{
	const int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

	// Step 1: multibyte -> wide characters. The first call only reports the
	// required length in WCHARs, terminator included.
	const int wideLen = MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL, 0);
	if( wideLen<=0 )
	{
		return std::string();
	}
	std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
	if( MultiByteToWideChar(codepage, 0, pszMbcs, -1, &wide[0], wideLen)==0 )
	{
		return std::string();
	}

	// Step 2: wide characters -> UTF-8. Again query the size (in bytes,
	// terminator included) before converting.
	const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, 0, 0, 0, 0);
	if( utf8Len<=0 )
	{
		return std::string();
	}
	std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
	if( WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, 0, 0)==0 )
	{
		return std::string();
	}

	// The API wrote a terminating NUL into the buffer; it is not part of the
	// string's content, so drop it.
	utf8.resize(static_cast<std::string::size_type>(utf8Len) - 1);
	return utf8;
}

为了测试这些接口，我写了一个测试工程：它读取一个xml文件，并对其中的字符进行转换。测试工程的代码下载地址如下：

unicode字符和多字节字符的相互转换接口及测试工程

秒客网

unicode字符和多字节字符的相互转换接口

相关文章