基于Poco的UTF8、UTF16、GBK、Hex之间的转换

时间:2022-11-05 14:08:13
/******Encoding.h*******/
#include
"Poco/UnicodeConverter.h"
#include
"Poco/Exception.h"
#include
"Poco/DigestEngine.h"

// Export/visibility macro used for this library's declarations. It simply
// aliases Foundation_API (presumably supplied by the Poco Foundation headers
// included above -- confirm).
#define MyLib_API Foundation_API

// NOTE(review): a file-scope using-directive in a header leaks every Poco name
// into each file that includes it. The unqualified `Exception` below depends on
// it; check for other unqualified Poco names before removing it.
using namespace Poco;

// Declares the EncodeException class with Exception (Poco::Exception, found
// through the using-directive above) as its base class.
POCO_DECLARE_EXCEPTION(MyLib_API, EncodeException, Exception)

/// Static helpers for converting text between GBK, UTF-8 and UTF-16 and for
/// turning byte sequences into hexadecimal strings and back.
///
/// The text conversion functions report failure by throwing EncodeException.
/// The former `throw(EncodeException)` dynamic exception specifications were
/// removed: they are ill-formed since C++17 and did not match the out-of-class
/// definitions, which carry no specification.
class Encoding
{
public:
    /// Byte order of the running machine, as returned by GetCurrentByteOrder().
    enum ByteOrderType
    {
        BIG_ENDIAN_BYTE_ORDER,
        LITTLE_ENDIAN_BYTE_ORDER,
        UNKNOW  ///< Byte order could not be determined (spelling kept for compatibility).
    };

    /// Converts GBK text to UTF-16, replacing the content of utf16String.
    /// @throws EncodeException if the conversion fails.
    static void GBKToUTF16(const std::string& gbkString, std::wstring& utf16String);

    /// Converts UTF-16 text to GBK, replacing the content of gbkString.
    /// @throws EncodeException if the conversion fails.
    static void UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString);

    /// Converts UTF-8 text to UTF-16 through Poco::UnicodeConverter.
    /// @throws EncodeException if the converter reports an error.
    static void UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String);

    /// Converts UTF-16 text to UTF-8 through Poco::UnicodeConverter.
    /// @throws EncodeException if the converter reports an error.
    static void UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String);

    /// Converts UTF-8 text to GBK, going through UTF-16 as the intermediate form.
    /// @throws EncodeException if either conversion step fails.
    static void UTF8ToGBK(const std::string& utf8String, std::string& gbkString);

    /// Converts GBK text to UTF-8, going through UTF-16 as the intermediate form.
    /// @throws EncodeException if either conversion step fails.
    static void GBKToUTF8(const std::string& gbkString, std::string& utf8String);

    /// Writes the hexadecimal form of every byte of `bytes` into hexString.
    static void EncodeHexString(const std::string& bytes, std::string& hexString);

    /// Parses hexString two hex digits at a time and stores the resulting
    /// bytes in `bytes`; groups that fail to parse are skipped.
    static void DecodeHexString(const std::string& hexString, std::string& bytes);

    /// Writes the hexadecimal form of every 16-bit unit of `bytes`
    /// (high byte first) into hexString.
    static void EncodeHexString(const std::wstring& bytes, std::string& hexString);

    /// Parses hexString four hex digits at a time and stores the resulting
    /// code units in `bytes`; groups that fail to parse are skipped.
    static void DecodeHexString(const std::string& hexString, std::wstring& bytes);

    /// Returns the byte order of the running machine.
    static ByteOrderType GetCurrentByteOrder();

private:
    static Poco::UnicodeConverter _unicodeConverter;  ///< Converter used for UTF-8 <-> UTF-16.
    static ByteOrderType _currentByteOrder;           ///< Byte order cached by GetCurrentByteOrder().
};
/********Encoding.cpp********/#include "Encoding.h"
#include
"Poco/NumberParser.h"

// Out-of-class definitions of Encoding's static data members.
Poco::UnicodeConverter Encoding::_unicodeConverter;
// No initializer: as an object with static storage duration it is
// zero-initialized, so it holds the first enumerator (BIG_ENDIAN_BYTE_ORDER)
// until GetCurrentByteOrder() assigns the detected value.
Encoding::ByteOrderType Encoding::_currentByteOrder;

// Provides the implementation of EncodeException (base class Poco::Exception).
// The string literal is the third macro argument -- presumably the text
// reported as the exception's name; confirm against the Poco documentation.
POCO_IMPLEMENT_EXCEPTION(EncodeException, Poco::Exception,
"Encoding error")

/// Converts GBK (Windows code page 936) text to UTF-16.
///
/// @param gbkString    GBK-encoded input; may be empty.
/// @param utf16String  Receives the converted text. It is only modified when
///                     the conversion succeeds.
/// @throws EncodeException carrying the GetLastError() code if the Win32
///         conversion fails.
void Encoding::GBKToUTF16(const std::string& gbkString, std::wstring& utf16String)
{
    const unsigned int gbkCodePage = 936;

    // MultiByteToWideChar rejects a zero-length input, so handle it up front.
    if (gbkString.empty())
    {
        utf16String.clear();
        return;
    }

    // Pass the explicit length instead of -1: with -1 the API also converts
    // and counts the terminating NUL, which previously ended up as a spurious
    // trailing L'\0' inside the result string.
    const int sourceLength = static_cast<int>(gbkString.size());

    // First call: ask how many UTF-16 code units are required.
    const int required = MultiByteToWideChar(gbkCodePage, 0, gbkString.data(), sourceLength, nullptr, 0);
    if (required <= 0)
        throw EncodeException("GBK convert to UTF16 failed", GetLastError());

    // Second call: convert into a correctly sized temporary.
    std::wstring converted(static_cast<std::wstring::size_type>(required), L'\0');
    if (MultiByteToWideChar(gbkCodePage, 0, gbkString.data(), sourceLength, &converted[0], required) == 0)
        throw EncodeException("GBK convert to UTF16 failed", GetLastError());

    utf16String.swap(converted);
}


/// Converts UTF-16 text to GBK (Windows code page 936).
///
/// @param utf16String  UTF-16 input; may be empty.
/// @param gbkString    Receives the converted text. It is only modified when
///                     the conversion succeeds.
/// @throws EncodeException carrying the GetLastError() code if the Win32
///         conversion fails.
void Encoding::UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString)
{
    const unsigned int gbkCodePage = 936;

    // WideCharToMultiByte rejects a zero-length input, so handle it up front.
    if (utf16String.empty())
    {
        gbkString.clear();
        return;
    }

    // Pass the explicit length instead of -1: with -1 the API also converts
    // and counts the terminating NUL, which previously ended up as a spurious
    // trailing '\0' inside the result string.
    const int sourceLength = static_cast<int>(utf16String.size());

    // First call: ask how many bytes are required.
    const int required = WideCharToMultiByte(gbkCodePage, 0, utf16String.data(), sourceLength,
                                             nullptr, 0, nullptr, nullptr);
    if (required <= 0)
        throw EncodeException("UTF16 convert to GBK failed", GetLastError());

    // Second call: convert into a correctly sized temporary.
    std::string converted(static_cast<std::string::size_type>(required), '\0');
    if (WideCharToMultiByte(gbkCodePage, 0, utf16String.data(), sourceLength,
                            &converted[0], required, nullptr, nullptr) == 0)
        throw EncodeException("UTF16 convert to GBK failed", GetLastError());

    gbkString.swap(converted);
}

void Encoding::UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String)
{
std::
string errorMessage;

try
{
_unicodeConverter.toUTF16(utf8String, utf16String);
}
catch(Poco::Exception &e)
{
errorMessage.append(
"UTF8 convert to UTF16 failed, ");
errorMessage.append(e.message());
}
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String)
{
std::
string errorMessage;

try
{
_unicodeConverter.toUTF8(utf16String, utf8String);
}
catch(Poco::Exception &e)
{
errorMessage.append(
"UTF8 convert to UTF16 failed, ");
errorMessage.append(e.message());
}
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::UTF8ToGBK(const std::string& utf8String, std::string& gbkString)
{
std::wstring utf16String;
std::
string errorMessage;

try
{
_unicodeConverter.toUTF16(utf8String, utf16String);
UTF16ToGBK(utf16String, gbkString);
}
catch(EncodeException)
{
errorMessage
= "UTF8 convert to GBK failed";
}
catch(Poco::Exception &e)
{
errorMessage.append(
"UTF8 convert to UTF16 failed, ");
errorMessage.append(e.message());
}
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::GBKToUTF8(const std::string& gbkString, std::string& utf8String)
{
std::wstring utf16String;
std::
string errorMessage;

try
{
GBKToUTF16(gbkString, utf16String);
_unicodeConverter.toUTF8(utf16String, utf8String);
}
catch(EncodeException)
{
errorMessage
= "GBK convert to UTF8 failed";
}
catch(Poco::Exception &e)
{
errorMessage.append(
"UTF8 convert to UTF16 failed, ");
errorMessage.append(e.message());
}
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::EncodeHexString(const std::string& bytes, std::string& hexString)
{
if(!hexString.empty())
hexString.clear();

Poco::DigestEngine::Digest digest(bytes.begin(), bytes.end());
hexString
= Poco::DigestEngine::digestToHex(digest);
}

void Encoding::DecodeHexString(const std::string& hexString, std::string& bytes)
{
unsigned
int _value;
if(!bytes.empty())
bytes.clear();

for(std::string::size_type i = 0, j = 0; i < hexString.length(); i+=2)
{
if(NumberParser::tryParseHex(hexString.substr(i, 2), _value))
bytes.push_back(_value);
}
}

/// Produces the hexadecimal text form of a UTF-16 string.
///
/// Each element contributes its low 16 bits as two bytes, high byte first, so
/// the output has the same layout on every platform.
///
/// The bytes are obtained arithmetically rather than by reading the object
/// representation of each wchar_t. The earlier pointer-based approach needed a
/// run-time byte-order check per character and picked the wrong bytes on
/// big-endian targets where wchar_t is wider than two bytes.
///
/// @param utf16String  Text to encode.
/// @param hexString    Receives the result of Poco::DigestEngine::digestToHex
///                     applied to the byte sequence; any previous content is
///                     discarded.
void Encoding::EncodeHexString(const std::wstring& utf16String, std::string& hexString)
{
    hexString.clear();

    Poco::DigestEngine::Digest digest;
    digest.reserve(utf16String.size() * 2);

    for (const wchar_t unit : utf16String)
    {
        const unsigned int value = static_cast<unsigned int>(unit);
        digest.push_back(static_cast<unsigned char>((value >> 8) & 0xFFu));  // high byte
        digest.push_back(static_cast<unsigned char>(value & 0xFFu));         // low byte
    }

    hexString = Poco::DigestEngine::digestToHex(digest);
}

void Encoding::DecodeHexString(const std::string& hexString, std::wstring& utf16String)
{
unsigned
int _value;
if(!utf16String.empty())
utf16String.clear();

for(std::string::size_type i = 0, j = 0; i < hexString.length(); i+=4)
{
if(NumberParser::tryParseHex(hexString.substr(i, 4), _value))
utf16String.push_back(_value);
}
}

/// Returns the byte order of the running machine.
///
/// Detection runs once, inside the initializer of a function-local static;
/// since C++11 that initialization is performed exactly once even when
/// several threads make the first call concurrently. The previous hand-rolled
/// `static bool` guard had no such protection. The bytes of the probe value
/// are inspected through an unsigned char pointer instead of reading an
/// inactive union member, which is undefined behaviour in C++.
///
/// @return BIG_ENDIAN_BYTE_ORDER, LITTLE_ENDIAN_BYTE_ORDER, or UNKNOW when the
///         probe matches neither layout.
Encoding::ByteOrderType Encoding::GetCurrentByteOrder()
{
    static const ByteOrderType detected = []() -> ByteOrderType
    {
        const char16_t probe = 0x0102;
        const unsigned char* raw = reinterpret_cast<const unsigned char*>(&probe);

        ByteOrderType order = UNKNOW;
        if (raw[0] == 1 && raw[1] == 2)
            order = BIG_ENDIAN_BYTE_ORDER;     // most significant byte stored first
        else if (raw[0] == 2 && raw[1] == 1)
            order = LITTLE_ENDIAN_BYTE_ORDER;  // least significant byte stored first

        // Keep the class-level cache in step with the detected value.
        _currentByteOrder = order;
        return order;
    }();

    return detected;
}