基于Poco的UTF8、UTF16、GBK、Hex之间的转换

/******Encoding.h*******/
#include "Poco/UnicodeConverter.h"
#include "Poco/Exception.h"
#include "Poco/DigestEngine.h"

// Export/visibility macro handed to POCO_DECLARE_EXCEPTION below; it simply
// forwards to Foundation_API (presumably Poco Foundation's import/export
// macro - TODO confirm this is the right linkage for code outside Foundation).
#define MyLib_API Foundation_API

// NOTE(review): this using-directive sits at header scope, so every file that
// includes this header gets the whole Poco namespace. Code in this file relies
// on it (unqualified `Exception` below, unqualified `NumberParser` later).
using namespace Poco;

// Declares the EncodeException class, derived from Exception (Poco::Exception
// through the using-directive above). Its implementation is generated by the
// matching POCO_IMPLEMENT_EXCEPTION in the .cpp part.
POCO_DECLARE_EXCEPTION(MyLib_API, EncodeException, Exception)

// Static helpers for converting text between UTF-8, UTF-16, GBK and a
// hexadecimal text form.
//
// The conversion functions report failure by throwing EncodeException. They
// deliberately carry no dynamic exception specification: `throw(T)` was
// removed from the language in C++17, and the out-of-class definitions of
// these functions do not repeat one either.
class Encoding
{
public:
    // Byte order of the running machine, as reported by GetCurrentByteOrder().
    enum ByteOrderType
    {
        BIG_ENDIAN_BYTE_ORDER,
        LITTLE_ENDIAN_BYTE_ORDER,
        UNKNOW
    };

    // GBK <-> UTF-16. Throw EncodeException on failure.
    static void GBKToUTF16(const std::string& gbkString, std::wstring& utf16String);
    static void UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString);

    // UTF-8 <-> UTF-16. Throw EncodeException on failure.
    static void UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String);
    static void UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String);

    // UTF-8 <-> GBK, going through UTF-16. Throw EncodeException on failure.
    static void UTF8ToGBK(const std::string& utf8String, std::string& gbkString);
    static void GBKToUTF8(const std::string& gbkString, std::string& utf8String);

    // Hex text <-> raw bytes (two hex digits per byte).
    static void EncodeHexString(const std::string& bytes, std::string& hexString);
    static void DecodeHexString(const std::string& hexString, std::string& bytes);

    // Hex text <-> wide string (four hex digits per character).
    static void EncodeHexString(const std::wstring& bytes, std::string& hexString);
    static void DecodeHexString(const std::string& hexString, std::wstring& bytes);

    // Returns the byte order detected for the running machine.
    static ByteOrderType GetCurrentByteOrder();

private:
    // Converter object the UTF-8/UTF-16 functions call into.
    static Poco::UnicodeConverter _unicodeConverter;
    // Value cached by GetCurrentByteOrder() after its first call.
    static ByteOrderType _currentByteOrder;
};

/********Encoding.cpp********/#include "Encoding.h"
#include "Poco/NumberParser.h"

// Storage for the static data members declared in class Encoding.
Poco::UnicodeConverter Encoding::_unicodeConverter;
// Written by GetCurrentByteOrder() on its first call and returned afterwards.
Encoding::ByteOrderType Encoding::_currentByteOrder;

// Generates the EncodeException implementation (base class Poco::Exception);
// "Encoding error" is the name string passed to the Poco macro.
POCO_IMPLEMENT_EXCEPTION(EncodeException, Poco::Exception, "Encoding error")

void Encoding::GBKToUTF16(const std::string& gbkString, std::wstring& utf16String)
{
//获得需要分配的空间大小
int size = MultiByteToWideChar(936, 0, gbkString.c_str(), -1, NULL, 0);
    std::vector<wchar_t> buff(size);
if(MultiByteToWideChar(936, 0, gbkString.c_str(), -1, buff.data(), size) == 0)
    {
//throw a exception
throw EncodeException("GBK convert to UTF16 failed", GetLastError());
    }
if(!utf16String.empty())
        utf16String.clear();
    utf16String.append(buff.data(), buff.size());
}


void Encoding::UTF16ToGBK(const std::wstring& utf16String, std::string& gbkString)
{
int size = 0;

//获得需要分配的空间大小
    size = WideCharToMultiByte(936, 0, utf16String.c_str(), -1, NULL, 0, NULL, NULL);
    std::vector<char> buff(size);
if(WideCharToMultiByte(936, 0, utf16String.c_str(), -1, buff.data(), size, NULL, NULL) == 0)
throw EncodeException("UTF16 convert to GBK failed", GetLastError());

if(!gbkString.empty())
        gbkString.clear();
    gbkString.append(buff.data(), buff.size());
}

void Encoding::UTF8ToUTF16(const std::string& utf8String, std::wstring& utf16String)
{
    std::string errorMessage;

try
    {
        _unicodeConverter.toUTF16(utf8String, utf16String);
    }
catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::UTF16ToUTF8(const std::wstring& utf16String, std::string& utf8String)
{
    std::string errorMessage;

try
    {
        _unicodeConverter.toUTF8(utf16String, utf8String);
    }
catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::UTF8ToGBK(const std::string& utf8String, std::string& gbkString)
{
    std::wstring utf16String;
    std::string errorMessage;

try
    {
        _unicodeConverter.toUTF16(utf8String, utf16String);
        UTF16ToGBK(utf16String, gbkString);
    }
catch(EncodeException)
    {
        errorMessage = "UTF8 convert to GBK failed";
    }
catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::GBKToUTF8(const std::string& gbkString, std::string& utf8String)
{
    std::wstring utf16String;
    std::string errorMessage;

try
    {
        GBKToUTF16(gbkString, utf16String);
        _unicodeConverter.toUTF8(utf16String, utf8String);
    }
catch(EncodeException)
    {
        errorMessage = "GBK convert to UTF8 failed";
    }
catch(Poco::Exception &e)
    {
        errorMessage.append("UTF8 convert to UTF16 failed, ");
        errorMessage.append(e.message());
    }
if(!errorMessage.empty())
throw EncodeException(errorMessage, GetLastError());
}

void Encoding::EncodeHexString(const std::string& bytes, std::string& hexString)
{
if(!hexString.empty())
        hexString.clear();

    Poco::DigestEngine::Digest digest(bytes.begin(), bytes.end());
    hexString = Poco::DigestEngine::digestToHex(digest);
}

void Encoding::DecodeHexString(const std::string& hexString, std::string& bytes)
{
    unsigned int _value;
if(!bytes.empty())
        bytes.clear();

for(std::string::size_type i = 0, j = 0; i <  hexString.length(); i+=2)
    {
if(NumberParser::tryParseHex(hexString.substr(i, 2), _value))
            bytes.push_back(_value);
    }
}

// Writes the hexadecimal text form of a wide string into `hexString`,
// four hex digits per character with the high byte first.
//
// Only the low 16 bits of each character are encoded. The two bytes are
// taken with shifts and masks, so the output does not depend on the byte
// order of the machine and no raw memory of the string is inspected.
// Previous content of `hexString` is dropped.
void Encoding::EncodeHexString(const std::wstring& utf16String, std::string& hexString)
{
    hexString.clear();

    Poco::DigestEngine::Digest digest;
    digest.reserve(utf16String.size() * 2);
    for(auto iter = utf16String.begin(); iter != utf16String.end(); ++iter)
    {
        const unsigned int unit = static_cast<unsigned int>(*iter) & 0xFFFFu;
        digest.push_back(static_cast<unsigned char>(unit >> 8));    // high byte
        digest.push_back(static_cast<unsigned char>(unit & 0xFFu)); // low byte
    }

    hexString = Poco::DigestEngine::digestToHex(digest);
}

void Encoding::DecodeHexString(const std::string& hexString, std::wstring& utf16String)
{
    unsigned int _value;
if(!utf16String.empty())
        utf16String.clear();

for(std::string::size_type i = 0, j = 0; i <  hexString.length(); i+=4)
    {
if(NumberParser::tryParseHex(hexString.substr(i, 4), _value))
            utf16String.push_back(_value);
    }
}

// Reports the byte order of the running machine.
//
// The first call stores the 16-bit value 0x0102 and looks at the order of
// its two bytes; the result is kept in _currentByteOrder and later calls
// return that cached value. UNKNOW is returned when the bytes match neither
// expected layout.
Encoding::ByteOrderType Encoding::GetCurrentByteOrder()
{
    static bool detected = false;
    if(!detected)
    {
        union
        {
            char16_t value;
            char raw[2];
        } probe;
        probe.value = 0x0102;

        const bool highByteFirst = (probe.raw[0] == 1 && probe.raw[1] == 2);
        const bool lowByteFirst = (probe.raw[0] == 2 && probe.raw[1] == 1);

        if(highByteFirst)
            _currentByteOrder = BIG_ENDIAN_BYTE_ORDER;
        else if(lowByteFirst)
            _currentByteOrder = LITTLE_ENDIAN_BYTE_ORDER;
        else
            _currentByteOrder = UNKNOW;

        detected = true;
    }
    return _currentByteOrder;
}

秒客网

基于Poco的UTF8、UTF16、GBK、Hex之间的转换

相关文章