#include <iostream> #include <string> #include <vector> #include <inttypes.h> #include <time.h> using namespace std; size_t utf8_to_charset(const std::string &input, std::vector<std::string> &output) { std::string ch; for(size_t i = 0, len = 0; i != input.length(); i += len) { unsigned char byte = (unsigned)input[i]; if(byte >= 0xFC) len = 6; else if(byte >= 0xF8) len = 5; else if(byte >= 0xF0) len = 4; else if(byte >= 0xE0) len = 3; else if(byte >= 0xC0) len = 2; else len = 1; ch = input.substr(i,len); output.push_back(ch); } return output.size(); } string utf8_substr(const std::string &input,size_t pos,size_t length) { string returnStr; size_t added_len = 0; size_t cur_pos = 0; string ch; if(length == 0) return returnStr; for(size_t i = 0, len = 0; i != input.length(); i += len) { unsigned char byte = (unsigned)input[i]; if(byte >= 0xFC) len = 6 ; else if(byte >= 0xF8) len = 5; else if(byte >= 0xF0) len = 4; else if(byte >= 0xE0) len = 3; else if(byte >= 0xC0) len = 2; else len = 1; ++cur_pos; if(cur_pos < pos) continue; else { returnStr.append(input.substr(i,len)); added_len++; if(added_len == length) break; } } return returnStr; } int main(int argc,char* argv[]) { string s = "UTF-8字符串截取,123456,这是一个测试字符串,长度需要大于17个字符"; cout << "utf8_substr(s,0,0) ===> " << utf8_substr(s,0,0) << endl; cout << "utf8_substr(s,0,1) ===> " << utf8_substr(s,0,1) << endl; cout << "utf8_substr(s,0,17) ===> " << utf8_substr(s,0,17) << endl; cout << "utf8_substr(s,0,50) ===> " << utf8_substr(s,0,50) << endl; cout << "utf8_substr(s,10,0) ===> " << utf8_substr(s,10,0) << endl; cout << "utf8_substr(s,10,1) ===> " << utf8_substr(s,10,1) << endl; cout << "utf8_substr(s,10,10)===> " << utf8_substr(s,10,10) << endl; cout << "utf8_substr(s,10,30) ===> " << utf8_substr(s,10,30) << endl; return 0; }