#include <stdio.h>
#include <stdint.h>

#ifndef FALSE
typedef int8_t BOOL;
#define TRUE 1
#define FALSE 0
#endif

/* One lead-byte prefix: `head` is the byte whose top `countOf1` bits are set. */
typedef struct _UTF8_HEAD
{
    uint8_t countOf1;
    uint8_t head;
} UTF8_HEAD;

/*
 * Prefix table indexed by the number of leading 1 bits.
 * Entry i holds the lead-byte pattern of an i-byte sequence; entry i + 1
 * doubles as the mask that isolates that pattern (one extra bit, which
 * must be 0 in the lead byte).
 */
static const UTF8_HEAD utf8Head[] = {
    {0, 0x0},
    {1, 0x80},
    {2, 0xC0},
    {3, 0xE0},
    {4, 0xF0},
    {5, 0xF8},
    {6, 0xFC},
    {7, 0xFE},
};

/**
 * Byte layouts recognised by isutf8():
 *
 *   1 byte   0xxxxxxx
 *   2 bytes  110xxxxx 10xxxxxx
 *   3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 *   4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   5 bytes  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   6 bytes  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */

/*
 * Returns the total length (2..6) of the multi-byte sequence introduced by
 * `lead`, or 0 when `lead` matches none of the lead-byte patterns
 * (a stray continuation byte 10xxxxxx, or 0xFE / 0xFF).
 */
static int32_t utf8SequenceLength(uint8_t lead)
{
    const int32_t entries = (int32_t)(sizeof(utf8Head) / sizeof(utf8Head[0]));

    for (int32_t i = 2; i < entries - 1; ++i) {
        if (utf8Head[i].head == (lead & utf8Head[i + 1].head)) {
            return utf8Head[i].countOf1;
        }
    }
    return 0;
}

/**
 * Checks whether a byte buffer is structurally well-formed UTF-8.
 *
 * Each lead byte must match one of the layouts listed above and must be
 * followed, inside the buffer, by the right number of 10xxxxxx
 * continuation bytes.
 *
 * Only the byte structure is checked: overlong encodings, surrogate code
 * points and the legacy 5- and 6-byte forms are all accepted.
 *
 * @param data  buffer to inspect; need not be NUL-terminated
 * @param len   number of bytes in `data`
 * @return TRUE if every sequence is well-formed, or if `data` is NULL or
 *         `len` is not positive; FALSE on the first malformed or
 *         truncated sequence.
 */
BOOL isutf8(const char* data, int32_t len)
{
    if (NULL == data || len <= 0) {
        return TRUE;
    }

    /* Work on unsigned bytes so the bit tests do not depend on char signedness. */
    const unsigned char* p = (const unsigned char*)data;
    const unsigned char* end = p + len;

    while (p < end) {
        if (0 == (*p & 0x80)) {
            /* single-byte (ASCII) character */
            ++p;
            continue;
        }

        const int32_t seqLen = utf8SequenceLength(*p);
        if (0 == seqLen) {
            return FALSE;
        }

        /* Reject a truncated sequence BEFORE touching its continuation
         * bytes, so nothing at or beyond `end` is ever read. */
        if (end - p < seqLen) {
            return FALSE;
        }

        for (int32_t j = 1; j < seqLen; ++j) {
            if (0x80 != (p[j] & 0xC0)) {
                return FALSE;
            }
        }

        p += seqLen;
    }

    return TRUE;
}

/*
 * Demo entry point. Define ISUTF8_NO_MAIN to leave it out when isutf8()
 * is compiled into another program or a test harness.
 */
#ifndef ISUTF8_NO_MAIN
int main(void)
{
    printf("%d\n", isutf8("中文", 6));
    return 0;
}
#endif