查看文章
 
UTF-8 与 UTF-16 相互转换及 \uhhhh 转换为 UTF-16 的 C++ 函数(下) 之使用篇
2008-07-17 17:27
文件需保存为UTF-8格式

/*
 * Test driver for the UTF-8 <-> UTF-16 and "\uhhhh" -> UTF-16 converters.
 *
 * The source file must be saved as UTF-8 so that the literal in `str`
 * is compiled as UTF-8 bytes.
 *
 * The three convert_* functions are defined elsewhere. From the way they
 * are used here they take (source, destination) and return a length that
 * is stored in a size_t.
 * NOTE(review): the length presumably counts code units written, excluding
 * the terminator -- confirm against the converter implementation.
 *
 * Returns 0 unconditionally.
 */
int
main(void)
{
    /* UTF-8 sample text: four lines separated by '\n'. */
    char str[256] = "汉字\n枯藤昏鸦\n罪恶之都\n资源不够不可以建造";
    /*
     * The same text written as literal backslash-u escape sequences
     * (the backslashes are doubled so the compiler keeps them as text).
     */
    char ustr[256] =
        "\\u6c49\\u5b57"
        "\\u000a"
        "\\u67af\\u85e4\\u660f\\u9e26"
        "\\u000a"
        "\\u7f6a\\u6076\\u4e4b\\u90fd"
        "\\u000a"
        "\\u8d44\\u6e90\\u4e0d\\u591f\\u4e0d\\u53ef\\u4ee5\\u5efa\\u9020";
    wchar_t wstr[64];

    size_t len;
    size_t i;

    /* ---- Round trip 1: UTF-8 -> UTF-16 -> UTF-8 ---- */
    printf("Start UTF convert test...\n");
    printf("Before conversion...\n");
    printf("UTF-8 HEX: \n");
    for (i = 0; str[i]; i++) {
        /* Mask to one byte: plain char may be signed. */
        printf("0x%02X ", (unsigned int)(str[i] & 0xFF));
    }
    /*
     * The loop stops with i on the NUL terminator, so print str[i]
     * (the original read str[i + 1], one element past the terminator).
     */
    printf("0x%02X \n", (unsigned int)(str[i] & 0xFF));
    printf("UTF-8 string: \n%s\n\n", str);

    printf("Convert from UTF-8 to UTF-16...\n");
    len = convert_UTF8_to_UTF16(str, wstr);
    /* size_t must not be passed to %d; cast to a type with a known specifier. */
    printf("len = %lu\n", (unsigned long)len);
    printf("UTF-16 HEX: \n");
    /* i <= len: also dump the element at index len. */
    for (i = 0; i <= len; i++) {
        printf("0x%04X ", (unsigned int)(wstr[i] & 0xFFFF));
    }
    printf("\n\n");

    printf("Convert from UTF-16 to UTF-8...\n");
    len = convert_UTF16_to_UTF8(wstr, str);
    printf("len = %lu\n", (unsigned long)len);
    printf("UTF-8 HEX: \n");
    for (i = 0; i <= len; i++) {
        printf("0x%02X ", (unsigned int)(str[i] & 0xFF));
    }
    printf("\n");
    printf("UTF-8 string: \n%s\n\n", str);

    /* ---- Round trip 2: \uhhhh sequence -> UTF-16 -> UTF-8 ---- */
    printf("Start \\uhhhh sequence convert test...\n");
    printf("Before conversion...\n");
    printf("u sequence: \n%s\n\n", ustr);

    printf("Convert from \\uhhhh sequence to UTF-16...\n");
    len = convert_useq_to_UTF16(ustr, wstr);
    printf("lenth = %lu\n", (unsigned long)len);
    printf("UTF-16 HEX: \n");
    /* i < len: this dump stops before the element at index len. */
    for (i = 0; i < len; i++) {
        printf("0x%04X ", (unsigned int)(wstr[i] & 0xFFFF));
    }
    printf("\n\n");

    printf("Convert from UTF-16 to UTF-8...\n");
    /* The UTF-8 result overwrites ustr, which is no longer needed. */
    len = convert_UTF16_to_UTF8(wstr, ustr);
    printf("lenth = %lu\n", (unsigned long)len);
    printf("UTF-8 string: \n%s\n", ustr);

    return 0;
}


测试结果如下:
Start UTF convert test...
Before conversion...
UTF-8 HEX:
0xE6 0xB1 0x89 0xE5 0xAD 0x97 0x0A 0xE6 0x9E 0xAF 0xE8 0x97 0xA4 0xE6 0x98 0x8F
0xE9 0xB8 0xA6 0x0A 0xE7 0xBD 0xAA 0xE6 0x81 0xB6 0xE4 0xB9 0x8B 0xE9 0x83 0xBD
0x0A 0xE8 0xB5 0x84 0xE6 0xBA 0x90 0xE4 0xB8 0x8D 0xE5 0xA4 0x9F 0xE4 0xB8 0x8D
0xE5 0x8F 0xAF 0xE4 0xBB 0xA5 0xE5 0xBB 0xBA 0xE9 0x80 0xA0 0x00
UTF-8 string:
汉字
枯藤昏鸦
罪恶之都
资源不够不可以建造

Convert from UTF-8 to UTF-16...
len = 22
UTF-16 HEX:
0x6C49 0x5B57 0x000A 0x67AF 0x85E4 0x660F 0x9E26 0x000A 0x7F6A 0x6076
0x4E4B 0x90FD 0x000A 0x8D44 0x6E90 0x4E0D 0x591F 0x4E0D 0x53EF 0x4EE5
0x5EFA 0x9020 0x0000

Convert from UTF-16 to UTF-8...
len = 60
UTF-8 HEX:
0xE6 0xB1 0x89 0xE5 0xAD 0x97 0x0A 0xE6 0x9E 0xAF 0xE8 0x97 0xA4 0xE6 0x98 0x8F
0xE9 0xB8 0xA6 0x0A 0xE7 0xBD 0xAA 0xE6 0x81 0xB6 0xE4 0xB9 0x8B 0xE9 0x83 0xBD
0x0A 0xE8 0xB5 0x84 0xE6 0xBA 0x90 0xE4 0xB8 0x8D 0xE5 0xA4 0x9F 0xE4 0xB8 0x8D
0xE5 0x8F 0xAF 0xE4 0xBB 0xA5 0xE5 0xBB 0xBA 0xE9 0x80 0xA0 0x00
UTF-8 string:
汉字
枯藤昏鸦
罪恶之都
资源不够不可以建造

Start \uhhhh sequence convert test...
Before conversion...
u sequence:
\u6c49\u5b57\u000a\u67af\u85e4\u660f\u9e26\u000a\u7f6a\u6076\u4e4b\u90fd\u000a
\u8d44\u6e90\u4e0d\u591f\u4e0d\u53ef\u4ee5\u5efa\u9020

Convert from \uhhhh sequence to UTF-16...
lenth = 22
UTF-16 HEX:
0x6C49 0x5B57 0x000A 0x67AF 0x85E4 0x660F 0x9E26 0x000A 0x7F6A 0x6076
0x4E4B 0x90FD 0x000A 0x8D44 0x6E90 0x4E0D 0x591F 0x4E0D 0x53EF 0x4EE5
0x5EFA 0x9020

Convert from UTF-16 to UTF-8...
lenth = 60
UTF-8 string:
汉字
枯藤昏鸦
罪恶之都
资源不够不可以建造


类别:程序设计||添加到搜藏 |分享到i贴吧|浏览(2664)|评论 (0)
 
 
最近读者:
 
网友评论:
发表评论:
姓 名:
网址或邮箱: (选填)
内 容:
     

   
帮助中心 | 空间客服 | 投诉中心 | 空间协议
©2012 Baidu