GB2312 和 GBK 中每一个汉字都由 2 个字节组成,这 2 个字节的取值范围分别是:
gb2312:
high8 = 0xA1 ~ 0xFE (161 - 254)
low8 = 0xA1 ~ 0xFE (161 - 254)
gbk:
high8 = 0x81 ~ 0xFE (129 - 254)
low8 = 0x40 ~ 0xFE (64 - 254,不含 0x7F)
汉字编码对照表的打印方法:
// 打印ascii汉字编码表
for(int i = 129; i < 256; i) // 129 = 0x81
{
for(int j = 64; j < 256; j) // 64 = 0x40
{
char pchar[3];
pchar[0] = i;
pchar[1] = j;
pchar[2] = '/0';
cout << pchar << " " << i << "," << j << " ";
}
}
已知一个汉字,输出其编码字节:
/*
 * Rebuild one 16-bit code unit from two bytes and print it as a character.
 *
 * high: byte stored at the first (lowest) address of the code unit.
 * low:  byte stored at the following address.
 *
 * The bytes are written back in memory order, so the numeric value of the
 * rebuilt unit depends on the host byte order. It round-trips with bytes
 * that were read from the in-memory representation of a wide character.
 *
 * The value is printed with %lc, which takes a wide character by value.
 * %ls would be wrong here: it expects a pointer to a wide string.
 * Non-ASCII output presumably needs the caller to have selected a suitable
 * locale beforehand (e.g. via setlocale).
 */
void makechinese(int high, int low)
{
    unsigned short unit = 0;
    /* Writing through unsigned char is the aliasing-safe way to fill the
     * object representation byte by byte. */
    unsigned char *raw = (unsigned char *)&unit;

    raw[0] = (unsigned char)high;
    raw[1] = (unsigned char)low;

    printf("拼接汉字->%lc\n", (wint_t)unit);
}
// Demo: print a wide-character string, then feed the first two bytes of its
// in-memory representation to makechinese() to rebuild the character.
int main()
{
    // Adopt the environment's locale so wide characters can be converted
    // to the multibyte encoding used for output.
    setlocale(LC_ALL, "");

    wchar_t chinese[] = L"高";
    size_t len = wcslen(chinese);
    std::cout << len << std::endl;

    // Use printf with %ls rather than wprintf: stdout has already been used
    // for byte-oriented output above, and mixing narrow and wide calls on
    // one stream is not portable.
    printf("%ls\n", chinese);

    // Inspect the raw bytes through unsigned char so values above 127 are
    // not sign-extended.
    const unsigned char *param = (const unsigned char *)chinese;
    unsigned char high = param[0]; // 216 (0xD8) on a little-endian host
    unsigned char low = param[1];  // 154 (0x9A) on a little-endian host
    makechinese((int)high, (int)low);

#ifdef _WIN32
    // "pause" is a Windows shell built-in; it only keeps the console open.
    system("pause");
#endif
    return 0;
}
char chinese[] = "高" <--> wchar_t chinese[] = L"高"
这两种汉字的存储形式,以及所存储的编码又有什么区别呢?可以继续深入研究一下。