编码 练习代码,检测文件编码类型

编码 练习代码,检测文件编码类型

In normal text file

  • FF FE  UCS-2LE or UTF-16LE
  • FE FF  UCS-2BE or UTF-16BE
  • EF BB BF  UTF-8
In XML file
  • 3C 00  UCS-2LE or UTF-16LE
  • 00 3C  UCS-2BE or UTF-16BE
  • 3C XX  UTF-8 (where XX is non-zero)

/*utf8

*/

#include <stdio.h>

#include <stdlib.h>

/*
 * Classify the leading bytes of a file by their byte-order mark (BOM).
 *
 * bytes: the first bytes read from the file.
 * len:   how many of those bytes are valid (may be fewer than 3).
 *
 * Returns the label to print:
 *   EF BB BF -> "encoding=utf8"
 *   FE FF    -> "encoding=utf16BE"
 *   FF FE    -> "encoding=utf16LE"
 *   otherwise "gb2312" (the program's fallback guess when no BOM is found;
 *   the absence of a BOM does not actually prove the file is GB2312).
 *
 * Each byte is compared for equality. A mask test such as
 * (b & 0xfe) == 0xfe also accepts 0xff, which would misreport FF FF as
 * UTF-16BE.
 */
static const char *detect_bom(const unsigned char *bytes, size_t len)
{
    if (len >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
        return "encoding=utf8";
    }
    if (len >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF) {
        return "encoding=utf16BE";
    }
    if (len >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE) {
        return "encoding=utf16LE";
    }
    return "gb2312";
}

/*
 * Read up to three bytes from d:\e.txt, dump them, and print the encoding
 * suggested by the byte-order mark.
 *
 * Returns 0 on success, 1 if the file cannot be opened, 3 on a read error.
 */
int main(void)
{
    enum { BOM_MAX = 3 };

    /* Zero-filled so that a short read never exposes indeterminate bytes. */
    unsigned char buf[BOM_MAX] = {0};

    /* The backslash must be escaped; "d:\e.txt" would embed an escape
     * sequence instead of a path separator. */
    FILE *fp = fopen("d:\\e.txt", "rb");
    if (fp == NULL) {
        fputs("File error", stderr);
        system("pause");
        return 1;
    }

    size_t result = fread(buf, 1, sizeof buf, fp);
    if (ferror(fp)) {
        fputs("Reading error", stderr);
        fclose(fp);
        system("pause");
        return 3;
    }

    /* buf is not NUL-terminated, so bound %s with an explicit precision. */
    printf("read size=%lu,res=%.*s\n",
           (unsigned long)result, (int)result, (const char *)buf);

    for (size_t i = 0; i < result; i++) {
        unsigned int b = buf[i];

        printf("c=%c,dec=%d,oct=%o,hex=%x\n", (int)b, (int)b, b, b);
    }

    /* Debug trace kept from the original: second byte, masked first byte,
     * and whether the masked first byte equals 0xfe. */
    printf("buf=%x *(buf)&0xfe=%x %d\n",
           (unsigned int)buf[1],
           (unsigned int)(buf[0] & 0xfe),
           (buf[0] & 0xfe) == 0xfe);

    printf("%s\n", detect_bom(buf, result));

    fclose(fp);

    system("pause");

    return 0;
}

 

参考资料:

Understanding Unicode™ – I

http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Chapter04a

Character set encoding basics

http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Chapter03

How to Determine Text File Encoding

http://codesnipers.com/?q=how-to-determine-text-file-encoding

发表评论

电子邮件地址不会被公开。 必填项已用 * 标注

您可以使用这些 HTML 标签和属性: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>