将UTF-8编码转换为GBK编码的方法详解

参考链接:https://blog.csdn.net/yanchao7788/article/details/53196901

参考链接:你真的懂 Unicode 和 UTF-8 是什么关系吗?来看看这个就彻底懂了!_魔都飘雪的博客-CSDN博客_utf8和unicode的关系

UTF-8 无法直接转换为 GBK,必须先把 UTF-8 解码成 Unicode 码点,再把 Unicode 码点转换成 GBK。

1.把UTF-8转成unicode,根据参考的链接,我做了实现。

2.把unicode转成GBK,这里的代码取自FATFS文件系统。其中用到的转换表数组(uni2oem936)太长,没有贴在这里,请自行到FATFS官网下载。

/* Integer types in the style of FatFs' integer definitions. */
typedef unsigned int	UINT;	/* int must be 16-bit or 32-bit */
typedef unsigned char	BYTE;	/* char must be 8-bit */
typedef unsigned short	WORD;	/* 16-bit unsigned integer */
/* NOTE(review): DWORD must hold 21-bit code points; this definition is only
   32 bits wide where `unsigned int` is 32 bits -- confirm for the target. */
typedef unsigned int	DWORD;	/* 32-bit unsigned integer */
typedef WORD			WCHAR;	/* UTF-16 character type */


/*
 * Decode a UTF-8 byte string into an array of Unicode code points.
 *
 * inputS    : UTF-8 bytes to decode (need not be NUL-terminated).
 * sLength   : number of bytes available in inputS.
 * outputArr : receives one code point per element. The first arrLength
 *             elements are zero-filled before decoding, so unused trailing
 *             elements read as 0.
 * arrLength : capacity of outputArr in elements; decoding stops once it is
 *             full.
 *
 * Malformed input (stray continuation byte, invalid lead byte, bad
 * continuation byte, overlong form, surrogate, value above U+10FFFF) yields
 * U+FFFD and decoding resumes at the next plausible byte. A sequence that is
 * cut off by the end of the input ends decoding without emitting anything
 * for it. Nothing is done when outputArr is NULL or arrLength is 0.
 */
void ff_utf8touni(BYTE *inputS,//input string
                  WORD sLength,//string's length
                  DWORD *outputArr,//output Arr
                  WORD arrLength)//output arr max length
{
    const DWORD replacement = 0xFFFD;   /* U+FFFD REPLACEMENT CHARACTER */
    UINT inPos, outPos, tail, j;
    DWORD cp, minCp;
    BYTE lead, next;
    int malformed;

    if (outputArr == 0 || arrLength == 0) return;

    for (outPos = 0; outPos < arrLength; outPos++) {
        outputArr[outPos] = 0;
    }

    if (inputS == 0 || sLength == 0) return;

    inPos = 0;
    outPos = 0;
    while (inPos < sLength && outPos < arrLength) {
        lead = inputS[inPos];

        if (lead < 0x80) {              /* single-byte (ASCII) */
            outputArr[outPos++] = lead;
            inPos++;
            continue;
        }

        /* Classify the lead byte: number of continuation bytes, payload bits
           of the lead, and the smallest code point this length may encode
           (anything smaller is an overlong form). */
        if (lead >= 0xC2 && lead <= 0xDF) {
            tail = 1; cp = lead & 0x1F; minCp = 0x80;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            tail = 2; cp = lead & 0x0F; minCp = 0x800;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            tail = 3; cp = lead & 0x07; minCp = 0x10000;
        } else {
            /* 0x80-0xBF stray continuation, 0xC0/0xC1 overlong leads,
               0xF5-0xFF never valid in UTF-8. */
            outputArr[outPos++] = replacement;
            inPos++;
            continue;
        }

        malformed = 0;
        for (j = 1; j <= tail; j++) {
            /* Written as a subtraction so inPos + j cannot wrap when UINT
               is only 16 bits wide. */
            if (j >= (UINT)(sLength - inPos)) return;   /* truncated at end */
            next = inputS[inPos + j];
            if ((next & 0xC0) != 0x80) {
                malformed = 1;
                break;
            }
            cp = (cp << 6) | (DWORD)(next & 0x3F);
        }

        if (malformed) {
            /* Skip only the bytes already accepted so that the offending
               byte is re-examined as the start of a new sequence. */
            outputArr[outPos++] = replacement;
            inPos += j;
            continue;
        }

        if (cp < minCp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
            cp = replacement;
        }
        outputArr[outPos++] = cp;
        inPos += tail + 1;
    }
}



/*
 * Convert one Unicode code point to its code-page 936 (GBK) code by looking
 * it up in the uni2oem936 table.
 *
 * The table is read as consecutive (Unicode, OEM) pairs of WCHAR values and
 * is searched by bisection on the Unicode member, so it is expected to be
 * sorted in ascending Unicode order. The entry count is derived from the
 * table size assuming 4 bytes per pair.
 *
 * Values below 0x80 are returned unchanged. Values of 0x10000 and above, and
 * values not found within 16 bisection steps, return 0.
 *
 * NOTE(review): the probe index is always below the upper bound, so the last
 * pair of the table is never compared -- confirm that the table ends with an
 * entry that does not need to be found.
 */
WCHAR ff_uni2oem (	/* Returns OEM code character, zero on error */
	DWORD	uni	/* UTF-16 encoded character to be converted */
)
{
	const WCHAR *table = uni2oem936;
	WCHAR key;
	UINT low, high, mid, steps;

	/* ASCII maps to itself */
	if (uni < 0x80) {
		return (WCHAR)uni;
	}

	/* Only the BMP is covered by the table */
	if (uni >= 0x10000) {
		return 0;
	}

	key = (WCHAR)uni;
	low = 0;
	high = sizeof uni2oem936 / 4 - 1;

	/* Bounded bisection: at most 16 probes */
	for (steps = 16; steps != 0; steps--) {
		mid = low + (high - low) / 2;
		if (table[mid * 2] == key) {
			return table[mid * 2 + 1];
		}
		if (key > table[mid * 2]) {
			low = mid;
		} else {
			high = mid;
		}
	}

	return 0;	/* Not found */
}

物联沃分享整理
物联沃-IOTWORD物联网 » 将UTF-8编码转换为GBK编码的方法详解

发表评论