程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> C語言 >> C++ >> C++入門知識 >> poco json 中文處理,pocojson

poco json 中文處理,pocojson

編輯:C++入門知識

poco json 中文處理,pocojson


 

 

起因

 最近linux服務器通信需要用到json.

 jsoncpp比較出名,但poco 1.5版本以後已經帶有json庫,所以決定使用poco::json(linux 上已經用到了poco這一套框架).

 網上關於 poco json處理中文比較少.

 後來有網友說:

 Latin1裝gbk字符然後轉utf8,當時喜出望外.如下:

"

Latin1是ISO-8859-1的別名,有些環境下寫作Latin-1。ISO-8859-1編碼是單字節編碼,向下兼容ASCII,其編碼范圍是0x00-0xFF,0x00-0x7F之間完全和ASCII一致,0x80-0x9F之間是控制字符,0xA0-0xFF之間是文字符號。

因為ISO-8859-1編碼范圍使用了單字節內的所有空間,在支持ISO-8859-1的系統中傳輸和存儲其他任何編碼的字節流都不會被拋棄。換言之,把其他任何編碼的字節流當作ISO-8859-1編碼看待都沒有問題。這是個很重要的特性,MySQL數據庫默認編碼是Latin1就是利用了這個特性。ASCII編碼是一個7位的容器,ISO-8859-1編碼是一個8位的容器。

"

Latin1與utf8轉換

測試代碼1:

// Test snippet 1: feed a JSON literal containing Chinese text through a
// Latin-1 -> UTF-8 TextConverter and parse the result with Poco::JSON.
// NOTE(review): "[email protected]" inside the literals looks like an e-mail
// obfuscation artifact from the web page -- confirm the original text.
json="{ \"test\" : \" 歡迎來到[email protected] \"}";
    //json="{ \"test\" : \" \u6b22\u8fce\u6765\[email protected] \"}";
    // Treat every input byte as one Latin-1 character and re-encode it as UTF-8.
    Poco::Latin1Encoding latin1;
    Poco::UTF8Encoding utf8;
    Poco::TextConverter converter(latin1, utf8);
    std::string strUtf8;
    converter.convert(json, strUtf8);

    Parser parser;
    Var result;

    try
    {
        result = parser.parse(strUtf8);
    }
    catch(JSONException& jsone)
    {
        // Only the message is printed; execution continues past the catch.
        std::cout << jsone.message() << std::endl;
        //assert(false);
    }

    //assert(result.type() == typeid(Object::Ptr));
    //if(result)

    // NOTE(review): if parse() threw, result was never assigned, so this
    // extract presumably fails -- confirm and guard if needed.
    Object::Ptr object = result.extract<Object::Ptr>();
    int nObjectSize=object->size();

    //assert(object->size() > 0);
    DynamicStruct ds = *object;

json="{ \"test\" : \" 歡迎來到[email protected] \"}";


json="{ \"test\" : \" \u6b22\u8fce\u6765\[email protected] \"}";

\u6b22 是 Unicode 碼位的轉義寫法:簡體字「欢」(即「歡」的簡體)-> U+6B22

windows下是正常的,但在linux 下 卻會出現json格式錯誤的問題.  這真是無比蛋痛....(linux 下poco::json處理還是有問題)

連 std::cout<<json ; 都不能打印.

  Poco::Latin1Encoding latin1;
    Poco::UTF8Encoding utf8;
    Poco::TextConverter converter(latin1, utf8); ->latin1 轉utf8




 Poco::TextConverter converter2( utf8,latin1); ->utf8 轉latin1
都有問題.

 

 

最後只有自己寫了個編碼轉換.

 

使用iconv實現編碼轉換

上代碼:

xxx.h

#ifndef _MY_CODE_CONVERT_H
#define  _MY_CODE_CONVERT_H


#include <iostream>


using namespace std;

// Charset conversion facade.
// Declares two static helpers that take and return std::string; their
// definitions are not part of this header.
class CMyCodeConvert
{
public:

    CMyCodeConvert();

    ~CMyCodeConvert();

    // Returns strSource converted from GB2312 to UTF-8 (per the function name).
    static std::string Gb2312ToUtf8(std::string strSource);

    // Returns strSource converted from UTF-8 to GB2312 (per the function name).
    static std::string Utf8ToGb2312(std::string strSource);

};



#endif

xxx.cpp

#include "MyCodeConverter.h"

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include <iosfwd>
#include <string.h>
#include <stdarg.h>
#include <fstream>
#include <sstream>
#include <iomanip>  

#include <cassert>
#include<time.h>


#include <errno.h>


#ifdef WIN32   //window


#include <Windows.h>
#include <shlwapi.h>
#include <direct.h>

#include <tchar.h>
#pragma comment(lib,"Kernel32.lib")
#pragma comment(lib,"shlwapi.lib")

#elif __linux    //linux
#include <cassert>
#include <sys/types.h>
#include <iconv.h>
#include <stdio.h>

#endif


#ifdef WIN32   //window


/// Converts text from the process ANSI code page (CP_ACP) to UTF-8.
///
/// The conversion goes ANSI -> UTF-16 -> UTF-8 through the Win32 API.
/// NOTE(review): CP_ACP is GBK only on systems whose ANSI code page is 936;
/// confirm that is the intended source encoding.
///
/// @param lpszGb32Text NUL-terminated text in the ANSI code page.
/// @return The UTF-8 text, or an empty string if any conversion step fails.
static std::string Gb32ToUtf8(const char * lpszGb32Text)
{
    // Passing -1 as the input length makes every reported size include the
    // terminating NUL.
    const int nUnicodeBufLen=MultiByteToWideChar(CP_ACP, 0, lpszGb32Text, -1, 0, 0);
    if (nUnicodeBufLen<=0)
        return std::string();

    std::wstring strUnicode(nUnicodeBufLen, L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, lpszGb32Text, -1, &strUnicode[0], nUnicodeBufLen)==0)
        return std::string();

    const int nUtf8BufLen=WideCharToMultiByte(CP_UTF8, 0, strUnicode.c_str(), -1, 0, 0, NULL, NULL);
    if (nUtf8BufLen<=0)
        return std::string();

    std::string strUtf8(nUtf8BufLen, '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, strUnicode.c_str(), -1, &strUtf8[0], nUtf8BufLen, NULL, NULL)==0)
        return std::string();

    // Drop the terminating NUL that the API wrote into the buffer.
    strUtf8.resize(nUtf8BufLen-1);
    return strUtf8;
}

/// Converts UTF-8 text to the process ANSI code page (CP_ACP).
///
/// The conversion goes UTF-8 -> UTF-16 -> ANSI through the Win32 API.
/// NOTE(review): CP_ACP is GBK only on systems whose ANSI code page is 936;
/// confirm that is the intended target encoding.
///
/// @param lpszUft8Text NUL-terminated UTF-8 text.
/// @return The ANSI-encoded text, or an empty string if any step fails.
static std::string Utf8ToGb32(const char * lpszUft8Text)
{
    // Passing -1 as the input length makes every reported size include the
    // terminating NUL.
    const int nUnicodeBufLen=MultiByteToWideChar(CP_UTF8, 0, lpszUft8Text, -1, 0, 0);
    if (nUnicodeBufLen<=0)
        return std::string();

    std::wstring strUnicode(nUnicodeBufLen, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, lpszUft8Text, -1, &strUnicode[0], nUnicodeBufLen)==0)
        return std::string();

    const int nGb32BufLen=WideCharToMultiByte(CP_ACP, 0, strUnicode.c_str(), -1, 0, 0, NULL, NULL);
    if (nGb32BufLen<=0)
        return std::string();

    std::string strGb32(nGb32BufLen, '\0');
    if (WideCharToMultiByte(CP_ACP, 0, strUnicode.c_str(), -1, &strGb32[0], nGb32BufLen, NULL, NULL)==0)
        return std::string();

    // Drop the terminating NUL that the API wrote into the buffer.
    strGb32.resize(nGb32BufLen-1);
    return strGb32;
}
#elif __linux    //linux




// Byte-level charset converter: wraps one iconv conversion descriptor.
//
// NOTE(review): this listing has a defect in convert() -- see the comment
// on the iconv() call.
class CodeConverterBase {

    // Example usage...
    //std::string strxxxxcvx=json;
    //CodeConverterBase cc2 = CodeConverterBase("gb2312","utf-8");
    //char outbuf[3000];
    //cc2.convert((char *)strxxxxcvx.c_str(),strxxxxcvx.size(),outbuf,1024);
    //strUtf8=outbuf;


private:
    iconv_t cd;   // descriptor returned by iconv_open(); never checked for failure
public:

    // Opens a descriptor converting from from_charset to to_charset.
    // iconv_open() takes the target charset first, hence the swapped order.
    CodeConverterBase(const char *from_charset,const char *to_charset) {
        cd = iconv_open(to_charset,from_charset);
    }

    // Closes the descriptor.
    ~CodeConverterBase() {
        iconv_close(cd);
    }

    // Zero-fills outbuf (outlen bytes), then converts inlen bytes of inbuf
    // into it. Returns the iconv() result narrowed to int.
    int convert(char *inbuf,int inlen,char *outbuf,int outlen) {
        char **pin = &inbuf;
        char **pout = &outbuf;
        memset(outbuf,0,outlen);
        // NOTE(review): inlen and outlen are int, but their addresses are cast
        // to size_t*. Where size_t is wider than int (e.g. 64-bit Linux),
        // iconv() reads and writes sizeof(size_t) bytes through a pointer to
        // an int: the lengths it sees include adjacent stack bytes, and the
        // updated counts are stored past the variables.
        return iconv(cd,pin,(size_t *)&inlen,pout,(size_t *)&outlen);
    }
};

// String-level conversion helpers built on CodeConverterBase.
class CodeConverterString
{
public:
    CodeConverterString()
    {

    }

    ~CodeConverterString()
    {

    }


    // Converts strSource from "gb2312" to "utf-8".
    // Returns the converted bytes up to the first NUL; returns "" when the
    // output buffer cannot be allocated. The result of convert() is ignored.
    static std::string Gb2312ToUtf8(std::string strSource)
    {
        std::string strRet="";

        CodeConverterBase cc2 = CodeConverterBase("gb2312","utf-8");
        // Output buffer: four bytes per input byte plus four spare bytes.
        int nLen=strSource.size()*4+4;
        char * szOutBuf=(char *)malloc(nLen);


        // Single-pass do/while so that break can skip straight to the cleanup.
        do 
        {
            if(NULL==szOutBuf)
                break;

            memset(szOutBuf,0,nLen);
            cc2.convert((char *)strSource.c_str(),strSource.size(),szOutBuf,nLen);
            strRet=szOutBuf;

        } while (0);

        if(szOutBuf)
        {
            free(szOutBuf);
            szOutBuf=NULL;
        }

        return strRet;
    }

    // Converts strSource from "utf-8" to "gb2312".
    // Returns the converted bytes up to the first NUL; returns "" when the
    // output buffer cannot be allocated. The result of convert() is ignored.
    static std::string Utf8ToGb2312(std::string strSource)
    {
        std::string strRet="";

        CodeConverterBase cc2 = CodeConverterBase("utf-8","gb2312");
        // Output buffer: four bytes per input byte plus four spare bytes.
        int nLen=strSource.size()*4+4;
        char * szOutBuf=(char *)malloc(nLen);


        // Single-pass do/while so that break can skip straight to the cleanup.
        do 
        {
            if(NULL==szOutBuf)
                break;

            memset(szOutBuf,0,nLen);
            cc2.convert((char *)strSource.c_str(),strSource.size(),szOutBuf,nLen);
            strRet=szOutBuf;

        } while (0);

        if(szOutBuf)
        {
            free(szOutBuf);
            szOutBuf=NULL;
        }

        return strRet;
    }


};




#endif













// Constructor: nothing to initialise.
CMyCodeConvert::CMyCodeConvert() {}

// Destructor: nothing to release.
CMyCodeConvert::~CMyCodeConvert() {}


/// Converts GB2312 text to UTF-8 using the back end selected at compile time.
///
/// @param strSource Text to convert.
/// @return The converted text. On a platform with no back end an empty
///         string is returned, the same value the Windows helper uses to
///         signal failure.
std::string CMyCodeConvert::Gb2312ToUtf8(std::string strSource)
{
#ifdef WIN32   //window

    return Gb32ToUtf8(strSource.c_str());

#elif __linux    //linux

    return CodeConverterString::Gb2312ToUtf8(strSource);

#else            //no converter available

    // Without this branch the function would fall off its end (undefined
    // behaviour) on any other platform.
    (void)strSource;
    return std::string();

#endif
}



//
/// Converts UTF-8 text to GB2312 using the back end selected at compile time.
///
/// @param strSource Text to convert.
/// @return The converted text. On a platform with no back end an empty
///         string is returned, the same value the Windows helper uses to
///         signal failure.
std::string CMyCodeConvert::Utf8ToGb2312(std::string strSource)
{
#ifdef WIN32   //window

    return Utf8ToGb32(strSource.c_str());

#elif __linux    //linux

    return CodeConverterString::Utf8ToGb2312(strSource);

#else            //no converter available

    // Without this branch the function would fall off its end (undefined
    // behaviour) on any other platform.
    (void)strSource;
    return std::string();

#endif
}

 

 

結果多次調用後出現了如下錯誤: 

Utf8ToGb2312 ,nError=-1,errno =84
*** glibc detected *** ./HTTPTimeServer: free(): invalid next size (fast): 0x000000000167dda0 ***
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(+0x75be6)[0x7f6ed81cdbe6]
/lib/x86_64-linux-gnu/libc.so.6(cfree+0x6c)[0x7f6ed81d298c]
./HTTPTimeServer(_ZN19CodeConverterString12Utf8ToGb2312ESs+0x1c2)[0x41a553]
./HTTPTimeServer(_ZN14CMyCodeConvert12Utf8ToGb2312ESs+0x48)[0x419e12]
./HTTPTimeServer(_Z5suitev+0x17a3)[0x41c13e]
./HTTPTimeServer(main+0x1a6)[0x41d157]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xfd)[0x7f6ed8176ead]
./HTTPTimeServer[0x419c39]
======= Memory map: ========
00400000-0043e000 r-xp 00000000 08:01 797594                             /home/poco-1.6.0-all/Net/samples/HTTPTimeServer/HTTPTimeServer
0063e000-0063f000 rw-p 0003e000 08:01 797594                             /home/poco-1.6.0-all/Net/samples/HTTPTimeServer/HTTPTimeServer
0165d000-016a1000 rw-p 00000000 00:00 0                                  [heap]
7f6ed0000000-7f6ed0021000 rw-p 00000000 00:00 0 
7f6ed0021000-7f6ed4000000 ---p 00000000 00:00 0 
7f6ed74fa000-7f6ed7515000 r-xp 00000000 08:01 276623                     /usr/lib/x86_64-linux-gnu/gconv/GBK.so
7f6ed7515000-7f6ed7714000 ---p 0001b000 08:01 276623                     /usr/lib/x86_64-linux-gnu/gconv/GBK.so
7f6ed7714000-7f6ed7715000 r--p 0001a000 08:01 276623                     /usr/lib/x86_64-linux-gnu/gconv/GBK.so
7f6ed7715000-7f6ed7716000 rw-p 0001b000 08:01 276623                     /usr/lib/x86_64-linux-gnu/gconv/GBK.so
7f6ed7716000-7f6ed771f000 r-xp 00000000 08:01 270318                     /usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0
7f6ed771f000-7f6ed791e000 ---p 00009000 08:01 270318                     /usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0
7f6ed791e000-7f6ed791f000 rw-p 00008000 08:01 270318                     /usr/lib/x86_64-linux-gnu/libltdl.so.7.3.0
7f6ed791f000-7f6ed792f000 r-xp 00000000 08:01 400548                     /usr/local/lib/libodbcinst.so.2.0.0
7f6ed792f000-7f6ed7b2f000 ---p 00010000 08:01 400548                     /usr/local/lib/libodbcinst.so.2.0.0
7f6ed7b2f000-7f6ed7b30000 rw-p 00010000 08:01 400548                     /usr/local/lib/libodbcinst.so.2.0.0
7f6ed7b30000-7f6ed7b37000 r-xp 00000000 08:01 399741                     /lib/x86_64-linux-gnu/librt-2.13.so
7f6ed7b37000-7f6ed7d36000 ---p 00007000 08:01 399741                     /lib/x86_64-linux-gnu/librt-2.13.so
7f6ed7d36000-7f6ed7d37000 r--p 00006000 08:01 399741                     /lib/x86_64-linux-gnu/librt-2.13.so
7f6ed7d37000-7f6ed7d38000 rw-p 00007000 08:01 399741                     /lib/x86_64-linux-gnu/librt-2.13.so
7f6ed7d38000-7f6ed7d3a000 r-xp 00000000 08:01 399733                     /lib/x86_64-linux-gnu/libdl-2.13.so
7f6ed7d3a000-7f6ed7f3a000 ---p 00002000 08:01 399733                     /lib/x86_64-linux-gnu/libdl-2.13.so
7f6ed7f3a000-7f6ed7f3b000 r--p 00002000 08:01 399733                     /lib/x86_64-linux-gnu/libdl-2.13.so
7f6ed7f3b000-7f6ed7f3c000 rw-p 00003000 08:01 399733                     /lib/x86_64-linux-gnu/libdl-2.13.so
7f6ed7f3c000-7f6ed7f53000 r-xp 00000000 08:01 399742                     /lib/x86_64-linux-gnu/libpthread-2.13.so
7f6ed7f53000-7f6ed8152000 ---p 00017000 08:01 399742                     /lib/x86_64-linux-gnu/libpthread-2.13.so
7f6ed8152000-7f6ed8153000 r--p 00016000 08:01 399742                     /lib/x86_64-linux-gnu/libpthread-2.13.so
7f6ed8153000-7f6ed8154000 rw-p 00017000 08:01 399742                     /lib/x86_64-linux-gnu/libpthread-2.13.so
7f6ed8154000-7f6ed8158000 rw-p 00000000 00:00 0 
7f6ed8158000-7f6ed82d9000 r-xp 00000000 08:01 399727                     /lib/x86_64-linux-gnu/libc-2.13.so
7f6ed82d9000-7f6ed84d9000 ---p 00181000 08:01 399727                     /lib/x86_64-linux-gnu/libc-2.13.so
7f6ed84d9000-7f6ed84dd000 r--p 00181000 08:01 399727                     /lib/x86_64-linux-gnu/libc-2.13.so
7f6ed84dd000-7f6ed84de000 rw-p 00185000 08:01 399727                     /lib/x86_64-linux-gnu/libc-2.13.so
7f6ed84de000-7f6ed84e3000 rw-p 00000000 00:00 0 
7f6ed84e3000-7f6ed84f8000 r-xp 00000000 08:01 393220                     /lib/x86_64-linux-gnu/libgcc_s.so.1
7f6ed84f8000-7f6ed86f8000 ---p 00015000 08:01 393220                     /lib/x86_64-linux-gnu/libgcc_s.so.1
7f6ed86f8000-7f6ed86f9000 rw-p 00015000 08:01 393220                     /lib/x86_64-linux-gnu/libgcc_s.so.1
7f6ed86f9000-7f6ed877a000 r-xp 00000000 08:01 399721                     /lib/x86_64-linux-gnu/libm-2.13.so
7f6ed877a000-7f6ed8979000 ---p 00081000 08:01 399721                     /lib/x86_64-linux-gnu/libm-2.13.so
7f6ed8979000-7f6ed897a000 r--p 00080000 08:01 399721                     /lib/x86_64-linux-gnu/libm-2.13.so
7f6ed897a000-7f6ed897b000 rw-p 00081000 08:01 399721                     /lib/x86_64-linux-gnu/libm-2.13.so
7f6ed897b000-7f6ed8a63000 r-xp 00000000 08:01 266157                     /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.17
7f6ed8a63000-7f6ed8c63000 ---p 000e8000 08:01 266157                     /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.17
7f6ed8c63000-7f6ed8c6b000 r--p 000e8000 08:01 266157                     /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.17
7f6ed8c6b000-7f6ed8c6d000 rw-p 000f0000 08:01 266157                     /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.17
7f6ed8c6d000-7f6ed8c82000 rw-p 00000000 00:00 0 
7f6ed8c82000-7f6ed8fb8000 r-xp 00000000 08:01 400610                     /usr/local/lib/libmyodbc5a.so
7f6ed8fb8000-7f6ed91b7000 ---p 00336000 08:01 400610                     /usr/local/lib/libmyodbc5a.so
7f6ed91b7000-7f6ed9298000 rw-p 00335000 08:01 400610                     /usr/local/lib/libmyodbc5a.so
7f6ed9298000-7f6ed929e000 rw-p 00000000 00:00 0 
7f6ed929e000-7f6ed947a000 r-xp 00000000 08:01 400674                     /lib/libPocoFoundation.so.30
7f6ed947a000-7f6ed967a000 ---p 001dc000 08:01 400674                     /lib/libPocoFoundation.so.30
7f6ed967a000-7f6ed968a000 rw-p 001dc000 08:01 400674                     /lib/libPocoFoundation.so.30
7f6ed968a000-7f6ed968b000 rw-p 00000000 00:00 0 
7f6ed968b000-7f6ed9715000 r-xp 00000000 08:01 400698                     /lib/libPocoXML.so.30
7f6ed9715000-7f6ed9914000 ---p 0008a000 08:01 400698                     /lib/libPocoXML.so.30
7f6ed9914000-7f6ed991c000 rw-p 00089000 08:01 400698                     /lib/libPocoXML.so.30
7f6ed991c000-7f6ed9969000 r-xp 00000000 08:01 400678                     /lib/libPocoJSON.so.30
7f6ed9969000-7f6ed9b68000 ---p 0004d000 08:01 400678                     /lib/libPocoJSON.so.30
7f6ed9b68000-7f6ed9b6b000 rw-p 0004c000 08:01 400678                     /lib/libPocoJSON.so.30
7f6ed9b6b000-7f6ed9bdc000 r-xp 00000000 08:01 400694                     /lib/libPocoUtil.so.30
7f6ed9bdc000-7f6ed9ddc000 ---p 00071000 08:01 400694                     /lib/libPocoUtil.so.30
7f6ed9ddc000-7f6ed9de0000 rw-p 00071000 08:01 400694                     /lib/libPocoUtil.so.30
7f6ed9de0000-7f6ed9ef9000 r-xp 00000000 08:01 400686                     /lib/libPocoNet.so.30
7f6ed9ef9000-7f6eda0f9000 ---p 00119000 08:01 400686                     /lib/libPocoNet.so.30
7f6eda0f9000-7f6eda105000 rw-p 00119000 08:01 400686                     /lib/libPocoNet.so.30
7f6eda105000-7f6eda106000 rw-p 00000000 00:00 0 
7f6eda106000-7f6eda126000 r-xp 00000000 08:01 399739                     /lib/x86_64-linux-gnu/ld-2.13.so
7f6eda19a000-7f6eda311000 r--p 00000000 08:01 267804                     /usr/lib/locale/locale-archive
7f6eda311000-7f6eda31b000 rw-p 00000000 00:00 0 
7f6eda31b000-7f6eda322000 r--s 00000000 08:01 262813                     /usr/lib/x86_64-linux-gnu/gconv/gconv-modules.cache
7f6eda322000-7f6eda325000 rw-p 00000000 00:00 0 
7f6eda325000-7f6eda326000 r--p 0001f000 08:01 399739                     /lib/x86_64-linux-gnu/ld-2.13.so
7f6eda326000-7f6eda327000 rw-p 00020000 08:01 399739                     /lib/x86_64-linux-gnu/ld-2.13.so
7f6eda327000-7f6eda328000 rw-p 00000000 00:00 0 
7fff4e4f9000-7fff4e51a000 rw-p 00000000 00:00 0                          [stack]
7fff4e5ff000-7fff4e600000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
Aborted
root@debian:/home/poco-1.6.0-all/Net/samples/HTTPTimeServer# 1;2c

 

關於 

 "*** glibc detected *** ./HTTPTimeServer: free(): invalid next size (fast): 0x000000000167dda0 ***" 

的錯誤 網上搜索的結果是

 [日記] 當glibc detected *** free(): invalid next size (normal)出現
[ 2009年5月27日 星期三 ] shanghui    Yes 326 No 319
是內存洩露的問題。 
(1)一般是free了沒有分配的內存
(2)還有就是分配了內存忘記釋放也有可能會出現這樣的問題。 
(3)最後查出來是數組循環的時候越界了 , 寫到了其他的內存裡面, 然後一free那個區域就出現了這樣的問題。

是有allocate引起的,一個debug的方法是把你懷疑的矩陣(比如釋放了就會報內存錯誤)定義成靜態的,這樣如果出現越界,會報段錯誤。依次檢查是否你的數組真的越界了。

但這個轉換代碼很簡單,不應該是這個錯.然後就一直loop的查找...抓狂...

 

 

最後又看了

size_t iconv(iconv_t cd,
                    char **inbuf, size_t *inbytesleft,
                    char **outbuf, size_t *outbytesleft);

 

總覺得問題出在這兒.

打印出執行出錯的返回值和錯誤碼:結果...

iconv 返回失敗,但轉換成功了

iconv 返回 -1,errno 為 84(在 Linux 上即 EILSEQ:輸入中含有非法或無法轉換的多字節序列)

 

iconv錯誤信息要參見:http://blog.163.com/lqy_super/blog/static/19975102120121065399897/

 

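這個引起了我的疑惑。後來請教網友,有人提到 size_t 在 x86 和 x64 下的長度不一樣(32 位下是 4 字節,64 位下是 8 字節),可能引發問題。這點醒了我:原來的 convert() 把 int 變量的地址強制轉成 size_t * 傳給 iconv,在 64 位 Linux 上 iconv 會按 8 字節去讀寫一個只有 4 字節的 int,讀到的長度是錯的,回寫時還會破壞相鄰的內存。會不會就是 size_t 引發的問題?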

linux下就是查找錯誤特別麻煩 .

 

重新改寫了 xxx.cpp

#include "MyCodeConverter.h"

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include <iosfwd>
#include <string.h>
#include <stdarg.h>
#include <fstream>
#include <sstream>
#include <iomanip>  

#include <cassert>
#include<time.h>


#include <errno.h>


#ifdef WIN32   //window


#include <Windows.h>
#include <shlwapi.h>
#include <direct.h>

#include <tchar.h>
#pragma comment(lib,"Kernel32.lib")
#pragma comment(lib,"shlwapi.lib")

#elif __linux    //linux
#include <cassert>
#include <sys/types.h>
#include <iconv.h>
#include <stdio.h>

#endif


#include "Poco/Foundation.h"
#include "Poco/Mutex.h"

// Global mutex locked by CMyCodeConvert::Gb2312ToUtf8 and
// CMyCodeConvert::Utf8ToGb2312 for the duration of each conversion call.
using Poco::FastMutex;
FastMutex g_mutexConverter;

#ifdef WIN32   //window


/// Converts text from the process ANSI code page (CP_ACP) to UTF-8.
///
/// The conversion goes ANSI -> UTF-16 -> UTF-8 through the Win32 API.
/// NOTE(review): CP_ACP is GBK only on systems whose ANSI code page is 936;
/// confirm that is the intended source encoding.
///
/// @param lpszGb32Text NUL-terminated text in the ANSI code page.
/// @return The UTF-8 text, or an empty string if any conversion step fails.
static std::string Gb32ToUtf8(const char * lpszGb32Text)
{
    // Passing -1 as the input length makes every reported size include the
    // terminating NUL.
    const int nUnicodeBufLen=MultiByteToWideChar(CP_ACP, 0, lpszGb32Text, -1, 0, 0);
    if (nUnicodeBufLen<=0)
        return std::string();

    std::wstring strUnicode(nUnicodeBufLen, L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, lpszGb32Text, -1, &strUnicode[0], nUnicodeBufLen)==0)
        return std::string();

    const int nUtf8BufLen=WideCharToMultiByte(CP_UTF8, 0, strUnicode.c_str(), -1, 0, 0, NULL, NULL);
    if (nUtf8BufLen<=0)
        return std::string();

    std::string strUtf8(nUtf8BufLen, '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, strUnicode.c_str(), -1, &strUtf8[0], nUtf8BufLen, NULL, NULL)==0)
        return std::string();

    // Drop the terminating NUL that the API wrote into the buffer.
    strUtf8.resize(nUtf8BufLen-1);
    return strUtf8;
}

/// Converts UTF-8 text to the process ANSI code page (CP_ACP).
///
/// The conversion goes UTF-8 -> UTF-16 -> ANSI through the Win32 API.
/// NOTE(review): CP_ACP is GBK only on systems whose ANSI code page is 936;
/// confirm that is the intended target encoding.
///
/// @param lpszUft8Text NUL-terminated UTF-8 text.
/// @return The ANSI-encoded text, or an empty string if any step fails.
static std::string Utf8ToGb32(const char * lpszUft8Text)
{
    // Passing -1 as the input length makes every reported size include the
    // terminating NUL.
    const int nUnicodeBufLen=MultiByteToWideChar(CP_UTF8, 0, lpszUft8Text, -1, 0, 0);
    if (nUnicodeBufLen<=0)
        return std::string();

    std::wstring strUnicode(nUnicodeBufLen, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, lpszUft8Text, -1, &strUnicode[0], nUnicodeBufLen)==0)
        return std::string();

    const int nGb32BufLen=WideCharToMultiByte(CP_ACP, 0, strUnicode.c_str(), -1, 0, 0, NULL, NULL);
    if (nGb32BufLen<=0)
        return std::string();

    std::string strGb32(nGb32BufLen, '\0');
    if (WideCharToMultiByte(CP_ACP, 0, strUnicode.c_str(), -1, &strGb32[0], nGb32BufLen, NULL, NULL)==0)
        return std::string();

    // Drop the terminating NUL that the API wrote into the buffer.
    strGb32.resize(nGb32BufLen-1);
    return strGb32;
}
#elif __linux    //linux




// Byte-level charset converter: wraps one iconv conversion descriptor.
//
// Example usage:
//   CodeConverterBase cc("gb2312","utf-8");
//   char outbuf[3000];
//   cc.convert(inbuf,inlen,outbuf,sizeof(outbuf));
//
// NOTE: the class owns a raw iconv_t and keeps the implicit copy operations
// so that existing copy-initialisation call sites still compile; do not keep
// two live copies of one instance, or the descriptor is closed twice.
class CodeConverterBase {

private:
    iconv_t cd;

    // True when iconv_open() succeeded; it returns (iconv_t)-1 on failure.
    bool isOpen() const
    {
        return cd != (iconv_t)-1;
    }
public:

    // Opens a descriptor converting from from_charset to to_charset.
    // iconv_open() takes the target charset first, hence the swapped order.
    CodeConverterBase(const char *from_charset,const char *to_charset) 
    {
        cd = iconv_open(to_charset,from_charset);
    }

    // Closes the descriptor if one was opened.
    ~CodeConverterBase() {
        if(isOpen())
            iconv_close(cd);
    }

    // Zero-fills outbuf (outlen bytes), then converts inlen bytes of inbuf
    // into it.
    //
    // Returns the iconv() result: the number of non-reversible conversions,
    // or (size_t)-1 with errno set on failure. When the charset pair could
    // not be opened, returns (size_t)-1 with errno = EBADF and never calls
    // iconv() on the invalid descriptor.
    size_t convert(char *inbuf,size_t inlen,char *outbuf,size_t outlen) 
    {
        memset(outbuf,0,outlen);

        if(!isOpen())
        {
            errno = EBADF;
            return (size_t)-1;
        }

        // The parameters already have the exact types iconv() expects, so
        // their addresses are passed without any cast. iconv() advances the
        // local copies of the pointers and counts only.
        return iconv(cd,&inbuf,&inlen,&outbuf,&outlen);
    }
};

//字符串轉換操作 ->基於string
class CodeConverterString
{
public:
    CodeConverterString()
    {

    }

    ~CodeConverterString()
    {

    }


    static std::string Gb2312ToUtf8(std::string strSource)
    {
        std::string strRet="";
        if(strSource.size()<=0)
            return strRet;

        CodeConverterBase cc2 = CodeConverterBase("gbk","utf-8//TRANSLIT//IGNORE");//  //TRANSLIT//IGNORE 轉換失敗找相似編碼.
        
        size_t nInLen=(strSource.size())+1;
        char * szInBuf=(char *)malloc(nInLen);

        size_t nOutLen=(strSource.size())*4+4;
        char * szOutBuf=(char *)malloc(nOutLen);

        do 
        {
            if(0==szOutBuf ||0==szInBuf)
                break;

            memset(szInBuf, 0, nInLen);
            memcpy(szInBuf, strSource.c_str(), strSource.size());

            size_t nError=cc2.convert((char *)szInBuf,nInLen,szOutBuf,nOutLen);

            if(nError<0)
                std::cout<<"Gb2312ToUtf8 ,nError="<<nError<<",errno ="<<errno <<std::endl;

            strRet=szOutBuf;

        } while (0);

        if(szOutBuf)
        {
            free(szOutBuf);
            szOutBuf=NULL;
        }

        if(szInBuf)
        {
            free(szInBuf);
            szInBuf=NULL;
        }

        return strRet;
    }

    static std::string Utf8ToGb2312(std::string strSource)
    {
        std::string strRet="";
        
        if(strSource.size()<=0)
            return strRet;

        CodeConverterBase cc2 = CodeConverterBase("utf-8","gbk//TRANSLIT//IGNORE");//  //TRANSLIT//IGNORE 轉換失敗找相似編碼.

        size_t nInLen=(strSource.size())+1;
        char * szInBuf=(char *)malloc(nInLen);

        size_t nOutLen=(strSource.size())*4+4;
        char * szOutBuf=(char *)malloc(nOutLen);

        do 
        {
            if(0==szOutBuf ||0==szInBuf)
                break;
            
            memset(szInBuf, 0, nInLen);
            memcpy(szInBuf, strSource.c_str(), strSource.size());

            size_t nError=cc2.convert((char *)szInBuf,nInLen,szOutBuf,nOutLen);
            
            if(nError<0)
                std::cout<<"Utf8ToGb2312 ,nError="<<nError<<",errno ="<<errno <<std::endl;

            strRet=szOutBuf;

        } while (0);

        if(szOutBuf)
        {
            free(szOutBuf);
            szOutBuf=NULL;
        }

        if(szInBuf)
        {
            free(szInBuf);
            szInBuf=NULL;
        }

        return strRet;
    }


};




#endif













// Constructor: nothing to initialise.
CMyCodeConvert::CMyCodeConvert() {}

// Destructor: nothing to release.
CMyCodeConvert::~CMyCodeConvert() {}


/// Converts GB2312 text to UTF-8 using the back end selected at compile time.
/// The call holds g_mutexConverter for its whole duration.
///
/// @param strSource Text to convert.
/// @return The converted text. On a platform with no back end an empty
///         string is returned, the same value the Windows helper uses to
///         signal failure.
std::string CMyCodeConvert::Gb2312ToUtf8(std::string strSource)
{

    FastMutex::ScopedLock lock(g_mutexConverter);
#ifdef WIN32   //window

    return Gb32ToUtf8(strSource.c_str());

#elif __linux    //linux

    return CodeConverterString::Gb2312ToUtf8(strSource);

#else            //no converter available

    // Without this branch the function would fall off its end (undefined
    // behaviour) on any other platform.
    (void)strSource;
    return std::string();

#endif
}



//
/// Converts UTF-8 text to GB2312 using the back end selected at compile time.
/// The call holds g_mutexConverter for its whole duration.
///
/// @param strSource Text to convert.
/// @return The converted text. On a platform with no back end an empty
///         string is returned, the same value the Windows helper uses to
///         signal failure.
std::string CMyCodeConvert::Utf8ToGb2312(std::string strSource)
{

    FastMutex::ScopedLock lock(g_mutexConverter);
#ifdef WIN32   //window

    return Utf8ToGb32(strSource.c_str());

#elif __linux    //linux

    return CodeConverterString::Utf8ToGb2312(strSource);

#else            //no converter available

    // Without this branch the function would fall off its end (undefined
    // behaviour) on any other platform.
    (void)strSource;
    return std::string();

#endif
}

  

 

測試json代碼:

std::string  suite()
{


    std::string json = 
        "{"
        "\"id\": 1123,"
        "\"jsonrpc\": \" [email protected],大聖歸來.. \","
        "\"total\": 2,"
        "\"result\": "
        "["
        "{"
        "\"id\": null,"
        "\"picture\": \"http://placehold.it/32x32\","
        "\"name\": \"這是中文測試.Crossman\","
        "\"about\": \"《西游記之大聖歸來》是根據中國傳統神話故事進行拓展和演繹的3D動畫電影.xxxx Consectetuer suscipit volutpat eros dolor .\","
        "\"friends\": "
        "["
        "{"
        "},"
        "{"
        "\"id\": 2,"
        "\"name\": \"Bailey Oldridge2\""
        "},"
        "{"
        "\"id\": 3,"
        "\"name\": \"Makayla Campbell3\""
        "}"
        "]"
        "},"
        "{"
        "\"id\": 2,"
        "\"picture\": \"http://placehold.it/32x32\","
        "\"name\": \"2中名 歡迎 \","
        "\"about\": \"2北京你好 dolor .\","
        "\"friends\": "
        "["
        "]"
        "}"
        "]"
        "}";

    //json = 
    //    "{"
    //    "\"id\": 1123,"
    //    "\"jsonrpc\": \" [email protected],成都市錦江區. \""
    //    "}";

    std::string strUtf8;

    
    std::cout<<"+++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
    std::cout<<"json="<<json<<std::endl;

    std::cout<<"json hex:"<<std::endl;
    std::string strxxxx=json;
    for(int nxxx=0;nxxx<strxxxx.size();++nxxx)
    {
        unsigned char chTemp=strxxxx[nxxx];
        std::string strTemp=Poco::format("%hX ",(unsigned short )chTemp);//輸出16進制

        std::cout<<strTemp;
    }
    std::cout<<std::endl;

    
    for(int i=0;i<20;++i)
    {
        std::cout<<"++"<<i<<std::endl;
        strUtf8=CMyCodeConvert::Gb2312ToUtf8(json);//多 次測試 轉換..
    }
    


    std::cout<<std::endl;

    std::cout<<"+++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
    std::cout<<"strUtf8="<<strUtf8<<std::endl;
    

    std::cout<<"strUtf8 hex:"<<std::endl;
    strxxxx=strUtf8;
    for(int nxxx=0;nxxx<strxxxx.size();++nxxx)
    {
        unsigned char chTemp=strxxxx[nxxx];
        std::string strTemp=Poco::format("%hX ",(unsigned short )chTemp);//輸出16進制

        std::cout<<strTemp;
    }

    std::cout<<std::endl;

    Parser parser;
    Var result;

    try
    {
        result = parser.parse(strUtf8);//json轉換...
    }
    catch(JSONException& jsone)
    {
        std::cout << jsone.message() << std::endl;
        return "result==NULL";
    }
    

    Object::Ptr object = result.extract<Object::Ptr>();
    int nObjectSize=object->size();

    //assert(object->size() > 0);
    DynamicStruct ds = *object;

    //遍歷...
    for(DynamicStruct::Iterator  itBegin=ds.begin();itBegin!=ds.end();itBegin++)
    {
        //std::string strvalue=itBegin;
        //Var var= itBegin->second;
        //std::cout<<"type:"<<var.type()<<std::endl;

        std::cout<<"K:"  << itBegin->first << " , value: " <<CMyCodeConvert::Utf8ToGb2312( itBegin->second.toString()) << std::endl;
    }

    std::cout<<std::endl<<std::endl<<std::endl;
    std::cout<<"+++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;


    json="";

    std::string strlation1xxxx;
    std::string strjstostring=strUtf8;
    std::string strTemp;
    std::string strKey;




    //std::string strKey="id";

    //key 是否存在
    strKey="id";
    if(object->has(strKey))
    {
        strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());
        std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
    }

    strKey="id2232";
    if(object->has(strKey))
    {
        strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

        std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
    }

    strKey="jsonrpc";
    if(object->has(strKey))
    {
        strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

        std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;

        std::string strxxx;
        
        std::cout<<"jsonrpc 編碼:"<<std::endl;
        std::string strxxxx=strTemp;
        for(int nxxx=0;nxxx<strxxxx.size();++nxxx)
        {
            unsigned char chTemp=strxxxx[nxxx];
            std::string strTemp=Poco::format("%hX",(unsigned short )chTemp);
            strTemp+=" ";
            //std::cout<<strTemp;
        }

        std::cout<<std::endl;

        //strjstostring+=strTemp;


        std::cout<<"object333->get("<<strKey<<")="<<strTemp<<std::endl;
    }

    strKey="total";
    if(object->has(strKey))
    {
        strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

        std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
    }


    strKey="result";
    if(object->has(strKey))
    {
        strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

        std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;

            
        Var  vResult=object->get("result");

        Poco::JSON::Array::Ptr arrayResult = object->getArray("result");
        Poco::JSON::Array::Ptr arr22222 = vResult.extract<Poco::JSON::Array::Ptr>();

        Poco::Dynamic::Array dsarrayResult = *arrayResult;

        std::cout<<"arr22222->size()"<<arr22222->size()<<std::endl;
        std::cout<<"arrayResult->size()"<<arrayResult->size()<<std::endl;
        std::cout<<"arrayResult->size()"<<dsarrayResult.size()<<std::endl;


        for(int nIndex=0;nIndex<arrayResult->size();++nIndex)
        {
            Object::Ptr object = arrayResult->getObject(nIndex);


            strKey="id";
            if(object->has(strKey))
            {
                strTemp="";
                if(!object->get(strKey).isEmpty())
                {
                    strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

                    std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
                }

            }

            strKey="picture";
            if(object->has(strKey))
            {
                strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

                std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
            }

            strKey="name";
            if(object->has(strKey))
            {
                strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

                std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
            }

            strKey="about";
            if(object->has(strKey))
            {
                strTemp=CMyCodeConvert::Utf8ToGb2312( object->get(strKey).toString());

                std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;
            }

            strKey="friends";
            if(object->has(strKey))
            {
                strTemp=object->get(strKey).toString();

                std::cout<<"object->get("<<strKey<<")="<<strTemp<<std::endl;

                Poco::JSON::Array::Ptr arrayFriends = object->getArray(strKey);
                for(int nIndex=0;nIndex<arrayFriends->size();++nIndex)
                {
                    Object::Ptr objectFriends = arrayFriends->getObject(nIndex);
                    DynamicStruct dsTemp = *objectFriends;

                    for(DynamicStruct::Iterator  itBegin=dsTemp.begin();itBegin!=dsTemp.end();itBegin++)
                    {
                        std::cout<<"K:"  << itBegin->first << " , value: " << CMyCodeConvert::Utf8ToGb2312(itBegin->second.toString() )<< std::endl;
                    }

                }
            }



            std::cout<<std::endl<<std::endl<<std::endl;
            std::cout<<"+++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;

            std::cout<<std::endl<<std::endl<<std::endl;

        }




    }

    return strjstostring;
}

 

處理 成功.

 

至此poco::json 庫已經能正常運行且能處理 utf8編碼的中文字符。

教訓

在linux 下的api

size_t iconv(iconv_t cd,
                    char **inbuf, size_t *inbytesleft,
                    char **outbuf, size_t *outbytesleft);

 

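一定要照著參數原型寫:inbytesleft 和 outbytesleft 必須是真正的 size_t 變量的地址,不能把 int 變量的地址強制轉換成 size_t * 傳進去。在 64 位系統上 size_t 是 8 字節而 int 只有 4 字節,強制轉換會讓 iconv 讀到錯誤的長度並寫壞相鄰的內存,最終表現為 free() 時報 invalid next size。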

 

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved