程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> C語言 >> C++ >> C++入門知識 >> 跨平台Unicode與UTF8互轉代碼

跨平台Unicode與UTF8互轉代碼

編輯:C++入門知識

在原來的基礎上,將代碼整理,並加強安全性;並按照 Windows API 的設計,添加輸出緩沖長度探測功能。

當OutUTFString為NULL時, 可以進行輸出的UTF8字符串長度探測

   1:  uint32 UniCharToUTF8(wchar_t UniChar, char *OutUTFString)
   2:      {
   3:  
   4:          uint32 UTF8CharLength = 0;
   5:  
   6:          if (UniChar < 0x80)
   7:          { 
   8:              if ( OutUTFString )
   9:                  OutUTFString[UTF8CharLength++] = (char)UniChar;
  10:              else
  11:                  UTF8CharLength++;
  12:          }
  13:          else if(UniChar < 0x800)
  14:          {
  15:              if ( OutUTFString )
  16:              {
  17:                  OutUTFString[UTF8CharLength++] = 0xc0 | ( UniChar >> 6 );
  18:                  OutUTFString[UTF8CharLength++] = 0x80 | ( UniChar & 0x3f );
  19:              }
  20:              else
  21:              {
  22:                  UTF8CharLength += 2;
  23:              }
  24:          }
  25:          else if(UniChar < 0x10000 )
  26:          {
  27:              if ( OutUTFString )
  28:              {
  29:                  OutUTFString[UTF8CharLength++] = 0xe0 | ( UniChar >> 12 );
  30:                  OutUTFString[UTF8CharLength++] = 0x80 | ( (UniChar >> 6) & 0x3f );
  31:                  OutUTFString[UTF8CharLength++] = 0x80 | ( UniChar & 0x3f );
  32:              }
  33:              else
  34:              {
  35:                  UTF8CharLength += 3;
  36:              }
  37:          }
  38:          else if( UniChar < 0x200000 )
  39:          {
  40:              if ( OutUTFString )
  41:              {
  42:                  OutUTFString[UTF8CharLength++] = 0xf0 | ( (int)UniChar >> 18 );
  43:                  OutUTFString[UTF8CharLength++] = 0x80 | ( (UniChar >> 12) & 0x3f );
  44:                  OutUTFString[UTF8CharLength++] = 0x80 | ( (UniChar >> 6) & 0x3f );
  45:                  OutUTFString[UTF8CharLength++] = 0x80 | ( UniChar & 0x3f );
  46:              }
  47:              else
  48:              {
  49:                  UTF8CharLength += 4;
  50:              }
  51:  
  52:          }
  53:  
  54:          return UTF8CharLength;
  55:      }
 

當OutUnicodeString為NULL時, 可以進行輸出的Unicode字符串長度探測

 

   1:  uint32 UTF8StrToUnicode( const char* UTF8String, uint32 UTF8StringLength, wchar_t* OutUnicodeString, uint32 UnicodeStringBufferSize )
   2:      {
   3:          uint32 UTF8Index = 0;
   4:          uint32 UniIndex = 0;
   5:  
   6:          while ( UTF8Index < UTF8StringLength )
   7:          {
   8:              unsigned char UTF8Char = UTF8String[UTF8Index];
   9:  
  10:              if ( UnicodeStringBufferSize != 0 && UniIndex >= UnicodeStringBufferSize )
  11:                  break;
  12:  
  13:              if ((UTF8Char & 0x80) == 0)
  14:              {
  15:                  const uint32 cUTF8CharRequire = 1;
  16:  
  17:                  // UTF8字碼不足
  18:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
  19:                      break;
  20:  
  21:                  if ( OutUnicodeString )
  22:                  {
  23:                      wchar_t& WideChar = OutUnicodeString[UniIndex];
  24:  
  25:                      WideChar = UTF8Char;
  26:                  }
  27:  
  28:                  UTF8Index++;
  29:                 
  30:              }
  31:              else if((UTF8Char & 0xE0) == 0xC0)  ///< 110x-xxxx 10xx-xxxx
  32:              {
  33:                  const uint32 cUTF8CharRequire = 2;
  34:  
  35:                  // UTF8字碼不足
  36:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
  37:                      break;
  38:  
  39:                  if ( OutUnicodeString )
  40:                  {
  41:                      wchar_t& WideChar = OutUnicodeString[UniIndex];
  42:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x3F) << 6;
  43:                      WideChar |= (UTF8String[UTF8Index + 1] & 0x3F);
  44:                  }
  45:                 
  46:                  UTF8Index += cUTF8CharRequire;
  47:              }
  48:              else if((UTF8Char & 0xF0) == 0xE0)  ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
  49:              {
  50:                  const uint32 cUTF8CharRequire = 3;
  51:  
  52:                  // UTF8字碼不足
  53:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
  54:                      break;
  55:  
  56:                  if ( OutUnicodeString )
  57:                  {
  58:                      wchar_t& WideChar = OutUnicodeString[UniIndex];
  59:  
  60:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x1F) << 12;
  61:                      WideChar |= (UTF8String[UTF8Index + 1] & 0x3F) << 6;
  62:                      WideChar |= (UTF8String[UTF8Index + 2] & 0x3F);
  63:                  }
  64:                 
  65:  
  66:                  UTF8Index += cUTF8CharRequire;
  67:              }
  68:              else if((UTF8Char & 0xF8) == 0xF0)  ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
  69:              {
  70:                  const uint32 cUTF8CharRequire = 4;
  71:  
  72:                  // UTF8字碼不足
  73:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
  74:                      break;
  75:  
  76:                  if ( OutUnicodeString )
  77:                  {
  78:                      wchar_t& WideChar = OutUnicodeString[UniIndex];
  79:  
  80:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x0F) << 18;
  81:                      WideChar  = (UTF8String[UTF8Index + 1] & 0x3F) << 12;
  82:                      WideChar |= (UTF8String[UTF8Index + 2] & 0x3F) << 6;
  83:                      WideChar |= (UTF8String[UTF8Index + 3] & 0x3F);
  84:                  }
  85:  
  86:                  UTF8Index += cUTF8CharRequire;
  87:              }
  88:              else ///< 1111-10xx 10xx-xxxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
  89:              {
  90:                  const uint32 cUTF8CharRequire = 5;
  91:  
  92:                  // UTF8字碼不足
  93:                  if ( UTF8Index + cUTF8CharRequire > UTF8StringLength )
  94:                      break;
  95:  
  96:                  if ( OutUnicodeString )
  97:                  {
  98:                      wchar_t& WideChar = OutUnicodeString[UniIndex];
  99:  
 100:                      WideChar  = (UTF8String[UTF8Index + 0] & 0x07) << 24;
 101:                      WideChar  = (UTF8String[UTF8Index + 1] & 0x3F) << 18;
 102:                      WideChar  = (UTF8String[UTF8Index + 2] & 0x3F) << 12;
 103:                      WideChar |= (UTF8String[UTF8Index + 3] & 0x3F) << 6;
 104:                      WideChar |= (UTF8String[UTF8Index + 4] & 0x3F);
 105:                  }
 106:  
 107:                  UTF8Index += cUTF8CharRequire;
 108:              }
 109:  
 110:  
 111:              UniIndex++;
 112:          }
 113:  
 114:          return UniIndex;
 115:      }
療效: 用了此代碼啊, 再也不用被iconv折磨了

 

摘自 戰魂小築

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved