關於Unicode和UTF的關係,可以簡單地記憶:Unicode既指一個編碼組織(Unicode聯盟),也指它制定的編碼規範,在Java中名為"unicode"的編碼則指UTF-16;UTF(Unicode Transformation Format)是Unicode編碼的轉換格式,以便於很好地在網絡中傳遞、在存儲媒介中保存,於是UTF存在多種格式,如8、16、32,再加上le、be(小端、大端字節序)以及是否帶BOM的區別,Unicode編碼格式才會有以下代碼中的10種。
代碼如下:
/**
 * Demo entry point: dumps a sample text as hex under every charset, then
 * decodes a sample hex string under every charset.
 *
 * @param args command-line arguments (unused)
 * @throws UnsupportedEncodingException if a charset used by the helpers is unavailable
 */
public static void main(String[] args) throws UnsupportedEncodingException {
    // Text that is turned into one hex dump per charset.
    final String sampleText = "0 產品型號描述";
    // Hex digits that are turned back into text once per charset.
    // NOTE(review): looks like a UTF-16LE dump of a similar string - confirm.
    final String sampleHex = "30000900A74EC1548B57F753CF63F08F";

    StringUtil.str2all(sampleText);
    StringUtil.str4all(sampleHex);
}
/**
 * Parses a hexadecimal string into bytes and prints those bytes decoded
 * under each of the ten Unicode charsets, one decoded string per line.
 *
 * <p>A separator line is printed first. The charsets are tried in this order:
 * utf-8, utf-16, utf-16le, x-utf-16le-bom, utf-16be, utf-32, utf-32le,
 * x-utf-32le-bom, utf-32be, x-utf-32be-bom.
 *
 * @param uStr hexadecimal digits, two per byte (for example {@code "3000"});
 *             a trailing unpaired digit is ignored
 * @throws UnsupportedEncodingException if one of the charsets is not available
 * @throws NumberFormatException if a digit pair is not valid hexadecimal
 */
public static void str4all(String uStr) throws UnsupportedEncodingException{
    System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

    // Two hex digits form one byte; integer division drops an unpaired last digit.
    byte[] bs = new byte[uStr.length()/2];
    for (int i = 0; i < bs.length; i++) {
        // parseInt yields 0..255; the cast narrows it to the signed byte with the same bits.
        bs[i] = (byte) Integer.parseInt(uStr.substring(i*2, i*2+2), 16);
    }

    String[] charsetNames = {
        "utf-8",
        // 16-bit family ("utf-16" is the same as "unicode")
        "utf-16",
        "utf-16le",
        "x-utf-16le-bom",
        "utf-16be",
        // "x-utf-16be-bom" is skipped: it raises UnsupportedEncodingException
        // 32-bit family
        "utf-32",
        "utf-32le",
        "x-utf-32le-bom",
        "utf-32be",
        // Previously "x-utf-32le-bom" was repeated here and the BE variant was never tried.
        "x-utf-32be-bom"
    };
    for (String charsetName : charsetNames) {
        System.out.println(new String(bs, charsetName));
    }
}
/**
 * Encodes a string under each of the ten Unicode charsets and prints the
 * resulting bytes as lowercase hexadecimal, one line per charset.
 *
 * <p>A separator line is printed first. The charsets are used in this order:
 * utf-8, utf-16, utf-16le, x-utf-16le-bom, utf-16be, utf-32, utf-32le,
 * x-utf-32le-bom, utf-32be, x-utf-32be-bom. Every byte is written as exactly
 * two hex digits, so a printed line can be passed to a hex parser that reads
 * two digits per byte.
 *
 * @param uStr the text to encode
 * @throws UnsupportedEncodingException if one of the charsets is not available
 */
public static void str2all(String uStr) throws UnsupportedEncodingException{
    System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

    String[] charsetNames = {
        "utf-8",
        // 16-bit family
        "utf-16",
        "utf-16le",
        "x-utf-16le-bom",
        "utf-16be",
        // "x-utf-16be-bom" is skipped: it raises UnsupportedEncodingException
        // 32-bit family
        "utf-32",
        "utf-32le",
        "x-utf-32le-bom",
        "utf-32be",
        // Previously "x-utf-32le-bom" was repeated here and the BE variant was never shown.
        "x-utf-32be-bom"
    };

    for (String charsetName : charsetNames) {
        byte[] bs = uStr.getBytes(charsetName);
        StringBuilder hex = new StringBuilder(bs.length * 2);
        for (byte b : bs) {
            // Mask to 0..255 so negative bytes do not sign-extend to eight hex digits.
            int value = b & 0xff;
            // Integer.toHexString omits leading zeros; pad so each byte is two digits.
            if (value < 0x10) {
                hex.append('0');
            }
            hex.append(Integer.toHexString(value));
        }
        System.out.println(hex);
    }
}
編碼名稱收集
代碼如下:
charset US-ASCII %S
historicalName ASCII
# IANA aliases
alias iso-ir-6
alias ANSI_X3.4-1986
alias ISO_646.irv:1991
alias ASCII
alias ISO646-US
alias us
alias IBM367
alias cp367
alias csASCII
alias default
# Other aliases
alias 646 # Solaris POSIX locale
alias iso_646.irv:1983
alias ANSI_X3.4-1968 # Linux POSIX locale (RedHat)
alias ascii7
charset UTF-8 UTF_8
historicalName UTF8
alias UTF8
alias unicode-1-1-utf-8
charset UTF-16 UTF_16
historicalName UTF-16
alias UTF_16
alias utf16
alias unicode
alias UnicodeBig
charset UTF-16BE UTF_16BE
historicalName UnicodeBigUnmarked
alias UTF_16BE
alias ISO-10646-UCS-2
alias X-UTF-16BE
alias UnicodeBigUnmarked
charset UTF-16LE UTF_16LE
historicalName UnicodeLittleUnmarked
alias UTF_16LE
alias X-UTF-16LE
alias UnicodeLittleUnmarked
charset x-UTF-16LE-BOM UTF_16LE_BOM
historicalName UnicodeLittle
alias UnicodeLittle
charset UTF-32 UTF_32
alias UTF_32
alias UTF32
charset UTF-32LE UTF_32LE
alias UTF_32LE
alias X-UTF-32LE
charset UTF-32BE UTF_32BE
alias UTF_32BE
alias X-UTF-32BE
charset X-UTF-32LE-BOM UTF_32LE_BOM
alias UTF_32LE_BOM
alias UTF-32LE-BOM
charset X-UTF-32BE-BOM UTF_32BE_BOM
alias UTF_32BE_BOM
alias UTF-32BE-BOM
charset ISO-8859-1 %S
historicalName ISO8859_1
# IANA aliases
alias iso-ir-100
alias ISO_8859-1
alias latin1
alias l1
alias IBM819
alias cp819
alias csISOLatin1
# Other aliases
alias 819
alias IBM-819
alias ISO8859_1
alias ISO_8859-1:1987
alias ISO_8859_1
alias 8859_1
alias ISO8859-1
charset ISO-8859-2 %S
historicalName ISO8859_2
alias iso8859_2
alias 8859_2
alias iso-ir-101
alias ISO_8859-2
alias ISO_8859-2:1987
alias ISO8859-2
alias latin2
alias l2
alias ibm912
alias ibm-912
alias cp912
alias 912
alias csISOLatin2
charset ISO-8859-4 %S
historicalName ISO8859_4
alias iso8859_4
alias iso8859-4
alias 8859_4
alias iso-ir-110
alias ISO_8859-4
alias ISO_8859-4:1988
alias latin4
alias l4
alias ibm914
alias ibm-914
alias cp914
alias 914
alias csISOLatin4
charset ISO-8859-5 %S
historicalName ISO8859_5
alias iso8859_5
alias 8859_5
alias iso-ir-144
alias ISO_8859-5
alias ISO_8859-5:1988
alias ISO8859-5
alias cyrillic
alias ibm915
alias ibm-915
alias cp915
alias 915
alias csISOLatinCyrillic
charset ISO-8859-7 %S
historicalName ISO8859_7
alias iso8859_7
alias 8859_7
alias iso-ir-126
alias ISO_8859-7
alias ISO_8859-7:1987
alias ELOT_928
alias ECMA-118
alias greek
alias greek8
alias csISOLatinGreek
alias sun_eu_greek # Solaris 7/8 compatibility
alias ibm813
alias ibm-813
alias 813
alias cp813
alias iso8859-7 # Solaris 9 compatibility
charset ISO-8859-9 %S
historicalName ISO8859_9
alias iso8859_9
alias 8859_9
alias iso-ir-148
alias ISO_8859-9
alias ISO_8859-9:1989
alias ISO8859-9
alias latin5
alias l5
alias ibm920
alias ibm-920
alias 920
alias cp920
alias csISOLatin5
charset ISO-8859-13 %S
historicalName ISO8859_13
alias iso8859_13
alias 8859_13
alias iso_8859-13
alias ISO8859-13
charset ISO-8859-15 %S
historicalName ISO8859_15
# IANA alias
alias ISO_8859-15
# Other aliases
alias 8859_15
alias ISO8859_15
alias ISO8859-15
alias IBM923
alias IBM-923
alias cp923
alias 923
alias LATIN0
alias LATIN9
alias L9
alias csISOlatin0
alias csISOlatin9
alias ISO8859_15_FDIS
charset KOI8-R %S
historicalName KOI8_R
alias koi8_r
alias koi8
alias cskoi8r
charset KOI8-U %S
alias koi8_u
charset windows-1250 %S
historicalName Cp1250
alias cp1250
alias cp5346 # Euro IBM CCSID
charset windows-1251 %S
historicalName Cp1251
alias cp1251
alias cp5347 # Euro IBM CCSID
alias ansi-1251 # Solaris compatibility
charset windows-1252 %S
historicalName Cp1252
alias cp1252
alias cp5348 # Euro IBM CCSID
charset windows-1253 %S
historicalName Cp1253
alias cp1253
alias cp5349 # Euro IBM CCSID
charset windows-1254 %S
historicalName Cp1254
alias cp1254
alias cp5350 # Euro IBM CCSID
charset windows-1257 %S
historicalName Cp1257
alias cp1257
alias cp5353 # Euro IBM CCSID
charset IBM437 %S
historicalName Cp437
alias cp437
alias ibm-437
alias 437
alias cspc8codepage437
alias windows-437
charset x-IBM737 %S
historicalName Cp737
alias cp737
alias ibm737
alias ibm-737
alias 737
charset IBM775 %S
historicalName Cp775
alias cp775
alias ibm-775
alias 775
charset IBM850 %S
historicalName Cp850
alias cp850
alias ibm-850
alias 850
alias cspc850multilingual
charset IBM852 %S
historicalName Cp852
alias cp852
alias ibm-852
alias 852
alias csPCp852
charset IBM855 %S
historicalName Cp855
alias cp855
alias ibm-855
alias 855
alias cspcp855
charset IBM857 %S
historicalName Cp857
alias cp857
alias ibm-857
alias 857
alias csIBM857
charset IBM00858 %S
historicalName Cp858
alias cp858
alias ccsid00858
alias cp00858
alias 858
charset IBM862 %S
historicalName Cp862
alias cp862
alias ibm-862
alias 862
alias csIBM862
alias cspc862latinhebrew
charset IBM866 %S
historicalName Cp866
alias cp866
alias ibm-866
alias 866
alias csIBM866
charset x-IBM874 %S
historicalName Cp874
alias cp874
alias ibm874
alias ibm-874
alias 874