|
Character Set and Encoding
Part:
1
2
3
4
(Continued from previous part...)
Methods to Encode Characters
There are 4 methods to encode characters:
- CharsetEncoder.encode()
- Charset.encode()
- String.getBytes()
- OutputStreamWriter.write()
Here is a program that demonstrates how to encode characters
using each of the above 4 methods:
/**
* EncodingSampler.java
* Copyright (c) 2002 by Dr. Herong Yang
*/
import java.io.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Encodes a fixed set of sample characters with four different JDK APIs
 * (String.getBytes, OutputStreamWriter, Charset.encode and
 * CharsetEncoder.encode) and prints the resulting bytes side by side in hex.
 *
 * Usage: java EncodingSampler [charsetName]
 * When no charset name is given, the platform default encoding is used.
 */
class EncodingSampler {
    /**
     * Name of the platform default encoding as reported by
     * OutputStreamWriter.getEncoding(). Set by main(), and resolved lazily
     * by the encode helpers when they are called without main() having run.
     */
    static String dfltCharset = null;

    /** Sample characters that are encoded and printed by main(). */
    static char[] chars = {0x0000, 0x003F, 0x0040, 0x007F, 0x0080, 0x00BF,
                           0x00C0, 0x00FF, 0x0100, 0x3FFF, 0x4000, 0x7FFF,
                           0x8000, 0xBFFF, 0xC000, 0xEFFF, 0xF000, 0xFFFF};

    /** Lookup table mapping a 4-bit value to its upper-case hex digit. */
    static char[] hexDigit = {'0', '1', '2', '3', '4', '5', '6', '7',
                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Prints one line per sample character: the character code followed by
     * the bytes produced by each of the four encoding methods.
     *
     * @param arg optional; arg[0] is the name of the charset to use
     */
    public static void main(String[] arg) {
        String charset = null;
        if (arg.length > 0) {
            charset = arg[0];
        }
        dfltCharset = probeDefaultEncoding();

        // Validate the requested charset once, up front, instead of failing
        // (and reporting the same error) for every sample character.
        if (charset != null && lookupCharset(charset) == null) {
            return;
        }

        if (charset == null) {
            System.out.println("Default ("+dfltCharset
                +") encoding:");
        } else {
            System.out.println(charset+" encoding:");
        }
        System.out.println("Char, String, Writer, Charset, Encoder");
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            byte[] b1 = encodeByString(c, charset);
            byte[] b2 = encodeByWriter(c, charset);
            byte[] b3 = encodeByCharset(c, charset);
            byte[] b4 = encodeByEncoder(c, charset);
            System.out.print(charToHex(c)+",");
            printBytes(b1);
            System.out.print(",");
            printBytes(b2);
            System.out.print(",");
            printBytes(b3);
            System.out.print(",");
            printBytes(b4);
            System.out.println("");
        }
    }

    /**
     * Encodes one character with Charset.encode().
     *
     * @param c  the character to encode
     * @param cs charset name, or null for the platform default encoding
     * @return the encoded bytes, or null if the charset name is illegal or
     *         not supported (the exception is printed to System.out)
     */
    public static byte[] encodeByCharset(char c, String cs) {
        Charset cso = lookupCharset(cs);
        if (cso == null) {
            return null;
        }
        return remainingBytes(cso.encode(String.valueOf(c)));
    }

    /**
     * Encodes one character with CharsetEncoder.encode().
     *
     * @param c  the character to encode
     * @param cs charset name, or null for the platform default encoding
     * @return the encoded bytes; a single 0x00 byte if the encoder reports a
     *         CharacterCodingException for the character; or null if the
     *         charset name is illegal or not supported (the exception is
     *         printed to System.out)
     */
    public static byte[] encodeByEncoder(char c, String cs) {
        Charset cso = lookupCharset(cs);
        if (cso == null) {
            return null;
        }
        try {
            // A freshly created encoder is already in its reset state.
            CharsetEncoder encoder = cso.newEncoder();
            return remainingBytes(encoder.encode(CharBuffer.wrap(new char[] {c})));
        } catch (CharacterCodingException e) {
            // Deliberately not reported: a coding failure is shown in the
            // output table as a single 0x00 byte.
            return new byte[] {(byte)0x00};
        }
    }

    /**
     * Encodes one character with String.getBytes().
     *
     * @param c  the character to encode
     * @param cs charset name, or null for the platform default encoding
     * @return the encoded bytes, or null if the named encoding is not
     *         supported (the exception is printed to System.out)
     */
    public static byte[] encodeByString(char c, String cs) {
        String s = String.valueOf(c);
        if (cs == null) {
            return s.getBytes();
        }
        try {
            return s.getBytes(cs);
        } catch (UnsupportedEncodingException e) {
            System.out.println(e.toString());
            return null;
        }
    }

    /**
     * Encodes one character by writing it through an OutputStreamWriter
     * into an in-memory byte stream.
     *
     * @param c  the character to encode
     * @param cs charset name, or null for the platform default encoding
     * @return the encoded bytes, or null if the named encoding is not
     *         supported or the write fails (the exception is printed to
     *         System.out)
     */
    public static byte[] encodeByWriter(char c, String cs) {
        ByteArrayOutputStream bs = new ByteArrayOutputStream();
        OutputStreamWriter o;
        try {
            o = (cs == null) ? new OutputStreamWriter(bs)
                             : new OutputStreamWriter(bs, cs);
        } catch (UnsupportedEncodingException e) {
            System.out.println(e.toString());
            // Without a writer there is nothing to write to; bail out
            // instead of dereferencing a null writer below.
            return null;
        }
        byte[] b = null;
        try {
            o.write(String.valueOf(c));
            o.flush();
            // Snapshot the bytes after flush() and before close().
            b = bs.toByteArray();
            o.close();
        } catch (IOException e) {
            System.out.println(e.toString());
        }
        return b;
    }

    /**
     * Returns a new array of length l holding the first min(l, a.length)
     * bytes of a; any remaining positions are left as zero.
     *
     * @param a the source bytes
     * @param l the length of the returned array
     * @return the new array
     */
    public static byte[] copyBytes(byte[] a, int l) {
        byte[] b = new byte[l];
        System.arraycopy(a, 0, b, 0, Math.min(l, a.length));
        return b;
    }

    /**
     * Prints each byte to System.out as a space followed by two hex digits.
     * A null array (returned by the encode helpers on failure) prints nothing.
     *
     * @param b the bytes to print; may be null
     */
    public static void printBytes(byte[] b) {
        if (b == null) {
            return;
        }
        for (int j = 0; j < b.length; j++) {
            System.out.print(" "+byteToHex(b[j]));
        }
    }

    /**
     * Formats a byte as two upper-case hex digits.
     *
     * @param b the byte to format
     * @return a two-character string, high nibble first
     */
    public static String byteToHex(byte b) {
        // Masking after the shift discards the sign-extension bits.
        char[] a = { hexDigit[(b >> 4) & 0x0f], hexDigit[b & 0x0f] };
        return new String(a);
    }

    /**
     * Formats a char as four upper-case hex digits.
     *
     * @param c the character to format
     * @return a four-character string, high byte first
     */
    public static String charToHex(char c) {
        byte hi = (byte) (c >>> 8);
        byte lo = (byte) (c & 0xff);
        return byteToHex(hi) + byteToHex(lo);
    }

    /** Asks a throw-away OutputStreamWriter for the platform default encoding name. */
    private static String probeDefaultEncoding() {
        return new OutputStreamWriter(new ByteArrayOutputStream()).getEncoding();
    }

    /**
     * Resolves a charset name (null meaning the platform default) to a
     * Charset; prints the exception and returns null if the name is illegal
     * or the charset is not supported.
     */
    private static Charset lookupCharset(String cs) {
        String name = cs;
        if (name == null) {
            if (dfltCharset == null) {
                dfltCharset = probeDefaultEncoding();
            }
            name = dfltCharset;
        }
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Common superclass of IllegalCharsetNameException and
            // UnsupportedCharsetException, both thrown by Charset.forName().
            System.out.println(e.toString());
            return null;
        }
    }

    /** Copies the bytes between the buffer's position and limit into a new array. */
    private static byte[] remainingBytes(ByteBuffer bb) {
        byte[] out = new byte[bb.remaining()];
        bb.get(out);
        return out;
    }
}
(Continued on next part...)
Part:
1
2
3
4
|