Unicode Tutorials - Herong's Tutorial Examples - v5.32, by Herong Yang
EncodingSampler.java - Testing encode() Methods
This section provides a tutorial example on how to use 4 different methods provided in Java to encode characters with a given character encoding.
Java offers 4 methods to perform character encoding: String.getBytes(), OutputStreamWriter.write(), Charset.encode(), and CharsetEncoder.encode().
Here is a program that demonstrates how to encode characters with each of the above 4 methods:
/* EncodingSampler2.java
* Copyright (c) 2019 HerongYang.com. All Rights Reserved.
*/
import java.io.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Encodes a fixed list of sample code points with a given character encoding
 * (or the platform default) through four different JDK APIs and prints the
 * resulting bytes side by side, one comma-separated row per code point.
 *
 * Usage: java EncodingSampler2 [charsetName]
 */
class EncodingSampler2 {
    /** Name of the default encoding; assigned by main(), null until then. */
    static String dfltCharset = null;

    /** Sample code points to encode: BMP values plus three values above 0xFFFF. */
    static int[] chars = {0x0000, 0x003F, 0x0040, 0x007F, 0x0080, 0x00BF,
        0x00C0, 0x00FF, 0x0100, 0x3FFF, 0x4000, 0x7FFF,
        0x8000, 0xBFFF, 0xC000, 0xEFFF, 0xF000, 0xFFFF,
        0x1F108, 0x1F132, 0x1F1A0};

    /** Lookup table mapping a 4-bit value to its upper-case hex digit. */
    static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Prints one row per sample code point: the code point in hex followed by
     * the bytes produced by each of the four encoding methods.
     *
     * @param arg optional single argument: the charset name to test; when
     *            absent the default encoding is used
     */
    public static void main(String[] arg) {
        String charset = null;
        if (arg.length > 0) charset = arg[0];

        // An OutputStreamWriter built without a charset reports the name of
        // the default encoding it picked.
        OutputStreamWriter o = new OutputStreamWriter(
            new ByteArrayOutputStream());
        dfltCharset = o.getEncoding();

        // Reject an unusable charset name once, up front, instead of
        // failing again on every row (resolveCharset prints the reason).
        if (charset != null && resolveCharset(charset) == null) return;

        if (charset == null) {
            System.out.println("Default (" + dfltCharset + ") encoding:");
        } else {
            System.out.println(charset + " encoding:");
        }
        System.out.println("Char, String, Writer, Charset, Encoder");

        for (int i = 0; i < chars.length; i++) {
            int c = chars[i];
            byte[] b1 = encodeByString(c, charset);
            byte[] b2 = encodeByWriter(c, charset);
            byte[] b3 = encodeByCharset(c, charset);
            byte[] b4 = encodeByEncoder(c, charset);
            System.out.print(intToHex(c) + ",");
            printBytes(b1);
            System.out.print(",");
            printBytes(b2);
            System.out.print(",");
            printBytes(b3);
            System.out.print(",");
            printBytes(b4);
            System.out.println("");
        }
    }

    /**
     * Encodes one code point with Charset.encode().
     *
     * @param c  the code point to encode
     * @param cs charset name, or null for the default charset
     * @return the encoded bytes, or null if the charset name is illegal or
     *         unsupported (the reason is printed to standard output)
     */
    public static byte[] encodeByCharset(int c, String cs) {
        Charset cso = resolveCharset(cs);
        if (cso == null) return null;
        ByteBuffer bb = cso.encode(new String(Character.toChars(c)));
        return toBytes(bb);
    }

    /**
     * Encodes one code point with CharsetEncoder.encode().
     *
     * @param c  the code point to encode
     * @param cs charset name, or null for the default charset
     * @return the encoded bytes; a single 0x00 byte if the encoder rejects
     *         the character; or null if the charset name is illegal or
     *         unsupported (the reason is printed to standard output)
     */
    public static byte[] encodeByEncoder(int c, String cs) {
        Charset cso = resolveCharset(cs);
        if (cso == null) return null;
        try {
            CharsetEncoder encoder = cso.newEncoder();
            ByteBuffer bb
                = encoder.encode(CharBuffer.wrap(Character.toChars(c)));
            return toBytes(bb);
        } catch (CharacterCodingException e) {
            // The encoder throws on characters it cannot encode; report
            // them as a lone 0x00 byte rather than aborting the table.
            return new byte[] {(byte) 0x00};
        }
    }

    /**
     * Encodes one code point with String.getBytes().
     *
     * @param c  the code point to encode
     * @param cs charset name, or null for the default charset
     * @return the encoded bytes, or null if the charset is unsupported (the
     *         reason is printed to standard output)
     */
    public static byte[] encodeByString(int c, String cs) {
        String s = new String(Character.toChars(c));
        if (cs == null) return s.getBytes();
        try {
            return s.getBytes(cs);
        } catch (UnsupportedEncodingException e) {
            System.out.println(e.toString());
            return null;
        }
    }

    /**
     * Encodes one code point by writing it through an OutputStreamWriter
     * into an in-memory byte stream.
     *
     * @param c  the code point to encode
     * @param cs charset name, or null for the default charset
     * @return the bytes written after flush(), or null if the charset is
     *         unsupported or writing fails (the reason is printed to
     *         standard output)
     */
    public static byte[] encodeByWriter(int c, String cs) {
        ByteArrayOutputStream bs = new ByteArrayOutputStream();
        OutputStreamWriter o;
        if (cs == null) {
            o = new OutputStreamWriter(bs);
        } else {
            try {
                o = new OutputStreamWriter(bs, cs);
            } catch (UnsupportedEncodingException e) {
                System.out.println(e.toString());
                // No writer could be created, so there is nothing to write to.
                return null;
            }
        }
        byte[] b = null;
        try {
            o.write(new String(Character.toChars(c)));
            o.flush();
            // Snapshot before close() so the result holds exactly what
            // write() + flush() produced.
            b = bs.toByteArray();
            o.close();
        } catch (IOException e) {
            System.out.println(e.toString());
        }
        return b;
    }

    /**
     * Returns a new array of length l holding the first min(l, a.length)
     * bytes of a; any remaining positions are zero.
     *
     * @param a source bytes
     * @param l length of the returned array
     * @return the new array
     */
    public static byte[] copyBytes(byte[] a, int l) {
        byte[] b = new byte[l];
        System.arraycopy(a, 0, b, 0, Math.min(l, a.length));
        return b;
    }

    /**
     * Prints each byte as " XX" (space plus two hex digits) without a
     * trailing newline. Prints nothing when b is null, which is what the
     * encode methods return on failure.
     *
     * @param b bytes to print, may be null
     */
    public static void printBytes(byte[] b) {
        if (b == null) return;
        for (int j = 0; j < b.length; j++) {
            System.out.print(" " + byteToHex(b[j]));
        }
    }

    /**
     * Formats a byte as two upper-case hex digits.
     *
     * @param b the byte to format
     * @return a 2-character hex string
     */
    public static String byteToHex(byte b) {
        // Mask after shifting so sign extension of negative bytes is dropped.
        char[] a = { hexDigit[(b >> 4) & 0x0f], hexDigit[b & 0x0f] };
        return new String(a);
    }

    /**
     * Formats a char as four upper-case hex digits.
     *
     * @param c the char to format
     * @return a 4-character hex string
     */
    public static String charToHex(char c) {
        byte hi = (byte) (c >>> 8);
        byte lo = (byte) (c & 0xff);
        return byteToHex(hi) + byteToHex(lo);
    }

    /**
     * Formats an int as eight upper-case hex digits.
     *
     * @param i the int to format
     * @return an 8-character hex string
     */
    public static String intToHex(int i) {
        char hi = (char) (i >>> 16);
        char lo = (char) (i & 0xffff);
        return charToHex(hi) + charToHex(lo);
    }

    /**
     * Looks up the Charset for a name. A null name selects the default
     * charset: the name recorded by main() if it has run, otherwise
     * Charset.defaultCharset(). Prints the exception and returns null when
     * the name is illegal or not supported.
     */
    private static Charset resolveCharset(String cs) {
        try {
            if (cs != null) return Charset.forName(cs);
            if (dfltCharset != null) return Charset.forName(dfltCharset);
            return Charset.defaultCharset();
        } catch (IllegalCharsetNameException e) {
            System.out.println(e.toString());
        } catch (UnsupportedCharsetException e) {
            System.out.println(e.toString());
        }
        return null;
    }

    /**
     * Copies the bytes between the buffer's position and limit into a new
     * array, independent of the backing array's offset or spare capacity.
     */
    private static byte[] toBytes(ByteBuffer bb) {
        byte[] b = new byte[bb.remaining()];
        bb.get(b);
        return b;
    }
}
Note that:
Table of Contents
ASCII Character Set and Encoding
GB2312 Character Set and Encoding
GB18030 Character Set and Encoding
JIS X0208 Character Set and Encodings
UTF-8 (Unicode Transformation Format - 8-Bit)
UTF-16, UTF-16BE and UTF-16LE Encodings
UTF-32, UTF-32BE and UTF-32LE Encodings
Python Language and Unicode Characters
Java Language and Unicode Characters
List of Supported Character Encodings in Java
►EncodingSampler.java - Testing encode() Methods
Examples of CP1252 and ISO-8859-1 Encodings
Examples of US-ASCII, UTF-8, UTF-16 and UTF-32 Encodings
Encoding Conversion Programs for Encoded Text Files
Using Notepad as a Unicode Text Editor
Using Microsoft Word as a Unicode Text Editor