EncodingSampler.java - Testing encode() Methods
<< Character Encoding in Java
<< Unicode Tutorials - Herong's Tutorial Notes
This section provides a tutorial example on how to use 4 different methods provided in JDK to encode characters with a given encoding.
The JDK offers 4 methods to encode characters: String.getBytes(), OutputStreamWriter.write(), Charset.encode(), and CharsetEncoder.encode().
Here is a program that demonstrates how to encode characters with each of the above 4 methods:
/**
 * EncodingSampler.java
 * Copyright (c) 2002 by Dr. Herong Yang
 */
import java.io.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * Encodes a fixed set of sample characters with four different JDK APIs
 * (String.getBytes, OutputStreamWriter, Charset.encode and
 * CharsetEncoder.encode) and prints the resulting bytes side by side in hex.
 *
 * Usage: java EncodingSampler [charsetName]
 * When no charset name is given, the platform default encoding is used.
 */
class EncodingSampler {
   /** Name of the default encoding; filled in lazily by defaultCharsetName(). */
   static String dfltCharset = null;

   /** Sample characters that are encoded and printed by main(). */
   static char[] chars = {0x0000, 0x003F, 0x0040, 0x007F, 0x0080, 0x00BF,
      0x00C0, 0x00FF, 0x0100, 0x3FFF, 0x4000, 0x7FFF, 0x8000, 0xBFFF,
      0xC000, 0xEFFF, 0xF000, 0xFFFF};

   /** Lookup table mapping a 4-bit value to its upper-case hex digit. */
   static char[] hexDigit = {'0', '1', '2', '3', '4', '5', '6', '7',
      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

   /**
    * Prints one line per sample character: the character's code in hex
    * followed by the bytes produced by each of the four encode methods.
    *
    * @param arg optional; arg[0] is the name of the encoding to test
    */
   public static void main(String[] arg) {
      String charset = null;
      if (arg.length > 0) charset = arg[0];
      defaultCharsetName();
      if (charset == null) {
         System.out.println("Default (" + dfltCharset + ") encoding:");
      } else {
         // Reject an unusable encoding name once, up front, instead of
         // failing again for every sample character.
         if (lookupCharset(charset) == null) return;
         System.out.println(charset + " encoding:");
      }
      System.out.println("Char, String, Writer, Charset, Encoder");
      for (int i = 0; i < chars.length; i++) {
         char c = chars[i];
         byte[] b1 = encodeByString(c, charset);
         byte[] b2 = encodeByWriter(c, charset);
         byte[] b3 = encodeByCharset(c, charset);
         byte[] b4 = encodeByEncoder(c, charset);
         System.out.print(charToHex(c) + ",");
         printBytes(b1);
         System.out.print(",");
         printBytes(b2);
         System.out.print(",");
         printBytes(b3);
         System.out.print(",");
         printBytes(b4);
         System.out.println("");
      }
   }

   /**
    * Encodes one character with Charset.encode().
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes, or null if the charset cannot be used
    *         (the reason is printed to System.out)
    */
   public static byte[] encodeByCharset(char c, String cs) {
      Charset cso = lookupCharset(cs);
      if (cso == null) return null;
      return toBytes(cso.encode(String.valueOf(c)));
   }

   /**
    * Encodes one character with a CharsetEncoder obtained from the charset.
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes; a single 0x00 byte if the encoder throws
    *         CharacterCodingException for this character; or null if the
    *         charset cannot be used (the reason is printed to System.out)
    */
   public static byte[] encodeByEncoder(char c, String cs) {
      Charset cso = lookupCharset(cs);
      if (cso == null) return null;
      try {
         CharsetEncoder encoder = cso.newEncoder();
         return toBytes(encoder.encode(CharBuffer.wrap(new char[] {c})));
      } catch (CharacterCodingException e) {
         // Deliberately not reported: the 0x00 placeholder keeps the
         // output table aligned for characters the encoder rejects.
         return new byte[] {(byte) 0x00};
      }
   }

   /**
    * Encodes one character with String.getBytes().
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes, or null if the encoding is not supported
    *         (the reason is printed to System.out)
    */
   public static byte[] encodeByString(char c, String cs) {
      String s = String.valueOf(c);
      if (cs == null) return s.getBytes();
      try {
         return s.getBytes(cs);
      } catch (UnsupportedEncodingException e) {
         System.out.println(e.toString());
         return null;
      }
   }

   /**
    * Encodes one character by writing it through an OutputStreamWriter
    * that wraps an in-memory byte stream.
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes, or null if the encoding is not supported
    *         or writing fails (the reason is printed to System.out)
    */
   public static byte[] encodeByWriter(char c, String cs) {
      ByteArrayOutputStream bs = new ByteArrayOutputStream();
      OutputStreamWriter o;
      try {
         o = (cs == null) ? new OutputStreamWriter(bs)
                          : new OutputStreamWriter(bs, cs);
      } catch (UnsupportedEncodingException e) {
         System.out.println(e.toString());
         // Without a writer there is nothing to encode with; bail out
         // instead of dereferencing a null writer below.
         return null;
      }
      byte[] b = null;
      try {
         o.write(String.valueOf(c));
         o.flush();
         // Bytes are captured right after flush(), before close().
         b = bs.toByteArray();
      } catch (IOException e) {
         System.out.println(e.toString());
      } finally {
         try {
            o.close();
         } catch (IOException e) {
            System.out.println(e.toString());
         }
      }
      return b;
   }

   /**
    * Returns a new array of length l holding the leading bytes of a.
    * If a is shorter than l, the remaining positions stay zero.
    *
    * @param a the source bytes
    * @param l the length of the returned array
    * @return a new array of exactly l bytes
    */
   public static byte[] copyBytes(byte[] a, int l) {
      byte[] b = new byte[l];
      System.arraycopy(a, 0, b, 0, Math.min(l, a.length));
      return b;
   }

   /**
    * Prints each byte as a space followed by two hex digits.
    * Prints nothing when b is null, which is what the encode methods
    * return after a failure.
    *
    * @param b the bytes to print; may be null
    */
   public static void printBytes(byte[] b) {
      if (b == null) return;
      for (int j = 0; j < b.length; j++)
         System.out.print(" " + byteToHex(b[j]));
   }

   /**
    * Formats a byte as two upper-case hex digits.
    *
    * @param b the byte to format
    * @return a two-character hex string
    */
   public static String byteToHex(byte b) {
      // Masking after the signed shift discards the sign-extension bits.
      char[] a = { hexDigit[(b >> 4) & 0x0f], hexDigit[b & 0x0f] };
      return new String(a);
   }

   /**
    * Formats a char as four upper-case hex digits, high byte first.
    *
    * @param c the character to format
    * @return a four-character hex string
    */
   public static String charToHex(char c) {
      byte hi = (byte) (c >>> 8);
      byte lo = (byte) (c & 0xff);
      return byteToHex(hi) + byteToHex(lo);
   }

   /**
    * Returns the name of the default encoding as reported by an
    * OutputStreamWriter created without an explicit encoding, caching it
    * in dfltCharset so the encode methods also work when main() never ran.
    */
   private static String defaultCharsetName() {
      if (dfltCharset == null) {
         dfltCharset = new OutputStreamWriter(
            new ByteArrayOutputStream()).getEncoding();
      }
      return dfltCharset;
   }

   /**
    * Resolves a charset name (null means the default encoding) to a
    * Charset. Prints the exception and returns null when the name is
    * illegal or no such charset is available.
    */
   private static Charset lookupCharset(String cs) {
      try {
         return Charset.forName(cs == null ? defaultCharsetName() : cs);
      } catch (IllegalCharsetNameException e) {
         System.out.println(e.toString());
      } catch (UnsupportedCharsetException e) {
         System.out.println(e.toString());
      }
      return null;
   }

   /**
    * Copies the remaining content of a buffer into a new array, honoring
    * the buffer's own position instead of assuming its backing array
    * starts at index 0.
    */
   private static byte[] toBytes(ByteBuffer bb) {
      byte[] b = new byte[bb.remaining()];
      bb.get(b);
      return b;
   }
}
Note that:
Sections in This Chapter
What Is Character Encoding?
Supported Character Encodings in JDK 1.4.1
Examples of CP1252 and ISO-8859-1 Encodings
Examples of US-ASCII, UTF-8, UTF-16 and UTF-16BE Encodings
Examples of GB18030 Encoding
Testing decode() Methods