Unicode Tutorials - Herong's Tutorial Notes
Dr. Herong Yang, Version 5.00

EncodingSampler.java - Testing encode() Methods

This section provides a tutorial example on how to use 4 different methods provided in JDK to encode characters with a given encoding.

JDK offers 4 methods to encode characters:

  • CharsetEncoder.encode()
  • Charset.encode()
  • String.getBytes()
  • OutputStreamWriter.write()

Here is a program that demonstrates how to encode characters with each of the above 4 methods:

/**
 * EncodingSampler.java
 * Copyright (c) 2002 by Dr. Herong Yang
 */
import java.io.*;
import java.nio.*;
import java.nio.charset.*;
/**
 * Encodes a fixed set of sample characters with four JDK mechanisms
 * (String.getBytes(), OutputStreamWriter.write(), Charset.encode() and
 * CharsetEncoder.encode()) and prints the resulting bytes side by side in
 * hexadecimal, one line per character.
 *
 * Usage: java EncodingSampler [charsetName]
 * When no charset name is given, the platform default encoding is used.
 */
class EncodingSampler {
   /**
    * Name of the default encoding as reported by
    * OutputStreamWriter.getEncoding(). Filled in on first use by
    * defaultCharsetName(), so the encode methods also work when main()
    * has not been run.
    */
   static String dfltCharset = null;

   /** Sample characters to encode. */
   static char[] chars={0x0000, 0x003F, 0x0040, 0x007F, 0x0080, 0x00BF,
                        0x00C0, 0x00FF, 0x0100, 0x3FFF, 0x4000, 0x7FFF,
                        0x8000, 0xBFFF, 0xC000, 0xEFFF, 0xF000, 0xFFFF};

   /** Lookup table from a 4-bit value to its upper-case hex digit. */
   static char[] hexDigit = {'0', '1', '2', '3', '4', '5', '6', '7',
                             '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

   /**
    * Prints one line per sample character: the character code followed by
    * the bytes produced by each of the four encode methods.
    *
    * @param arg optional; arg[0] is the name of the encoding to test
    */
   public static void main(String[] arg) {
      String charset = null;
      if (arg.length>0) {
         charset = arg[0];
      }
      String dflt = defaultCharsetName();
      // Reject an unusable encoding once, up front, instead of failing
      // inside the loop for every sample character.
      if (charset!=null && !canEncodeWith(charset)) {
         System.out.println("Unsupported encoding: "+charset);
         return;
      }
      if (charset==null) {
         System.out.println("Default ("+dflt+") encoding:");
      } else {
         System.out.println(charset+" encoding:");
      }
      System.out.println("Char, String, Writer, Charset, Encoder");
      for (int i=0; i<chars.length; i++) {
         char c = chars[i];
         byte[] b1 = encodeByString(c,charset);
         byte[] b2 = encodeByWriter(c,charset);
         byte[] b3 = encodeByCharset(c,charset);
         byte[] b4 = encodeByEncoder(c,charset);
         System.out.print(charToHex(c)+",");
         printBytes(b1);
         System.out.print(",");
         printBytes(b2);
         System.out.print(",");
         printBytes(b3);
         System.out.print(",");
         printBytes(b4);
         System.out.println("");
      }
   }

   /**
    * Encodes one character with Charset.encode().
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes, or null if the charset name is illegal or
    *         not supported (the exception is printed to standard output)
    */
   public static byte[] encodeByCharset(char c, String cs) {
      Charset cso = lookupCharset(cs);
      if (cso==null) {
         return null;
      }
      return remainingBytes(cso.encode(String.valueOf(c)));
   }

   /**
    * Encodes one character with CharsetEncoder.encode().
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes; a single 0x00 byte if the encoder reports
    *         a CharacterCodingException for the character; or null if the
    *         charset name is illegal or not supported
    */
   public static byte[] encodeByEncoder(char c, String cs) {
      Charset cso = lookupCharset(cs);
      if (cso==null) {
         return null;
      }
      try {
         CharsetEncoder encoder = cso.newEncoder();
         ByteBuffer bb = encoder.encode(CharBuffer.wrap(new char[] {c}));
         return remainingBytes(bb);
      } catch (CharacterCodingException e) {
         // 0x00 is this program's marker for "could not be encoded".
         return new byte[] {(byte)0x00};
      }
   }

   /**
    * Encodes one character with String.getBytes().
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the encoded bytes, or null if the encoding is not supported
    *         (the exception is printed to standard output)
    */
   public static byte[] encodeByString(char c, String cs) {
      String s = String.valueOf(c);
      if (cs==null) {
         return s.getBytes();
      }
      try {
         return s.getBytes(cs);
      } catch (UnsupportedEncodingException e) {
         System.out.println(e.toString());
         return null;
      }
   }

   /**
    * Encodes one character by writing it through an OutputStreamWriter
    * into an in-memory byte stream.
    *
    * @param c  the character to encode
    * @param cs the charset name, or null for the default encoding
    * @return the bytes written after flush(), or null if the encoding is
    *         not supported or writing fails (the exception is printed to
    *         standard output)
    */
   public static byte[] encodeByWriter(char c, String cs) {
      ByteArrayOutputStream bs = new ByteArrayOutputStream();
      OutputStreamWriter o;
      if (cs==null) {
         o = new OutputStreamWriter(bs);
      } else {
         try {
            o = new OutputStreamWriter(bs, cs);
         } catch (UnsupportedEncodingException e) {
            System.out.println(e.toString());
            // No writer could be created, so there is nothing to write to.
            return null;
         }
      }
      byte[] b = null;
      try {
         o.write(String.valueOf(c));
         o.flush();
         // The snapshot is taken after flush() and before close().
         b = bs.toByteArray();
      } catch (IOException e) {
         System.out.println(e.toString());
      } finally {
         try {
            o.close();
         } catch (IOException e) {
            System.out.println(e.toString());
         }
      }
      return b;
   }

   /**
    * Returns a new array of length l holding the first min(l, a.length)
    * bytes of a; any remaining positions are zero.
    *
    * @param a the source array
    * @param l the length of the returned array
    * @return the new array
    */
   public static byte[] copyBytes(byte[] a, int l) {
      byte[] b = new byte[l];
      System.arraycopy(a, 0, b, 0, Math.min(l, a.length));
      return b;
   }

   /**
    * Prints each byte as a space followed by two hex digits. Prints
    * nothing when b is null, which is what the encode methods return
    * after a failure.
    *
    * @param b the bytes to print; may be null
    */
   public static void printBytes(byte[] b) {
      if (b==null) {
         return;
      }
      for (int j=0; j<b.length; j++) {
         System.out.print(" "+byteToHex(b[j]));
      }
   }

   /**
    * Formats a byte as two upper-case hex digits.
    *
    * @param b the byte to format
    * @return a 2-character string
    */
   public static String byteToHex(byte b) {
      // Masking after the shift discards the sign-extension bits of
      // negative byte values.
      char[] a = { hexDigit[(b >> 4) & 0x0f], hexDigit[b & 0x0f] };
      return new String(a);
   }

   /**
    * Formats a char as four upper-case hex digits, high byte first.
    *
    * @param c the character to format
    * @return a 4-character string
    */
   public static String charToHex(char c) {
      byte hi = (byte) (c >>> 8);
      byte lo = (byte) (c & 0xff);
      return byteToHex(hi) + byteToHex(lo);
   }

   /**
    * Returns the default encoding name, asking a throw-away
    * OutputStreamWriter for it the first time and caching the answer in
    * dfltCharset.
    */
   private static String defaultCharsetName() {
      if (dfltCharset==null) {
         OutputStreamWriter probe = new OutputStreamWriter(
            new ByteArrayOutputStream());
         // Must be read before close(): getEncoding() returns null on a
         // closed writer.
         dfltCharset = probe.getEncoding();
         try {
            probe.close();
         } catch (IOException e) {
            System.out.println(e.toString());
         }
      }
      return dfltCharset;
   }

   /**
    * Resolves a charset name (null means the default encoding) to a
    * Charset. Prints the exception and returns null when the name is
    * illegal or the charset is not supported.
    */
   private static Charset lookupCharset(String cs) {
      try {
         return Charset.forName(cs==null ? defaultCharsetName() : cs);
      } catch (IllegalCharsetNameException e) {
         System.out.println(e.toString());
      } catch (UnsupportedCharsetException e) {
         System.out.println(e.toString());
      }
      return null;
   }

   /**
    * Tells whether the named charset exists in this JVM and supports
    * encoding.
    */
   private static boolean canEncodeWith(String cs) {
      try {
         return Charset.isSupported(cs) && Charset.forName(cs).canEncode();
      } catch (IllegalCharsetNameException e) {
         return false;
      } catch (UnsupportedCharsetException e) {
         return false;
      }
   }

   /**
    * Copies the bytes between the buffer's position and limit into a new
    * array, without assuming anything about the backing array.
    */
   private static byte[] remainingBytes(ByteBuffer bb) {
      byte[] b = new byte[bb.remaining()];
      bb.get(b);
      return b;
   }
}

Note that:

  • If the same encoding is used, each of the encode methods in the program should return exactly the same byte sequence.
  • getEncoding() is used on the OutputStreamWriter class to get the name of the default encoding.
  • The String class provides no way to get the name of the default encoding.
  • There is no default instance of Charset and Encoder.
  • In encodeByEncoder(), 0x00 is used as the output when the given character can not be encoded by the encoder.

Sections in This Chapter

What Is Character Encoding?

Supported Character Encodings in JDK 1.4.1

EncodingSampler.java - Testing encode() Methods

Examples of CP1252 and ISO-8859-1 Encodings

Examples of US-ASCII, UTF-8, UTF-16 and UTF-16BE Encodings

Examples of GB18030 Encoding

Testing decode() Methods

Dr. Herong Yang, updated in 2009
EncodingSampler.java - Testing encode() Methods