Unicode Tutorials - Herong's Tutorial Examples - v5.32, by Herong Yang
EncodingSampler.java - Testing encode() Methods
This section provides a tutorial example on how to use 4 different methods provided in Java to encode characters with a given character encoding.
Java offers 4 methods to perform character encoding: String.getBytes(), OutputStreamWriter.write(), Charset.encode() and CharsetEncoder.encode().
Here is a program that demonstrates how to encode characters with each of the above 4 methods:
/* EncodingSampler2.java
 * Copyright (c) 2019 HerongYang.com. All Rights Reserved.
 */
import java.io.*;
import java.nio.*;
import java.nio.charset.*;

/**
 * Encodes a fixed list of sample code points with four different Java APIs
 * (String.getBytes, OutputStreamWriter, Charset.encode and
 * CharsetEncoder.encode) and prints the resulting bytes side by side in hex.
 *
 * Usage: java EncodingSampler2 [charsetName]
 * When no charset name is given, the platform default encoding is used.
 */
class EncodingSampler2 {
   /** Name of the default encoding; set by main() before any encoding is done. */
   static String dfltCharset = null;

   /** Sample code points, including three supplementary ones above U+FFFF. */
   static int[] chars={0x0000, 0x003F, 0x0040, 0x007F, 0x0080, 0x00BF,
                       0x00C0, 0x00FF, 0x0100, 0x3FFF, 0x4000, 0x7FFF,
                       0x8000, 0xBFFF, 0xC000, 0xEFFF, 0xF000, 0xFFFF,
                       0x1F108, 0x1F132, 0x1F1A0};

   static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
                             '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

   /**
    * Prints one CSV-like line per sample code point: the code point in hex
    * followed by the bytes produced by each of the four encoding methods.
    *
    * @param arg optional; arg[0] is the charset name to test
    */
   public static void main(String[] arg) {
      String charset = null;
      if (arg.length>0) charset = arg[0];

      // The writer reports the name of the encoding it uses by default.
      OutputStreamWriter o = new OutputStreamWriter(
         new ByteArrayOutputStream());
      dfltCharset = o.getEncoding();

      // Reject an illegal or unsupported charset name once, up front,
      // instead of failing for every sample character.
      if (charset!=null) {
         try {
            Charset.forName(charset);
         } catch (IllegalArgumentException e) {
            System.out.println(e.toString());
            return;
         }
      }

      if (charset==null) System.out.println("Default ("+dfltCharset
         +") encoding:");
      else System.out.println(charset+" encoding:");
      System.out.println("Char, String, Writer, Charset, Encoder");
      for (int i=0; i<chars.length; i++) {
         int c = chars[i];
         byte[] b1 = encodeByString(c,charset);
         byte[] b2 = encodeByWriter(c,charset);
         byte[] b3 = encodeByCharset(c,charset);
         byte[] b4 = encodeByEncoder(c,charset);
         System.out.print(intToHex(c)+",");
         printBytes(b1);
         System.out.print(",");
         printBytes(b2);
         System.out.print(",");
         printBytes(b3);
         System.out.print(",");
         printBytes(b4);
         System.out.println("");
      }
   }

   /**
    * Encodes one code point with Charset.encode().
    *
    * @param c  the code point to encode
    * @param cs the charset name, or null for the default charset
    * @return the encoded bytes, or null if the charset name is illegal or
    *         unsupported (the exception is printed to standard output)
    */
   public static byte[] encodeByCharset(int c, String cs) {
      String s = new String(Character.toChars(c));
      Charset cso = lookupCharset(cs);
      if (cso==null) return null;
      return toByteArray(cso.encode(s));
   }

   /**
    * Encodes one code point with CharsetEncoder.encode().
    *
    * @param c  the code point to encode
    * @param cs the charset name, or null for the default charset
    * @return the encoded bytes; a single 0x00 byte if the encoder reports a
    *         CharacterCodingException; or null if the charset name is illegal
    *         or unsupported (the exception is printed to standard output)
    */
   public static byte[] encodeByEncoder(int c, String cs) {
      char[] units = Character.toChars(c);
      Charset cso = lookupCharset(cs);
      if (cso==null) return null;
      try {
         CharsetEncoder encoder = cso.newEncoder();
         return toByteArray(encoder.encode(CharBuffer.wrap(units)));
      } catch (CharacterCodingException e) {
         // Deliberately quiet: a lone 0x00 marks "could not be encoded".
         return new byte[] {(byte)0x00};
      }
   }

   /**
    * Encodes one code point with String.getBytes().
    *
    * @param c  the code point to encode
    * @param cs the charset name, or null for the default charset
    * @return the encoded bytes, or null if the charset is not supported
    *         (the exception is printed to standard output)
    */
   public static byte[] encodeByString(int c, String cs) {
      String s = new String(Character.toChars(c));
      byte[] b = null;
      if (cs==null) {
         b = s.getBytes();
      } else {
         try {
            b = s.getBytes(cs);
         } catch (UnsupportedEncodingException e) {
            System.out.println(e.toString());
         }
      }
      return b;
   }

   /**
    * Encodes one code point by writing it through an OutputStreamWriter.
    *
    * @param c  the code point to encode
    * @param cs the charset name, or null for the default charset
    * @return the bytes written, or null if the charset is not supported or
    *         an I/O error occurs (the exception is printed to standard output)
    */
   public static byte[] encodeByWriter(int c, String cs) {
      String s = new String(Character.toChars(c));
      ByteArrayOutputStream bs = new ByteArrayOutputStream();
      OutputStreamWriter o;
      if (cs==null) {
         o = new OutputStreamWriter(bs);
      } else {
         try {
            o = new OutputStreamWriter(bs, cs);
         } catch (UnsupportedEncodingException e) {
            System.out.println(e.toString());
            // No writer could be created, so there is nothing to write to.
            return null;
         }
      }
      byte[] b = null;
      try {
         o.write(s);
         o.flush();
         // Take the snapshot after flush() but before close().
         b = bs.toByteArray();
         o.close();
      } catch (IOException e) {
         System.out.println(e.toString());
      }
      return b;
   }

   /**
    * Returns a new array of length l holding the first min(l, a.length)
    * bytes of a; any remaining positions are zero.
    */
   public static byte[] copyBytes(byte[] a, int l) {
      byte[] b = new byte[l];
      System.arraycopy(a, 0, b, 0, Math.min(l,a.length));
      return b;
   }

   /**
    * Prints each byte as a space followed by two hex digits.
    * Prints nothing when b is null (an encoding method failed).
    */
   public static void printBytes(byte[] b) {
      if (b==null) return;
      for (int j=0; j<b.length; j++)
         System.out.print(" "+byteToHex(b[j]));
   }

   /** Returns the two upper-case hex digits of a byte. */
   public static String byteToHex(byte b) {
      char[] a = { hexDigit[(b >> 4) & 0x0f], hexDigit[b & 0x0f] };
      return new String(a);
   }

   /** Returns the four upper-case hex digits of a char. */
   public static String charToHex(char c) {
      byte hi = (byte) (c >>> 8);
      byte lo = (byte) (c & 0xff);
      return byteToHex(hi) + byteToHex(lo);
   }

   /** Returns the eight upper-case hex digits of an int. */
   public static String intToHex(int i) {
      char hi = (char) (i >>> 16);
      char lo = (char) (i & 0xffff);
      return charToHex(hi) + charToHex(lo);
   }

   /**
    * Resolves a charset name to a Charset. A null name selects dfltCharset,
    * or the JVM default charset when dfltCharset has not been set yet.
    * Returns null, after printing the exception, when the name is illegal
    * or the charset is not supported.
    */
   private static Charset lookupCharset(String cs) {
      try {
         if (cs!=null) return Charset.forName(cs);
         if (dfltCharset!=null) return Charset.forName(dfltCharset);
         return Charset.defaultCharset();
      } catch (IllegalArgumentException e) {
         // Covers both IllegalCharsetNameException and
         // UnsupportedCharsetException.
         System.out.println(e.toString());
         return null;
      }
   }

   /** Copies the bytes between the buffer's position and its limit. */
   private static byte[] toByteArray(ByteBuffer bb) {
      byte[] out = new byte[bb.remaining()];
      bb.get(out);
      return out;
   }
}
Note that:
Table of Contents
ASCII Character Set and Encoding
GB2312 Character Set and Encoding
GB18030 Character Set and Encoding
JIS X0208 Character Set and Encodings
UTF-8 (Unicode Transformation Format - 8-Bit)
UTF-16, UTF-16BE and UTF-16LE Encodings
UTF-32, UTF-32BE and UTF-32LE Encodings
Python Language and Unicode Characters
Java Language and Unicode Characters
List of Supported Character Encodings in Java
►EncodingSampler.java - Testing encode() Methods
Examples of CP1252 and ISO-8859-1 Encodings
Examples of US-ASCII, UTF-8, UTF-16 and UTF-32 Encodings
Encoding Conversion Programs for Encoded Text Files
Using Notepad as a Unicode Text Editor
Using Microsoft Word as a Unicode Text Editor