|
Managing Non-ASCII Character Strings
Part:
1
2
3
4
5
(Continued from previous part...)
"mbstring" Basic Tests
I wrote the following script, MbStringBasic.php, to test some of the basic "mbstring" functions:
<?php # MbStringBasic.php
# Copyright (c) 2006 by Dr. Herong Yang, http://www.herongyang.com/
#
# Section 1: dump every mbstring runtime setting reported by mb_get_info(),
# one "key = (value)" line per entry.
print("\nCurrent settings:\n");
$settings = mb_get_info();
foreach ($settings as $k => $v) {
    # Some entries (e.g. "detect_order") are arrays rather than scalars.
    # Interpolating an array directly raises "Array to string conversion"
    # and prints the literal word "Array", so join the elements first.
    if (is_array($v)) {
        $v = implode(", ", $v);
    }
    print " $k = ($v)\n";
}
# Section 2: run mb_detect_encoding() (default detection order) over five
# sample byte strings and print the reported encoding name next to a hex
# dump of the input bytes.
print("\nEncoding detection:\n");
$detectSamples = array(
    1 => "Hello!",
    2 => "\x00H\x00e\x00l\x00l\x00o\x00!",
    3 => "\xC2\xA1Hola!",
    4 => "\xE4\xBD\xA0\xE5\xA5\xBD!",
    5 => "\xC4\xE3\xBA\xC3\xA3\xA1",
);
foreach ($detectSamples as $caseNo => $sample) {
    $detected = mb_detect_encoding($sample);
    # "\x" with no hex digits after it is not an escape sequence, so a
    # literal backslash-x is printed in front of the hex dump.
    print($caseNo.". ".$detected." for (\x".bin2hex($sample).")\n");
}
# Section 3: count characters (not bytes) with mb_strlen(), telling it
# explicitly which encoding each sample byte string is in.
print("\nString length:\n");
$lengthCases = array(
    array("Hello!", "ASCII"),
    array("\x00H\x00e\x00l\x00l\x00o\x00!", "UTF-16"),
    array("\xC2\xA1Hola!", "UTF-8"),
    array("\xE4\xBD\xA0\xE5\xA5\xBD!", "UTF-8"),
    array("\xC4\xE3\xBA\xC3\xA3\xA1", "GB2312"),
);
foreach ($lengthCases as $idx => $case) {
    $bytes = $case[0];
    $charCount = mb_strlen($bytes, $case[1]);
    # Cases are numbered from 1 in the output; the array is 0-based.
    print(($idx + 1).". ".$charCount." for (\x".bin2hex($bytes).")\n");
}
# Section 4: round-trip conversions with mb_convert_encoding(). Each entry
# is (source encoding, target encoding, sample bytes in the source encoding).
# The sample is converted to the target encoding and then back again, with a
# hex dump printed at every step.
$roundTrips = array(
    array("ASCII", "UTF-16", "Hello!"),
    array("UTF-8", "UTF-16", "\xC2\xA1Hola!"),
    array("UTF-8", "GB2312", "\xE4\xBD\xA0\xE5\xA5\xBD!"),
    array("GB2312", "UTF-16", "\xC4\xE3\xBA\xC3\xA3\xA1"),
);
foreach ($roundTrips as $trip) {
    $from = $trip[0];
    $to = $trip[1];
    $bytes = $trip[2];

    print("\nString conversion - ".$from." <--> ".$to.":\n");
    print(" String in ".$from." = (\x".bin2hex($bytes).")\n");

    # Argument order of mb_convert_encoding() is (string, to, from).
    $bytes = mb_convert_encoding($bytes, $to, $from);
    print(" Converted to ".$to." = (\x".bin2hex($bytes).")\n");

    $bytes = mb_convert_encoding($bytes, $from, $to);
    print(" Converted to ".$from." = (\x".bin2hex($bytes).")\n");
}
?>
(Continued on next part...)
Part:
1
2
3
4
5
|