A full copy of gb2312.txt (184,799 bytes) is indeed too large, and it still has to be converted to Unicode.
The comparison table used here is only 51,965 bytes, which is much smaller.
It remains very practical in situations where the iconv function library cannot be used.
//Use of comparison table
// Load the comparison table into $charset for later use by gb2utf8() and utf82gb().
// Each row of the file is parsed as CSV: the first field becomes the array key
// (the GB2312 form, per the $gb/$utf8 naming) and the second field the value.
$filename = "gb2utf8.txt";
$charset = array();
$fp = fopen($filename, "r");
if ($fp === false) {
    // Without a valid handle, feof() never reports end-of-file (endless loop on
    // older PHP, TypeError on PHP 8), so fail loudly instead of continuing.
    throw new RuntimeException("Cannot open comparison table: " . $filename);
}
// Let fgetcsv() itself signal end-of-file; testing feof() first yields one extra
// bogus iteration that stores an empty key with a null value.
while (($row = fgetcsv($fp, 10)) !== false) {
    // Blank lines come back as array(null); rows without two fields are unusable.
    if (!isset($row[0], $row[1]) || $row[0] === '') {
        continue;
    }
    $charset[$row[0]] = $row[1];
}
fclose($fp);
/**
 * gb2312 to utf-8.
 *
 * Splits $text into tokens, replaces every token that is a key of $charset
 * with the mapped value, and joins the tokens back together.
 *
 * @param string $text    Input bytes (expected to be GB2312-encoded).
 * @param array  $charset Lookup table, key = GB2312 character, value = UTF-8 character.
 *                        Taken by reference only to avoid copying; it is not modified.
 * @return string The converted string; tokens without a table entry are kept unchanged.
 *                NUL bytes and a dangling lead byte are not matched by the pattern
 *                and are therefore dropped.
 */
function gb2utf8($text, &$charset) {
    // Tokenise: a high-bit lead byte plus the byte after it is one element,
    // a run of consecutive ASCII bytes is one element.
    // The \x escapes are essential: without the backslashes the classes match
    // ordinary letters and digits instead of byte ranges.
    if (!preg_match_all('/(?:[\x80-\xff].)|[\x01-\x7f]+/', $text, $matches)) {
        // No tokens (empty input) or a PCRE failure: nothing to convert.
        return '';
    }

    $out = array();
    foreach ($matches[0] as $token) {
        // Direct hash lookup instead of intersecting against every table key.
        $out[] = isset($charset[$token]) ? $charset[$token] : $token;
    }

    // Return the converted string
    return join('', $out);
}
/**
 * utf-8 to gb2312.
 *
 * Splits $text into UTF-8 sequences (4-, 3- and 2-byte forms) and runs of ASCII,
 * replaces every token found as a value in $charset with its key, and joins
 * the tokens back together.
 *
 * @param string $text    Input bytes (expected to be UTF-8-encoded).
 * @param array  $charset Lookup table, key = GB2312 character, value = UTF-8 character.
 *                        Taken by reference only to avoid copying; it is not modified.
 * @return string The converted string; tokens without a table entry are kept unchanged.
 *                Bytes the pattern does not match (NUL, malformed sequences) are dropped.
 */
function utf82gb($text, &$charset) {
    // The \x escapes are essential: without them the classes are literal characters,
    // and a stray space inside the first class makes the range invalid.
    $p = '/[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc2-\xdf][\x80-\xbf]|[\x01-\x7f]+/';
    if (!preg_match_all($p, $text, $r)) {
        // No tokens (empty input) or a PCRE failure: nothing to convert.
        return '';
    }

    // Invert the table so UTF-8 sequences become the keys.
    $utf8 = array_flip($charset);

    $out = array();
    foreach ($r[0] as $token) {
        $out[] = isset($utf8[$token]) ? $utf8[$token] : $token;
    }

    return join('', $out);
}
//Test
// Round trip: run the sample through gb2utf8(), feed the result back through
// utf82gb(), and print what comes out.
$s = gb2utf8('This is a test of the lookup table', $charset);
$roundTripped = utf82gb($s, $charset);
echo $roundTripped;
?>