• 技术文章 >php教程 >php手册

    完善的汉字转拼音php转换类

    2016-05-25 16:57:04原创1637

    办法是利用矩阵:每个汉字由两个字节组成,一个高位字节,一个低位字节,取值范围分别是129-254和64-254
    每个汉字拼音最长为8个字符,由此组成二维矩阵进行查询,弊端是无法解决多音字问题

    /**
     * Converts double-byte encoded Chinese text to pinyin through a fixed-width
     * lookup file ("matrix").
     *
     * Any byte greater than 128 is treated as the lead byte of a two-byte
     * character; the following byte is its trail byte. The pinyin for the pair
     * is stored in an 8-byte, space-padded slot located at
     *   (lead - 129) * ((254 - 63) * 8 + 2) + (trail - 64) * 8
     * i.e. each lead byte owns a row of 191 slots followed by 2 extra bytes
     * (presumably a line terminator in the data file -- TODO confirm).
     * Polyphonic characters cannot be distinguished: one slot per character.
     */
    class pinyin{
        /* Truthy: read the whole matrix file into memory once (costs some memory, slightly faster). Falsy: seek in the file for every lookup. */
        public $ismemorycache = 1;
        /* Truthy: keep only the first byte of each pinyin entry (the initial letter). */
        public $isfrist = 1;
        /* Path of the pinyin matrix file. */
        public $path = "py.qdb";
        /* In-memory copy of the matrix file (memory-cache mode only). */
        public $memorycache;
        /* Open handle on the matrix file (file mode only). */
        public $handle;
        /* Collected error messages; stays null until the first error is recorded. */
        public $errormsgbox;
        /* Result of the most recent conversion. */
        public $result;
        /* Cache of already looked-up entries, indexed [lead byte][trail byte]. */
        public $array = array();
        /* Maps tone-marked vowels to their plain letter; every form of "ü" becomes "v". */
        public $n_t = array("ā" => "a","á" => "a","ǎ" => "a","à" => "a","ɑ" => "a",
            "ō" => "o","ó" => "o","ǒ" => "o","ò" => "o",
            "ē" => "e","é" => "e","ě" => "e","è" => "e","ê" => "e",
            "ī" => "i","í" => "i","ǐ" => "i","ì" => "i",
            "ū" => "u","ú" => "u","ǔ" => "u","ù" => "u",
            "ǖ" => "v","ǘ" => "v","ǚ" => "v","ǜ" => "v","ü" => "v"
        );

        /**
         * Conversion entry point.
         *
         * @param string|array $str        Text to convert, or an array of texts.
         * @param int|bool     $istonemark Truthy keeps tone marks; falsy replaces them via $n_t.
         * @param string       $suffix     Appended after every converted character (default: empty string).
         * @return string|array Converted text; an array (same keys) when $str is an array.
         */
        function chinesetopinyin($str,$istonemark = 0,$suffix = ""){
            $this->py($str,$istonemark,$suffix);
            return $this->result;
        }

        /**
         * Returns the result of the most recent conversion.
         */
        function get(){
            return $this->result;
        }

        /**
         * Performs the conversion and stores it in $this->result.
         *
         * @param string|array $str Text (or array of texts) to convert.
         * @param int|bool     $n   Truthy keeps tone marks; falsy strips them.
         * @param string       $s   Suffix appended after every converted character.
         * @return string|array|false The converted value, or false when the matrix
         *                            file cannot be opened (an error message is recorded).
         */
        function py($str,$n = 0,$s = ""){
            // Arrays are handled first: strlen() must never see an array, and the
            // converted elements have to be kept instead of being thrown away.
            if(is_array($str)){
                $converted = array();
                foreach($str as $key => $val){
                    $converted[$key] = $this->py($val,$n,$s);
                }
                $this->result = $converted;
                return $converted;
            }

            $str = (string)$str;
            // Reset before any early return so a stale result is never reported.
            $this->result = "";
            $strlength = strlen($str);
            if($strlength == 0){ return ""; }

            if(!$this->preparesource()){ return false; }

            $keep = $this->isfrist ? 1 : 8;
            $out = "";
            for($i = 0 ; $i < $strlength ; $i++){
                $byte = $str[$i];
                $ord1 = ord($byte);
                if($ord1 <= 128){
                    // Single-byte character: copied through untouched.
                    $out .= $byte;
                    continue;
                }
                if($i + 1 >= $strlength){
                    // Dangling lead byte at the end of the input: no trail byte to pair with.
                    $out .= $byte;
                    break;
                }
                $trail = $str[++$i];
                $entry = $this->lookup($ord1,ord($trail));
                if($entry === null){
                    // Trail byte outside the matrix: keep the original bytes.
                    $out .= $byte.$trail;
                    continue;
                }
                $out .= substr($entry,0,$keep).$s;
            }

            if(!$n){ $out = strtr($out,$this->n_t); }
            $this->result = $out;
            return $out;
        }

        /**
         * Makes sure the data source for the current mode is available: loads the
         * file into memory (memory-cache mode) or opens the file handle (file mode).
         * Does nothing when the source is already prepared.
         *
         * @return bool False when the file is missing or unreadable.
         */
        private function preparesource(){
            if($this->ismemorycache){
                if(is_string($this->memorycache) && $this->memorycache !== ""){ return true; }
            }elseif(is_resource($this->handle)){
                return true;
            }

            if(!file_exists($this->path)){
                $this->addoneerrormsg(1,"拼音文件路径不存在");
                return false;
            }

            if($this->ismemorycache){
                $data = file_get_contents($this->path);
                if($data === false){
                    $this->addoneerrormsg(2,"拼音文件无法读取");
                    return false;
                }
                $this->memorycache = $data;
                return true;
            }

            $handle = fopen($this->path,"rb");
            if($handle === false){
                $this->addoneerrormsg(2,"拼音文件无法读取");
                return false;
            }
            $this->handle = $handle;
            return true;
        }

        /**
         * Returns the trimmed 8-byte matrix entry for a lead/trail byte pair,
         * caching it in $this->array.
         *
         * @param int $ord1 Lead byte value (greater than 128).
         * @param int $ord2 Trail byte value.
         * @return string|null Entry text (possibly empty), or null when the trail
         *                     byte lies outside the 64..254 slots of a row.
         */
        private function lookup($ord1,$ord2){
            if($ord2 < 64 || $ord2 > 254){ return null; }
            if(!isset($this->array[$ord1][$ord2])){
                $leng = ($ord1 - 129) * ((254 - 63) * 8 + 2) + ($ord2 - 64) * 8;
                if($this->ismemorycache){
                    $raw = substr($this->memorycache,$leng,8);
                }else{
                    // fread() returns the full 8-byte slot; fgets($h, 8) would stop
                    // after 7 bytes or at a newline.
                    $raw = fseek($this->handle,$leng) === 0 ? fread($this->handle,8) : "";
                }
                // Reads past the end of the data yield false/""; normalise to "".
                $this->array[$ord1][$ord2] = trim((string)$raw);
            }
            return $this->array[$ord1][$ord2];
        }

        /**
         * Records one error message.
         *
         * @param int    $no     Error number.
         * @param string $reason Human-readable reason.
         */
        function addoneerrormsg($no,$reason){
            $this->errormsgbox[] = "error:" . $no . "," . $reason;
        }

        /**
         * Echoes every recorded error message, each followed by a blank line.
         */
        function showerrormsg(){
            if(!is_array($this->errormsgbox)){ return; }
            foreach($this->errormsgbox as $val){
                echo $val."\r\n\r\n";
            }
        }

        /**
         * Prints any recorded errors and releases the matrix file handle.
         */
        function __destruct(){
            if(is_array($this->errormsgbox)){ $this->showerrormsg(); }
            if(is_resource($this->handle)){
                fclose($this->handle);
                $this->handle = null;
            }
        }
    }

    之前遇见过这个难题,发现流传的代码都不怎么完善,汉字库总共有20k+的汉字,大多数的是拿几百个常用汉字打算糊弄过去,在火星文流传的今天,是不行的。
    还有种读取词典然后转换的,每行一个汉字|拼音,这种弊端非常大,速度慢,耗费巨大内存,仅仅explode一下读入数组,再循环一次,就能耗费上百m的内存,如果一个单页面耗费上百m,负载稍微大点只能泪奔了。

    永久地址:

    转载随意~请带上教程地址吧^^

    声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。
    专题推荐:
    上一篇:php strtotime()与date()日期递减代码 下一篇:php初学者用文件上传实例
    千万级数据并发解决方案

    相关文章推荐

    • PHP中字符安全过滤函数使用总结• jquery获取多个checkbox的值异步提交给php的方法,jquerycheckbox• PHP数组和explode函数示例总结,数组explode• PHP随机生成唯一HASH值自定义函数,phphash自定义函数• php对文件进行hash运算的方法
    1/1

    PHP中文网