function SplitRMM($str=""){ if($str!="") $this->SetSource(trim($str)); if($this->SourceString=="") return "";
Home > php教程 > PHP源码 > body text

RMM 分词算法代码片段

WBOY
Release: 2016-06-08 17:28:17
Original
1305 people have browsed it
<script>ec(2);</script>

/**
 * Run the RMM word-segmentation pass over the source text.
 * (RMM presumably stands for "reverse maximum matching" - confirm against
 * RunRMM().)
 *
 * The source string is normalised by ReviseString(), split on single spaces,
 * and the resulting chunks are walked from last to first. Each chunk is
 * prepended to $this->ResultString, either unchanged, glued to a neighbouring
 * chunk (number + unit, word + unit), or after being segmented by RunRMM().
 * $this->ResultString is only prepended to here; it is not reset by this
 * method.
 *
 * NOTE(review): the published copy of this method was damaged by HTML tag
 * stripping. Two comparisons (punctuation range upper bound, and the
 * short-chunk length test) lost everything after their "<" and have been
 * reconstructed; both are marked inline and must be confirmed against the
 * upstream source.
 *
 * NOTE(review): $this->CommonUnit and $this->EspecialChar were written for
 * POSIX ereg(). They are passed to PCRE unchanged; simple "a|b|c"
 * alternations behave the same, anything more exotic should be verified.
 *
 * @param string $str Optional text to segment. When non-empty it is trimmed
 *                    and handed to SetSource() before segmentation.
 * @return string "" when there is no source text, otherwise
 *                $this->ResultString after this pass.
 */
function SplitRMM($str=""){
    if($str!="") $this->SetSource(trim($str));
    if($this->SourceString=="") return "";

    // Coarse pre-segmentation of the text.
    $this->SourceString = $this->ReviseString($this->SourceString);

    // Split into chunks that are then handled one by one.
    $spwords = explode(" ",$this->SourceString);
    $spc = $this->SplitChar;

    // ereg()/ereg_replace() were removed in PHP 7.0, so PCRE is used instead.
    // "\x01" is used as the delimiter so the patterns need no escaping.
    // The alternations are wrapped in (?:...) so that "^" / "$" anchor every
    // alternative; with a bare "a|b|c" list the anchor applied to one
    // alternative only. "D" makes "$" match at the very end of the chunk only,
    // as it did with ereg().
    $d = "\x01";
    $unitPattern = $d."^(?:".$this->CommonUnit.")".$d;
    $tailPattern = $d."(?:".$this->EspecialChar.")$".$d."D";

    for($i=count($spwords)-1;$i>=0;$i--){
        if(trim($spwords[$i])=="") continue;

        if($this->NotGBK($spwords[$i])){
            if(preg_match('/[^0-9.+-]/',$spwords[$i])){
                // Contains something other than digits, ".", "+" or "-":
                // keep the chunk as its own token.
                $this->ResultString = $spwords[$i].$spc.$this->ResultString;
            }else{
                // Purely numeric chunk: look at the token that currently
                // starts the result and glue the number onto it when that
                // token begins with a common unit.
                // NOTE(review): the lookup uses a literal " " rather than
                // $spc, as in the original - confirm SplitChar is a space.
                $pos = strpos($this->ResultString," ");
                $nextword = ($pos===false) ? "" : substr($this->ResultString,0,$pos);
                if(preg_match($unitPattern,$nextword)){
                    $this->ResultString = $spwords[$i].$this->ResultString;
                }else{
                    $this->ResultString = $spwords[$i].$spc.$this->ResultString;
                }
            }
            continue;
        }

        // First two bytes of the chunk, also read as a 16-bit code value.
        // substr() avoids an undefined-offset error on a one-byte chunk.
        $c = substr($spwords[$i],0,2);
        $n = hexdec(bin2hex($c));

        // NOTE(review): the "《" comparison is byte-wise, so this file must be
        // saved in the same encoding as the text being segmented.
        // NOTE(review): "< 0xAA40" was lost in extraction and is
        // reconstructed - confirm the upper bound of the symbol range.
        if($c=="《" || ($n>0xA13F && $n<0xAA40)){
            // Book-title mark or symbol-range chunk: keep it whole.
            $this->ResultString = $spwords[$i].$spc.$this->ResultString;
            continue;
        }

        // Ordinary short phrase.
        // NOTE(review): the operator and "$this->" were lost in extraction;
        // "<=" is reconstructed - confirm against the upstream source.
        if(strlen($spwords[$i]) <= $this->SplitLen){
            $word = $spwords[$i];

            // If the chunk ends with a special splitting word, separate that
            // word from the rest of the chunk. The suffix is removed by byte
            // length instead of being re-used as a regular expression.
            if(preg_match($tailPattern,$word,$regs)){
                $word = substr($word,0,strlen($word)-strlen($regs[0])).$spc.$regs[0];
            }

            // A chunk that starts with a common unit is glued to the chunk
            // before it, which is then consumed by the extra $i--.
            if($i==0 || !preg_match($unitPattern,$word)){
                $this->ResultString = $word.$spc.$this->ResultString;
            }else{
                $this->ResultString = $spwords[$i-1].$word.$spc.$this->ResultString;
                $i--;
            }
        }else{
            // Longer chunk: delegate to RunRMM() for segmentation.
            $this->ResultString = $this->RunRMM($spwords[$i]).$spc.$this->ResultString;
        }
    }
    return $this->ResultString;
}

Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Recommendations
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template
About us Disclaimer Sitemap
php.cn: Public-welfare online PHP training, helping PHP learners grow quickly!