Home  >  Article  >  Backend Development  >  How to convert php utf-8 Chinese characters to pinyin

How to convert php utf-8 Chinese characters to pinyin

藏色散人
藏色散人Original
2020-10-20 09:57:473859browse

php How to convert utf-8 Chinese characters to Pinyin: first open the corresponding PHP code file; then use the CUtf8_PY class and the zh2py method to convert Pinyin characters.

How to convert php utf-8 Chinese characters to pinyin

Recommended: "PHP Video Tutorial"

PHP's UTF-8 Chinese to Pinyin processing class (performance has been improved Optimized to the extreme)

<?php
/**
 * PHP 汉字转拼音
 * @author Jerryli(hzjerry@gmail.com)
 * @version V0.20140715
 * @package SPFW.core.lib.final
 * @global SEA_PHP_FW_VAR_ENV
 * @example
 *echo CUtf8_PY::encode(&#39;阿里巴巴科技有限公司&#39;); //编码为拼音首字母
 *echo CUtf8_PY::encode(&#39;阿里巴巴科技有限公司&#39;, &#39;all&#39;); //编码为全拼音
 */
class CUtf8_PY {
/**
* 拼音字符转换图
* @var array
*/
private static $_aMaps = array(
&#39;a&#39;=>-20319,&#39;ai&#39;=>-20317,&#39;an&#39;=>-20304,&#39;ang&#39;=>-20295,&#39;ao&#39;=>-20292,
&#39;ba&#39;=>-20283,&#39;bai&#39;=>-20265,&#39;ban&#39;=>-20257,&#39;bang&#39;=>-20242,&#39;bao&#39;=>-20230,&#39;bei&#39;=>-20051,&#39;ben&#39;=>-20036,&#39;beng&#39;=>-20032,&#39;bi&#39;=>-20026,&#39;bian&#39;=>-20002,&#39;biao&#39;=>-19990,&#39;bie&#39;=>-19986,&#39;bin&#39;=>-19982,&#39;bing&#39;=>-19976,&#39;bo&#39;=>-19805,&#39;bu&#39;=>-19784,
&#39;ca&#39;=>-19775,&#39;cai&#39;=>-19774,&#39;can&#39;=>-19763,&#39;cang&#39;=>-19756,&#39;cao&#39;=>-19751,&#39;ce&#39;=>-19746,&#39;ceng&#39;=>-19741,&#39;cha&#39;=>-19739,&#39;chai&#39;=>-19728,&#39;chan&#39;=>-19725,&#39;chang&#39;=>-19715,&#39;chao&#39;=>-19540,&#39;che&#39;=>-19531,&#39;chen&#39;=>-19525,&#39;cheng&#39;=>-19515,&#39;chi&#39;=>-19500,&#39;chong&#39;=>-19484,&#39;chou&#39;=>-19479,&#39;chu&#39;=>-19467,&#39;chuai&#39;=>-19289,&#39;chuan&#39;=>-19288,&#39;chuang&#39;=>-19281,&#39;chui&#39;=>-19275,&#39;chun&#39;=>-19270,&#39;chuo&#39;=>-19263,&#39;ci&#39;=>-19261,&#39;cong&#39;=>-19249,&#39;cou&#39;=>-19243,&#39;cu&#39;=>-19242,&#39;cuan&#39;=>-19238,&#39;cui&#39;=>-19235,&#39;cun&#39;=>-19227,&#39;cuo&#39;=>-19224,
&#39;da&#39;=>-19218,&#39;dai&#39;=>-19212,&#39;dan&#39;=>-19038,&#39;dang&#39;=>-19023,&#39;dao&#39;=>-19018,&#39;de&#39;=>-19006,&#39;deng&#39;=>-19003,&#39;di&#39;=>-18996,&#39;dian&#39;=>-18977,&#39;diao&#39;=>-18961,&#39;die&#39;=>-18952,&#39;ding&#39;=>-18783,&#39;diu&#39;=>-18774,&#39;dong&#39;=>-18773,&#39;dou&#39;=>-18763,&#39;du&#39;=>-18756,&#39;duan&#39;=>-18741,&#39;dui&#39;=>-18735,&#39;dun&#39;=>-18731,&#39;duo&#39;=>-18722,
&#39;e&#39;=>-18710,&#39;en&#39;=>-18697,&#39;er&#39;=>-18696,
&#39;fa&#39;=>-18526,&#39;fan&#39;=>-18518,&#39;fang&#39;=>-18501,&#39;fei&#39;=>-18490,&#39;fen&#39;=>-18478,&#39;feng&#39;=>-18463,&#39;fo&#39;=>-18448,&#39;fou&#39;=>-18447,&#39;fu&#39;=>-18446,
&#39;ga&#39;=>-18239,&#39;gai&#39;=>-18237,&#39;gan&#39;=>-18231,&#39;gang&#39;=>-18220,&#39;gao&#39;=>-18211,&#39;ge&#39;=>-18201,&#39;gei&#39;=>-18184,&#39;gen&#39;=>-18183,&#39;geng&#39;=>-18181,&#39;gong&#39;=>-18012,&#39;gou&#39;=>-17997,&#39;gu&#39;=>-17988,&#39;gua&#39;=>-17970,&#39;guai&#39;=>-17964,&#39;guan&#39;=>-17961,&#39;guang&#39;=>-17950,&#39;gui&#39;=>-17947,&#39;gun&#39;=>-17931,&#39;guo&#39;=>-17928,
&#39;ha&#39;=>-17922,&#39;hai&#39;=>-17759,&#39;han&#39;=>-17752,&#39;hang&#39;=>-17733,&#39;hao&#39;=>-17730,&#39;he&#39;=>-17721,&#39;hei&#39;=>-17703,&#39;hen&#39;=>-17701,&#39;heng&#39;=>-17697,&#39;hong&#39;=>-17692,&#39;hou&#39;=>-17683,&#39;hu&#39;=>-17676,&#39;hua&#39;=>-17496,&#39;huai&#39;=>-17487,&#39;huan&#39;=>-17482,&#39;huang&#39;=>-17468,&#39;hui&#39;=>-17454,&#39;hun&#39;=>-17433,&#39;huo&#39;=>-17427,
&#39;ji&#39;=>-17417,&#39;jia&#39;=>-17202,&#39;jian&#39;=>-17185,&#39;jiang&#39;=>-16983,&#39;jiao&#39;=>-16970,&#39;jie&#39;=>-16942,&#39;jin&#39;=>-16915,&#39;jing&#39;=>-16733,&#39;jiong&#39;=>-16708,&#39;jiu&#39;=>-16706,&#39;ju&#39;=>-16689,&#39;juan&#39;=>-16664,&#39;jue&#39;=>-16657,&#39;jun&#39;=>-16647,
&#39;ka&#39;=>-16474,&#39;kai&#39;=>-16470,&#39;kan&#39;=>-16465,&#39;kang&#39;=>-16459,&#39;kao&#39;=>-16452,&#39;ke&#39;=>-16448,&#39;ken&#39;=>-16433,&#39;keng&#39;=>-16429,&#39;kong&#39;=>-16427,&#39;kou&#39;=>-16423,&#39;ku&#39;=>-16419,&#39;kua&#39;=>-16412,&#39;kuai&#39;=>-16407,&#39;kuan&#39;=>-16403,&#39;kuang&#39;=>-16401,&#39;kui&#39;=>-16393,&#39;kun&#39;=>-16220,&#39;kuo&#39;=>-16216,
&#39;la&#39;=>-16212,&#39;lai&#39;=>-16205,&#39;lan&#39;=>-16202,&#39;lang&#39;=>-16187,&#39;lao&#39;=>-16180,&#39;le&#39;=>-16171,&#39;lei&#39;=>-16169,&#39;leng&#39;=>-16158,&#39;li&#39;=>-16155,&#39;lia&#39;=>-15959,&#39;lian&#39;=>-15958,&#39;liang&#39;=>-15944,&#39;liao&#39;=>-15933,&#39;lie&#39;=>-15920,&#39;lin&#39;=>-15915,&#39;ling&#39;=>-15903,&#39;liu&#39;=>-15889,&#39;long&#39;=>-15878,&#39;lou&#39;=>-15707,&#39;lu&#39;=>-15701,&#39;lv&#39;=>-15681,&#39;luan&#39;=>-15667,&#39;lue&#39;=>-15661,&#39;lun&#39;=>-15659,&#39;luo&#39;=>-15652,
&#39;ma&#39;=>-15640,&#39;mai&#39;=>-15631,&#39;man&#39;=>-15625,&#39;mang&#39;=>-15454,&#39;mao&#39;=>-15448,&#39;me&#39;=>-15436,&#39;mei&#39;=>-15435,&#39;men&#39;=>-15419,&#39;meng&#39;=>-15416,&#39;mi&#39;=>-15408,&#39;mian&#39;=>-15394,&#39;miao&#39;=>-15385,&#39;mie&#39;=>-15377,&#39;min&#39;=>-15375,&#39;ming&#39;=>-15369,&#39;miu&#39;=>-15363,&#39;mo&#39;=>-15362,&#39;mou&#39;=>-15183,&#39;mu&#39;=>-15180,
&#39;na&#39;=>-15165,&#39;nai&#39;=>-15158,&#39;nan&#39;=>-15153,&#39;nang&#39;=>-15150,&#39;nao&#39;=>-15149,&#39;ne&#39;=>-15144,&#39;nei&#39;=>-15143,&#39;nen&#39;=>-15141,&#39;neng&#39;=>-15140,&#39;ni&#39;=>-15139,&#39;nian&#39;=>-15128,&#39;niang&#39;=>-15121,&#39;niao&#39;=>-15119,&#39;nie&#39;=>-15117,&#39;nin&#39;=>-15110,&#39;ning&#39;=>-15109,&#39;niu&#39;=>-14941,&#39;nong&#39;=>-14937,&#39;nu&#39;=>-14933,&#39;nv&#39;=>-14930,&#39;nuan&#39;=>-14929,&#39;nue&#39;=>-14928,&#39;nuo&#39;=>-14926,
&#39;o&#39;=>-14922,&#39;ou&#39;=>-14921,
&#39;pa&#39;=>-14914,&#39;pai&#39;=>-14908,&#39;pan&#39;=>-14902,&#39;pang&#39;=>-14894,&#39;pao&#39;=>-14889,&#39;pei&#39;=>-14882,&#39;pen&#39;=>-14873,&#39;peng&#39;=>-14871,&#39;pi&#39;=>-14857,&#39;pian&#39;=>-14678,&#39;piao&#39;=>-14674,&#39;pie&#39;=>-14670,&#39;pin&#39;=>-14668,&#39;ping&#39;=>-14663,&#39;po&#39;=>-14654,&#39;pu&#39;=>-14645,
&#39;qi&#39;=>-14630,&#39;qia&#39;=>-14594,&#39;qian&#39;=>-14429,&#39;qiang&#39;=>-14407,&#39;qiao&#39;=>-14399,&#39;qie&#39;=>-14384,&#39;qin&#39;=>-14379,&#39;qing&#39;=>-14368,&#39;qiong&#39;=>-14355,&#39;qiu&#39;=>-14353,&#39;qu&#39;=>-14345,&#39;quan&#39;=>-14170,&#39;que&#39;=>-14159,&#39;qun&#39;=>-14151,
&#39;ran&#39;=>-14149,&#39;rang&#39;=>-14145,&#39;rao&#39;=>-14140,&#39;re&#39;=>-14137,&#39;ren&#39;=>-14135,&#39;reng&#39;=>-14125,&#39;ri&#39;=>-14123,&#39;rong&#39;=>-14122,&#39;rou&#39;=>-14112,&#39;ru&#39;=>-14109,&#39;ruan&#39;=>-14099,&#39;rui&#39;=>-14097,&#39;run&#39;=>-14094,&#39;ruo&#39;=>-14092,
&#39;sa&#39;=>-14090,&#39;sai&#39;=>-14087,&#39;san&#39;=>-14083,&#39;sang&#39;=>-13917,&#39;sao&#39;=>-13914,&#39;se&#39;=>-13910,&#39;sen&#39;=>-13907,&#39;seng&#39;=>-13906,&#39;sha&#39;=>-13905,&#39;shai&#39;=>-13896,&#39;shan&#39;=>-13894,&#39;shang&#39;=>-13878,&#39;shao&#39;=>-13870,&#39;she&#39;=>-13859,&#39;shen&#39;=>-13847,&#39;sheng&#39;=>-13831,&#39;shi&#39;=>-13658,&#39;shou&#39;=>-13611,&#39;shu&#39;=>-13601,&#39;shua&#39;=>-13406,&#39;shuai&#39;=>-13404,&#39;shuan&#39;=>-13400,&#39;shuang&#39;=>-13398,&#39;shui&#39;=>-13395,&#39;shun&#39;=>-13391,&#39;shuo&#39;=>-13387,&#39;si&#39;=>-13383,&#39;song&#39;=>-13367,&#39;sou&#39;=>-13359,&#39;su&#39;=>-13356,&#39;suan&#39;=>-13343,&#39;sui&#39;=>-13340,&#39;sun&#39;=>-13329,&#39;suo&#39;=>-13326,
&#39;ta&#39;=>-13318,&#39;tai&#39;=>-13147,&#39;tan&#39;=>-13138,&#39;tang&#39;=>-13120,&#39;tao&#39;=>-13107,&#39;te&#39;=>-13096,&#39;teng&#39;=>-13095,&#39;ti&#39;=>-13091,&#39;tian&#39;=>-13076,&#39;tiao&#39;=>-13068,&#39;tie&#39;=>-13063,&#39;ting&#39;=>-13060,&#39;tong&#39;=>-12888,&#39;tou&#39;=>-12875,&#39;tu&#39;=>-12871,&#39;tuan&#39;=>-12860,&#39;tui&#39;=>-12858,&#39;tun&#39;=>-12852,&#39;tuo&#39;=>-12849,
&#39;wa&#39;=>-12838,&#39;wai&#39;=>-12831,&#39;wan&#39;=>-12829,&#39;wang&#39;=>-12812,&#39;wei&#39;=>-12802,&#39;wen&#39;=>-12607,&#39;weng&#39;=>-12597,&#39;wo&#39;=>-12594,&#39;wu&#39;=>-12585,
&#39;xi&#39;=>-12556,&#39;xia&#39;=>-12359,&#39;xian&#39;=>-12346,&#39;xiang&#39;=>-12320,&#39;xiao&#39;=>-12300,&#39;xie&#39;=>-12120,&#39;xin&#39;=>-12099,&#39;xing&#39;=>-12089,&#39;xiong&#39;=>-12074,&#39;xiu&#39;=>-12067,&#39;xu&#39;=>-12058,&#39;xuan&#39;=>-12039,&#39;xue&#39;=>-11867,&#39;xun&#39;=>-11861,
&#39;ya&#39;=>-11847,&#39;yan&#39;=>-11831,&#39;yang&#39;=>-11798,&#39;yao&#39;=>-11781,&#39;ye&#39;=>-11604,&#39;yi&#39;=>-11589,&#39;yin&#39;=>-11536,&#39;ying&#39;=>-11358,&#39;yo&#39;=>-11340,&#39;yong&#39;=>-11339,&#39;you&#39;=>-11324,&#39;yu&#39;=>-11303,&#39;yuan&#39;=>-11097,&#39;yue&#39;=>-11077,&#39;yun&#39;=>-11067,
&#39;za&#39;=>-11055,&#39;zai&#39;=>-11052,&#39;zan&#39;=>-11045,&#39;zang&#39;=>-11041,&#39;zao&#39;=>-11038,&#39;ze&#39;=>-11024,&#39;zei&#39;=>-11020,&#39;zen&#39;=>-11019,&#39;zeng&#39;=>-11018,&#39;zha&#39;=>-11014,&#39;zhai&#39;=>-10838,&#39;zhan&#39;=>-10832,&#39;zhang&#39;=>-10815,&#39;zhao&#39;=>-10800,&#39;zhe&#39;=>-10790,&#39;zhen&#39;=>-10780,&#39;zheng&#39;=>-10764,&#39;zhi&#39;=>-10587,&#39;zhong&#39;=>-10544,&#39;zhou&#39;=>-10533,&#39;zhu&#39;=>-10519,&#39;zhua&#39;=>-10331,&#39;zhuai&#39;=>-10329,&#39;zhuan&#39;=>-10328,&#39;zhuang&#39;=>-10322,&#39;zhui&#39;=>-10315,&#39;zhun&#39;=>-10309,&#39;zhuo&#39;=>-10307,&#39;zi&#39;=>-10296,&#39;zong&#39;=>-10281,&#39;zou&#39;=>-10274,&#39;zu&#39;=>-10270,&#39;zuan&#39;=>-10262,&#39;zui&#39;=>-10260,&#39;zun&#39;=>-10256,&#39;zuo&#39;=>-10254
);
 
    /**
     * 将中文编码成拼音
     * @param string $utf8Data utf8字符集数据
     * @param string $sRetFormat 返回格式 [head:首字母|all:全拼音]
     * @return string
     */
public static function encode($utf8Data, $sRetFormat=&#39;head&#39;){
$sGBK = iconv(&#39;UTF-8&#39;, &#39;GBK&#39;, $utf8Data);
$aBuf = array();
for ($i=0, $iLoop=strlen($sGBK); $i<$iLoop; $i++) {
$iChr = ord($sGBK{$i});
if ($iChr>160)
$iChr = ($iChr<<8) + ord($sGBK{++$i}) - 65536;
if (&#39;head&#39; === $sRetFormat)
$aBuf[] = substr(self::zh2py($iChr),0,1);
else
$aBuf[] = self::zh2py($iChr);
}
if (&#39;head&#39; === $sRetFormat)
return implode(&#39;&#39;, $aBuf);
else
return implode(&#39; &#39;, $aBuf);
}
 
/**
* 中文转换到拼音(每次处理一个字符)
* @param number $iWORD 待处理字符双字节
* @return string 拼音
*/
private static function zh2py($iWORD) {
if($iWORD>0 && $iWORD<160 ) {
return chr($iWORD);
} elseif ($iWORD<-20319||$iWORD>-10247) {
return &#39;&#39;;
} else {
foreach (self::$_aMaps as $py => $code) {
if($code > $iWORD) break;
$result = $py;
}
return $result;
}
}
}
?>

has optimized the traditional Pinyin conversion processing algorithm, specifically for UTF-8 character set processing.

If you use the GBK or GB2312 character set, you only need to remove the iconv function and assign the value directly without conversion.

Not much to say, just upload the code, it is completely open source and you can use it directly. If you find it useful, please help retain the author's information when using it.

The above is the detailed content of How to convert php utf-8 Chinese characters to pinyin. For more information, please follow other related articles on the PHP Chinese website!

Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn