curl は、URL 構文を用いてコマンドラインで動作する、オープンソースのファイル転送ツールです。
この記事では、PHP で curl のバッチ処理 (curl_multi 系関数による並列リクエスト) を実装する例を紹介します。
コードは次のとおりです:
header("Content-Type:text/html;charset=utf8");

/**
 * Drive every transfer attached to a curl multi handle until all of them
 * have finished or libcurl reports an error.
 *
 * curl_multi_select() may return -1 immediately (on some libcurl builds it
 * does so every time). Looping on curl_multi_exec() without waiting in that
 * case burns a full CPU core, so we sleep briefly before polling again.
 *
 * @param resource|CurlMultiHandle $mh Multi handle with easy handles attached.
 * @return int The last CURLM_* status returned by curl_multi_exec().
 */
function run_multi_handle($mh)
{
    $active = 0;
    while (true) {
        // Older libcurl versions ask to be called again right away.
        do {
            $status = curl_multi_exec($mh, $active);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        if ($status !== CURLM_OK || !$active) {
            break;
        }

        // Block until there is socket activity (at most one second).
        if (curl_multi_select($mh, 1.0) === -1) {
            usleep(100000); // select failed: back off 100 ms instead of spinning
        }
    }
    return $status;
}

/**
 * Create an easy handle that returns the response body as a string.
 *
 * @param string $url Absolute URL to fetch.
 * @return resource|CurlHandle
 */
function make_easy_handle($url)
{
    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        // Without timeouts a single stalled host blocks the whole batch.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 30,
    ));
    return $ch;
}

/* Step 1: fetch two pages and collect every <a> tag they contain */

// Create the two easy handles
$ch1 = make_easy_handle('http://www.sina.com.cn');
$ch2 = make_easy_handle('http://www.baidu.com/');

// Create the multi handle and attach the easy handles
$mh = curl_multi_init();
curl_multi_add_handle($mh, $ch1);
curl_multi_add_handle($mh, $ch2);

// Run the batch
run_multi_handle($mh);

// Collect the page bodies (a failed transfer simply contributes nothing)
$text  = curl_multi_getcontent($ch1);
$text .= curl_multi_getcontent($ch2);

// Find every <a> tag; $matches[1] holds the href values.
// /i: tag and attribute names are case-insensitive; /s: tags may span lines.
// \b keeps <abbr>, <area>, ... from being mistaken for <a>.
$matches = null;
preg_match_all('/<a\b[^>]*?\bhref\s*=\s*[\'"](.*?)[\'"][^>]*>(.*?)<\/a>/is', $text, $matches);

// Detach the easy handles and close the multi handle
curl_multi_remove_handle($mh, $ch1);
curl_multi_remove_handle($mh, $ch2);
curl_multi_close($mh);

/* Step 2: fetch the linked pages and extract their <title> tags */

// Only absolute http(s) links can be fetched as-is; relative paths, "#" and
// "javascript:" hrefs would just produce failed transfers. Duplicates are
// dropped so the same page is not downloaded twice.
$links = array();
foreach ($matches[1] as $href) {
    $href = trim($href);
    if (preg_match('#^https?://#i', $href)) {
        $links[$href] = true;
    }
}

$handle  = array();            // easy handles of the second batch
$mhandle = curl_multi_init();  // multi handle of the second batch

// Process at most 100 pages
foreach (array_slice(array_keys($links), 0, 100) as $href) {
    $tmp_h = make_easy_handle($href);
    curl_multi_add_handle($mhandle, $tmp_h);
    $handle[] = $tmp_h;
}

run_multi_handle($mhandle);

// Extract the titles page by page, so that an unterminated <title> on one
// page cannot swallow the beginning of the next one.
$titles = array();
foreach ($handle as $tmp_h) {
    $page = (string) curl_multi_getcontent($tmp_h);
    curl_multi_remove_handle($mhandle, $tmp_h);

    $found = array();
    if (preg_match_all('/<title\b[^>]*>(.*?)<\/title>/is', $page, $found)) {
        foreach ($found[1] as $title) {
            $titles[] = trim($title);
        }
    }
}

// Convert every title to UTF-8, the charset announced in the header above.
// UTF-8 is tested first in strict mode so valid UTF-8 is never mis-detected
// as a legacy Chinese charset; GB18030 is a superset of GBK/GB2312.
foreach ($titles as $key => $val) {
    $encoding = mb_detect_encoding($val, array('UTF-8', 'GB18030', 'BIG-5'), true);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $titles[$key] = mb_convert_encoding($val, 'UTF-8', $encoding);
    }
}

// Print the titles
var_dump($titles);

// Close the multi handle
curl_multi_close($mhandle);
以上、PHP で curl のバッチ処理を実装する例を紹介しました。PHP を学んでいる方の参考になれば幸いです。