phpで実装されたcurlバッチ処理の例 phpcurlダウンロードファイル phpcurl.dllダウンロード phpcurlシミュレートされたログイン

WBOY
リリース: 2016-07-29 08:52:20
オリジナル
1336 人が閲覧しました

curl は、URL 構文を用いてコマンドラインで動作する、オープンソースのファイル転送ツールです。

この記事では、PHP で curl のバッチ処理(curl_multi)を実装する例を紹介します。

コードは次のとおりです:

// Batch-download example built on PHP's curl_multi API.
//
// Stage 1: fetch two portal pages in parallel and collect every <a href>.
// Stage 2: fetch up to 100 of the collected links in parallel, extract each
//          page's <title>, normalise the titles to UTF-8 and dump them.

header('Content-Type: text/html; charset=utf-8');

/**
 * Create an easy handle that returns the response body as a string.
 *
 * @param string $url Absolute URL to fetch.
 * @return resource|CurlHandle Configured easy handle.
 */
function make_easy_handle($url)
{
    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        // Without timeouts a single stalled host keeps the whole batch
        // (and therefore the script) waiting forever.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 30,
    ));
    return $ch;
}

/**
 * Drive every transfer attached to a multi handle until all have finished
 * or curl_multi_exec() reports an error.
 *
 * @param resource|CurlMultiHandle $mh Multi handle with easy handles attached.
 * @return int Last status code returned by curl_multi_exec().
 */
function run_multi($mh)
{
    $active = 0;
    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $status = curl_multi_exec($mh, $active);
        } while ($status == CURLM_CALL_MULTI_PERFORM);

        if ($status != CURLM_OK || !$active) {
            break;
        }

        // Block until there is socket activity. curl_multi_select() can
        // return -1 (the original author observed it doing so every time);
        // sleep briefly in that case so the loop does not spin at 100% CPU.
        if (curl_multi_select($mh, 1.0) == -1) {
            usleep(1000);
        }
    } while (true);

    return $status;
}

/* Stage 1: fetch two pages and collect all of their <a> tags. */

// Create the two easy handles.
$ch1 = make_easy_handle('http://www.sina.com.cn');
$ch2 = make_easy_handle('http://www.baidu.com/');

// Create the multi handle and attach the easy handles to it.
$mh = curl_multi_init();
curl_multi_add_handle($mh, $ch1);
curl_multi_add_handle($mh, $ch2);

// Run the batch.
run_multi($mh);

// Collect the response bodies; a failed transfer contributes nothing.
$text  = (string) curl_multi_getcontent($ch1);
$text .= (string) curl_multi_getcontent($ch2);

// Find every <a> tag; $matches[1] holds the href values.
$matches = array();
preg_match_all("/<a.*?href\s*?=\s*?[\'\"](.*?)[\'\"].*?>(.*?)<\/a>/", $text, $matches);

// Detach the easy handles and release the multi handle.
curl_multi_remove_handle($mh, $ch1);
curl_multi_remove_handle($mh, $ch2);
curl_multi_close($mh);

/* Stage 2: fetch the discovered links and look for their <title> tags. */

// Only absolute http(s) URLs can be fetched as-is; dropping relative paths,
// javascript: pseudo-links and duplicates keeps them from wasting the
// 100-page budget below.
$links = array();
foreach ($matches[1] as $href) {
    if (preg_match('#^https?://#i', $href)) {
        $links[$href] = true;
    }
}
$links = array_keys($links);

$handle  = array();            // easy handles of the second batch
$mhandle = curl_multi_init();  // multi handle of the second batch

// Process at most 100 pages.
foreach (array_slice($links, 0, 100) as $href) {
    $tmp_h = make_easy_handle($href);
    curl_multi_add_handle($mhandle, $tmp_h);
    $handle[] = $tmp_h;
}

run_multi($mhandle);

// Concatenate the bodies of all fetched pages. Start from an empty string
// rather than null so preg_match_all() always receives a string.
$mtext = '';
foreach ($handle as $tmp_h) {
    $mtext .= (string) curl_multi_getcontent($tmp_h);
    curl_multi_remove_handle($mhandle, $tmp_h);
}

// "i": tolerate <TITLE>; "s": tolerate titles that span several lines.
$mmatches = array();
preg_match_all("/<title>(.*?)<\/title>/is", $mtext, $mmatches);

// Encoding normalisation. The response was declared as UTF-8 above, so every
// title that is not already valid UTF-8 is converted. UTF-8 is tried first in
// strict mode; GB18030 is listed as the candidate for GB2312/GBK-era pages.
// mb_convert_encoding() is used so that the name returned by
// mb_detect_encoding() is interpreted by the same extension.
foreach ($mmatches[1] as $key => $val) {
    $encoding = mb_detect_encoding($val, array('UTF-8', 'GB18030', 'BIG-5'), true);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $mmatches[1][$key] = mb_convert_encoding($val, 'UTF-8', $encoding);
    }
}

// Print the titles.
var_dump($mmatches[1]);

// Release the multi handle of the second batch.
curl_multi_close($mhandle);
ログイン後にコピー

以上、curl と PHP に関する内容を含め、PHP で実装した curl バッチ処理の例を紹介しました。PHP のチュートリアルに興味のある方のお役に立てば幸いです。

関連ラベル:
ソース:php.cn
このウェブサイトの声明
この記事の内容はネチズンが自主的に寄稿したものであり、著作権は原著者に帰属します。このサイトは、それに相当する法的責任を負いません。盗作または侵害の疑いのあるコンテンツを見つけた場合は、admin@php.cn までご連絡ください。
最新の問題
人気のチュートリアル
詳細>
最新のダウンロード
詳細>
ウェブエフェクト
公式サイト
サイト素材
フロントエンドテンプレート
私たちについて 免責事項 Sitemap
PHP中国語ウェブサイト:福祉オンライン PHP トレーニング,PHP 学習者の迅速な成長を支援します!