• 技术文章 >后端开发 >php教程

    php各种优酷,土豆,视频封装类

    2016-07-25 08:49:37原创472
    php各种视频采集类封装,优酷,土豆,腾讯视频、56、搜狐视频、酷六、新浪视频、乐视.......

    九弄 jigwang.com
    /**
     * VideoUrlparser
     *
     * Resolves a video page URL from a supported Chinese video site into its
     * title, thumbnail and Flash player URL. Supported hosts are the ones listed
     * in CHECK_URL_VALID: Youku, Tudou, Ku6, 56.com, Letv, Sina video, Sohu TV
     * and Tencent Video (v.qq.com).
     *
     * @package
     * @version 1.3
     * @copyright 2011 - 2013
     *
     * Usage:
     *
     *   require_once "VideoUrlParser.class.php";
     *   $urls[] = "http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html";
     *   $urls[] = "http://www.tudou.com/playlist/p/l13087099.html";
     *   $urls[] = "http://www.tudou.com/programs/view/ufg-A3tlcxk/";
     *   $urls[] = "http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html";
     *   $urls[] = "http://www.56.com/u68/v_NjI2NTkxMzc.html";
     *   $urls[] = "http://www.letv.com/ptv/vplay/1168109.html";
     *   $urls[] = "http://video.sina.com.cn/v/b/46909166-1290055681.html";
     *
     *   foreach ($urls as $url) {
     *       $info = VideoUrlParser::parse($url);
     *       if ($info === false) {
     *           continue; // unsupported URL or the remote page could not be parsed
     *       }
     *       echo "{$info['title']}";
     *       echo "<br />";
     *       echo $info['object'];
     *       echo "<br />";
     *   }
     *
     * Example page URLs per site:
     *
     *   Youku:      http://v.youku.com/v_show/id_XMjU0NjY4OTEy.html
     *   Ku6:        http://v.ku6.com/special/show_3917484/x0BMXAbgZdQS6FqN.html
     *   Tudou:      http://www.tudou.com/playlist/p/a65929.html?iid=74905844
     *   56.com:     http://www.56.com/u98/v_NTkyODY2NTU.html
     *   Sina video: http://video.sina.com.cn/v/b/46909166-1290055681.html
     *   Letv:       http://www.letv.com/ptv/vplay/1168109.html
     */
    class VideoUrlparser
    {
        // Kept on a single line: a line break inside this value would be sent
        // verbatim inside the User-Agent request header.
        const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

        // Capture group 1 is the matched host fragment and drives the dispatch in parse().
        const CHECK_URL_VALID = "/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/";

        /**
         * Parse a video page URL.
         *
         * The host is matched case-insensitively, but the site parsers receive
         * the URL exactly as given because video ids are case-sensitive.
         *
         * @param string $url          Video page URL.
         * @param mixed  $createObject When truthy, an 'object' key is added to the result.
         * @static
         * @access public
         * @return array|false Array with the keys 'img', 'title', 'url', 'swf'
         *                     (plus 'object'), or false when the URL is not
         *                     supported or the remote data could not be parsed.
         */
        public static function parse($url = '', $createObject = true)
        {
            $lowerurl = strtolower((string) $url);
            if (!preg_match(self::CHECK_URL_VALID, $lowerurl, $matches)) {
                return false;
            }

            switch ($matches[1]) {
                case 'youku.com':
                    $data = self::_parseYouku($url);
                    break;
                case 'tudou.com':
                    $data = self::_parseTudou($url);
                    break;
                case 'ku6.com':
                    $data = self::_parseKu6($url);
                    break;
                case '56.com':
                    $data = self::_parse56($url);
                    break;
                case 'letv.com':
                    $data = self::_parseLetv($url);
                    break;
                case 'video.sina.com.cn':
                    $data = self::_parseSina($url);
                    break;
                case 'my.tv.sohu.com':
                case 'tv.sohu.com':
                    $data = self::_parseSohu($url);
                    break;
                case 'v.qq.com':
                    $data = self::_parseQq($url);
                    break;
                default:
                    $data = false;
            }

            // NOTE(review): 'object' is an empty string in the published listing;
            // presumably the original embed markup was stripped when the article
            // was rendered. Confirm against the upstream source before relying on it.
            if ($data && $createObject) {
                $data['object'] = "";
            }
            return $data;
        }

        /**
         * Tencent Video
         * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
         * http://v.qq.com/play/97abu74o4w3.html
         * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
         * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
         * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseQq($url)
        {
            if (preg_match("/\/play\//", $url)) {
                // "/play/" pages point at the real cover page through a "url=..." target.
                $html = self::_fget($url);
                if (!$html || !preg_match("/url=([^\"]+)/", $html, $matches)) {
                    return false;
                }
                // Use the captured target only; the full match still carries "url=".
                $url = $matches[1];
            }

            $vid = preg_match("/vid=([^_&]+)/", $url, $matches) ? $matches[1] : '';

            $html = self::_fget($url);
            if (!$html) {
                return false;
            }

            // Player query string as embedded in the page script.
            $query = preg_match("/flashvars\s=\s\"([^;]+)/s", $html, $matches) ? $matches[1] : '';

            // Fall back to the default vid declared in the page script.
            if ($vid === '' && preg_match("/vid\s?=\s?vid\s?\|\|\s?\"(\w+)\";/i", $html, $matches)) {
                $vid = $matches[1];
            }
            if ($vid === '') {
                return false;
            }

            $query = str_replace('"+vid+"', $vid, $query);
            parse_str($query, $output);

            $cid = self::_field($output, 'cid', '');

            $data = array();
            $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
            $data['url'] = $url;
            $data['title'] = self::_field($output, 'title');
            $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?" . $query;
            return $data;
        }

        /**
         * Youku
         * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
         * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseYouku($url)
        {
            if (!preg_match("#id\_(\w+)#", $url, $matches)) {
                // Playlist pages carry the video id inside the page script instead.
                if (!preg_match("#v_playlist\/#", $url)) {
                    return false;
                }
                $html = self::_fget($url);
                if (!$html || !preg_match("#videoId2\s*=\s*\'(\w+)\'#", $html, $matches)) {
                    return false;
                }
            }

            $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$matches[1]}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";
            $retval = self::_cget($link);
            if (!$retval) {
                return false;
            }

            $json = json_decode($retval, true);
            if (!isset($json['data'][0])) {
                return false;
            }

            $data = array();
            $data['img'] = self::_field($json['data'][0], 'logo');
            $data['title'] = self::_field($json['data'][0], 'title');
            $data['url'] = $url;
            $data['swf'] = "http://player.youku.com/player.php/sid/{$matches[1]}/v.swf";
            return $data;
        }

        /**
         * Tudou
         * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
         * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
         *
         * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
         * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseTudou($url)
        {
            if (!preg_match("#view/([-\w]+)/#", $url, $matches)) {
                return self::_parseTudouPlaylist($url);
            }

            // Single-video page: the player URL redirects to a location whose
            // query string carries the thumbnail and the title.
            $ret = self::_fsget("/v/{$matches[1]}/v.swf", "www.tudou.com");
            if (!$ret || !preg_match("#\nLocation: (.*)\n#", $ret, $mat)) {
                return false;
            }

            // trim() drops the "\r" that precedes the line feed in HTTP headers.
            $location = urldecode(trim($mat[1]));
            // Parse into an array: the one-argument parse_str() form, which
            // injected variables into local scope, was removed in PHP 8.
            parse_str((string) parse_url($location, PHP_URL_QUERY), $params);

            $data = array();
            $data['img'] = self::_field($params, 'snap_pic');
            $data['title'] = self::_field($params, 'title');
            $data['url'] = $url;
            $data['swf'] = "http://www.tudou.com/v/{$matches[1]}/v.swf";
            return $data;
        }

        /**
         * Tudou playlist pages (".../playlist/p/...").
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseTudouPlaylist($url)
        {
            if (strpos($url, "/playlist/") === false) {
                return false;
            }

            $lowerurl = strtolower($url);
            if (strpos($lowerurl, 'iid=') !== false) {
                $quarr = explode("iid=", $lowerurl);
                if (empty($quarr[1])) {
                    return false;
                }
            } elseif (preg_match("#p\/l(\d+).#", $lowerurl, $quarr)) {
                if (empty($quarr[1])) {
                    return false;
                }
            }

            $html = self::_fget($url);
            if (!$html) {
                return false;
            }
            $html = self::_gb2312ToUtf8($html);

            if (!preg_match("/lid_code\s=\slcode\s=\s[\'\"]([^\'\"]+)/s", $html, $matches)) {
                return false;
            }
            $icode = $matches[1];

            if (!preg_match("/iid\s=\s.*?\|\|\s(\d+)/sx", $html, $matches)) {
                return false;
            }
            $iid = $matches[1];

            if (!preg_match("/listData\s=\s(\[\{.*\}\])/sx", $html, $matches)) {
                return false;
            }

            // Turn the JavaScript object literal into JSON: strip whitespace,
            // blank out non-literal values and quote the bare keys.
            $find = array("/\n/", '/\s/', "/:[^\d\"]\w+[^\,]*,/i", "/(\{|,)(\w+):/");
            $replace = array("", "", ':"",', '\\1"\\2":');
            $json = json_decode(preg_replace($find, $replace, $matches[1]));

            // Pick the entry whose iid matches; when none matches, the last
            // entry of the list is used.
            $item = null;
            if (is_array($json) || is_object($json)) {
                foreach ($json as $val) {
                    $item = $val;
                    if (isset($val->iid) && $val->iid == $iid) {
                        break;
                    }
                }
            }
            if (!is_object($item)) {
                return false;
            }

            $data = array();
            $data['img'] = isset($item->pic) ? $item->pic : null;
            $data['title'] = isset($item->title) ? $item->title : null;
            $data['url'] = $url;
            $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";
            return $data;
        }

        /**
         * Ku6
         * http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
         * http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
         * http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
         * http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseKu6($url)
        {
            if (preg_match("/show\_/", $url)) {
                if (!preg_match("#/([-\w]+)\.html#", $url, $matches)) {
                    return false;
                }

                // The metadata comes from a JSON endpoint. It is fetched through
                // a separate variable so that 'url' stays the page URL, as it
                // does for the other sites.
                $api = "http://v.ku6.com/fetchVideo4Player/{$matches[1]}.html";
                $html = self::_fget($api);
                if (!$html) {
                    return false;
                }
                $json = json_decode($html, true);
                if (!$json || !isset($json['data'])) {
                    return false;
                }

                $data = array();
                $data['img'] = self::_field($json['data'], 'picpath');
                $data['title'] = self::_field($json['data'], 't');
                $data['url'] = $url;
                $data['swf'] = "http://player.ku6.com/refer/{$matches[1]}/v.swf";
                return $data;
            }

            if (preg_match("/show\//", $url)) {
                $html = self::_fget($url);
                if (!$html || !preg_match("/ObjectInfo\s?=\s?([^\n]*)};/si", $html, $matches)) {
                    return false;
                }
                $str = $matches[1];

                $data = array();

                // img
                $data['img'] = preg_match("/cover\s?:\s?\"([^\"]+)\"/", $str, $matches) ? $matches[1] : null;

                // title: round-trip through json_decode so that JSON escape
                // sequences in the script literal are decoded.
                $data['title'] = null;
                if (preg_match("/title\"?\s?:\s?\"([^\"]+)\"/", $str, $matches)) {
                    $json = json_decode("{\"title\":\"{$matches[1]}\"}", true);
                    $data['title'] = self::_field($json, 'title');
                }

                // url
                $data['url'] = $url;

                // query: the attribute value is HTML-escaped in the page source.
                $query = '';
                if (preg_match("/\"(vid=[^\"]+)\"\sname=\"flashVars\"/s", $html, $matches)) {
                    $query = str_replace("&amp;", '&', $matches[1]);
                }

                if (!preg_match("/\/\/player\.ku6cdn\.com[^\"\']+/", $html, $matches)) {
                    return false;
                }
                $data['swf'] = 'http:' . $matches[0] . '?' . $query;
                return $data;
            }

            // Neither known URL form.
            return false;
        }

        /**
         * 56.com
         * http://www.56.com/u73/v_NTkzMDcwNDY.html
         * http://player.56.com/v_NTkzMDcwNDY.swf
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parse56($url)
        {
            if (!preg_match("#/v_(\w+)\.html#", $url, $matches)) {
                return false;
            }

            $link = "http://vxml.56.com/json/{$matches[1]}/?src=out";
            $retval = self::_cget($link);
            if (!$retval) {
                return false;
            }

            $json = json_decode($retval, true);
            if (!isset($json['info'])) {
                return false;
            }

            $data = array();
            $data['img'] = self::_field($json['info'], 'img');
            $data['title'] = self::_field($json['info'], 'Subject');
            $data['url'] = $url;
            $data['swf'] = "http://player.56.com/v_{$matches[1]}.swf";
            return $data;
        }

        /**
         * Letv
         * http://www.letv.com/ptv/vplay/1168109.html
         * http://www.letv.com/player/x1168109.swf
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseLetv($url)
        {
            if (!preg_match("#vplay/(\d+)#", $url, $idMatch)) {
                return false;
            }

            $html = self::_fget($url);
            if ($html === false) {
                return false;
            }

            // Title and thumbnail are read from the query string of the
            // v.t.sina.com.cn link found in the page.
            $params = array();
            if (preg_match("#http://v.t.sina.com.cn/([^'\"]*)#", $html, $matches)) {
                // Parse into an array: the one-argument parse_str() form was removed in PHP 8.
                parse_str((string) parse_url(urldecode($matches[0]), PHP_URL_QUERY), $params);
            }

            $data = array();
            $data['img'] = self::_field($params, 'pic');
            $data['title'] = self::_field($params, 'title');
            $data['url'] = $url;
            $data['swf'] = "http://www.letv.com/player/x{$idMatch[1]}.swf";
            return $data;
        }

        /**
         * Sohu TV
         * http://my.tv.sohu.com/u/vw/5101536
         *
         * @param string $url Page URL.
         * @return array|false
         */
        private static function _parseSohu($url)
        {
            $html = self::_fget($url);
            if (!$html) {
                return false;
            }
            $html = self::_gb2312ToUtf8($html);

            if (!preg_match_all("/og:(title|image|videosrc)\"\scontent=\"([^\"]+)\"/s", $html, $matches, PREG_SET_ORDER)) {
                return false;
            }

            // Index by property name so the result does not depend on the order
            // in which the meta tags appear; the first occurrence wins.
            $meta = array();
            foreach ($matches as $hit) {
                if (!isset($meta[$hit[1]])) {
                    $meta[$hit[1]] = $hit[2];
                }
            }

            $data = array();
            $data['img'] = self::_field($meta, 'image');
            $data['title'] = self::_field($meta, 'title');
            $data['url'] = $url;
            $data['swf'] = self::_field($meta, 'videosrc');
            return $data;
        }

        /**
         * Sina video
         * http://video.sina.com.cn/v/b/48717043-1290055681.html
         * http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
         *
         * @param string $url Page URL or player URL containing "<digits>-<digits>" or "<digits>_<digits>".
         * @return array|false
         */
        private static function _parseSina($url)
        {
            if (!preg_match("/(\d+)(?:\-|\_)(\d+)/", $url, $matches)) {
                return false;
            }

            // Normalise to the canonical page URL; this is also what is returned as 'url'.
            $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";
            $html = self::_fget($url);
            if (!$html || !preg_match("/video\s?:\s?([^<]+)}/", $html, $matches)) {
                return false;
            }

            // Turn the JavaScript object literal into JSON.
            $find = array("/\n/", "/\s*/", "/\'/", "/\{([^:,]+):/", "/,([^:]+):/", "/:[^\d\"]\w+[^\,]*,/i");
            $replace = array('', '', '"', '{"\\1":', ',"\\1":', ':"",');
            $arr = json_decode(preg_replace($find, $replace, $matches[1]), true);
            if (!is_array($arr)) {
                return false;
            }

            $data = array();
            $data['img'] = self::_field($arr, 'pic');
            $data['title'] = self::_field($arr, 'title');
            $data['url'] = $url;
            $data['swf'] = self::_field($arr, 'swfOutsideUrl');
            return $data;
        }

        /**
         * Fetch a URL with file_get_contents().
         *
         * Only http:// and https:// URLs are fetched: the URL originates from the
         * caller of parse(), and file_get_contents() would otherwise also open
         * local paths and other stream wrappers.
         *
         * @param string $url
         * @return string|false Body (gunzipped when it is a gzip stream), or false on failure.
         */
        private static function _fget($url = '')
        {
            if (!$url || !preg_match('#^https?://#i', $url)) {
                return false;
            }
            $html = file_get_contents($url);
            if ($html === false) {
                return false;
            }
            return self::_gunzipIfNeeded($html);
        }

        /**
         * Fetch a path with a raw HTTP GET over fsockopen() (port 80).
         *
         * The response is returned unparsed, status line and headers included,
         * so that callers can inspect headers such as "Location".
         *
         * @param string $path
         * @param string $host
         * @param string $user_agent Defaults to USER_AGENT when empty.
         * @return string|false
         */
        private static function _fsget($path = '/', $host = '', $user_agent = '')
        {
            if (!$path || !$host) {
                return false;
            }
            $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

            // HTTP header lines are CRLF-terminated. "Connection: close" makes the
            // server end the stream after the response, so the read loop below
            // terminates instead of waiting on a kept-alive connection.
            $out = "GET {$path} HTTP/1.1\r\n"
                . "Host: {$host}\r\n"
                . "User-Agent: {$user_agent}\r\n"
                . "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
                . "Accept-Language: zh-cn,zh;q=0.5\r\n"
                . "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7\r\n"
                . "Connection: close\r\n"
                . "\r\n";

            $fp = @fsockopen($host, 80, $errno, $errstr, 10);
            if (!$fp) {
                return false;
            }
            if (!fputs($fp, $out)) {
                fclose($fp);
                return false;
            }

            $html = '';
            while (!feof($fp)) {
                $chunk = fgets($fp, 1024);
                if ($chunk === false) {
                    break;
                }
                $html .= $chunk;
            }
            fclose($fp);

            return self::_gunzipIfNeeded($html);
        }

        /**
         * Fetch a URL with cURL.
         *
         * @param string $url
         * @param string $user_agent Defaults to USER_AGENT when empty.
         * @return string|false Body (gunzipped when it is a gzip stream), or false
         *                      on failure or an empty response.
         */
        private static function _cget($url = '', $user_agent = '')
        {
            if (!$url) {
                return false;
            }
            $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            // Return the body from curl_exec() instead of capturing echoed output.
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

            $html = curl_exec($ch);
            $failed = curl_errno($ch) !== 0;
            curl_close($ch);

            if ($failed || !is_string($html) || $html === '') {
                return false;
            }
            return self::_gunzipIfNeeded($html);
        }

        /**
         * Return the gunzipped payload when $raw is a valid gzip stream,
         * otherwise return $raw unchanged.
         *
         * @param string $raw
         * @return string
         */
        private static function _gunzipIfNeeded($raw)
        {
            $decoded = self::_gzdecode($raw);
            return (is_string($decoded) && $decoded !== '') ? $decoded : $raw;
        }

        /**
         * Convert GB2312 text to UTF-8, keeping the input when conversion fails.
         *
         * @param string $html
         * @return string
         */
        private static function _gb2312ToUtf8($html)
        {
            $converted = iconv("GB2312", "UTF-8", $html);
            return $converted === false ? $html : $converted;
        }

        /**
         * Read $source[$key], or $default when $source is not an array or the key is absent.
         *
         * @param mixed  $source
         * @param string $key
         * @param mixed  $default
         * @return mixed
         */
        private static function _field($source, $key, $default = null)
        {
            return (is_array($source) && isset($source[$key])) ? $source[$key] : $default;
        }

        /**
         * Decode a gzip (RFC 1952) member.
         *
         * Header layout: ID1 ID2 CM FLG MTIME(4) XFL OS, 10 bytes in total, then
         * the optional fields in this order: FEXTRA, FNAME, FCOMMENT, FHCRC.
         * The stream ends with an 8-byte trailer: CRC32 and ISIZE of the payload.
         *
         * @param string $data
         * @return string|null|false Decoded payload; null when $data is not a gzip
         *                           stream; false when the stream is malformed.
         */
        private static function _gzdecode($data)
        {
            $len = strlen($data);
            if ($len < 18 || strcmp(substr($data, 0, 2), "\x1f\x8b")) {
                return null; // Not GZIP format (see RFC 1952)
            }

            $method = ord(substr($data, 2, 1)); // Compression method
            $flags = ord(substr($data, 3, 1));  // Flags
            // Parenthesised on purpose: "!=" binds tighter than "&".
            if (($flags & 31) != $flags) {
                // Reserved bits are set -- NOT ALLOWED by RFC 1952
                return null;
            }

            $headerlen = 10;

            if ($flags & 4) {
                // FEXTRA: 2-byte little-endian length, then that many bytes.
                if ($len - $headerlen - 2 < 8) {
                    return false; // Invalid format
                }
                $extralen = unpack("v", substr($data, $headerlen, 2));
                $extralen = $extralen[1];
                if ($len - $headerlen - 2 - $extralen < 8) {
                    return false; // Invalid format
                }
                $headerlen += 2 + $extralen;
            }

            if ($flags & 8) {
                // FNAME: NUL-terminated string
                if ($len - $headerlen - 1 < 8) {
                    return false; // Invalid format
                }
                $end = strpos($data, chr(0), $headerlen);
                if ($end === false || $len - ($end + 1) < 8) {
                    return false; // Invalid format
                }
                $headerlen = $end + 1;
            }

            if ($flags & 16) {
                // FCOMMENT: NUL-terminated string
                if ($len - $headerlen - 1 < 8) {
                    return false; // Invalid format
                }
                $end = strpos($data, chr(0), $headerlen);
                if ($end === false || $len - ($end + 1) < 8) {
                    return false; // Invalid header format
                }
                $headerlen = $end + 1;
            }

            if ($flags & 2) {
                // FHCRC: the two low-order bytes of the CRC32 of the header so far
                if ($len - $headerlen - 2 < 8) {
                    return false; // Invalid format
                }
                $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
                $headercrc = unpack("v", substr($data, $headerlen, 2));
                if ($headercrc[1] != $calccrc) {
                    return false; // Bad header CRC
                }
                $headerlen += 2;
            }

            // Trailer: CRC32 and size of the uncompressed payload.
            $datacrc = unpack("V", substr($data, -8, 4));
            $datacrc = $datacrc[1];
            $isize = unpack("V", substr($data, -4));
            $isize = $isize[1];

            $bodylen = $len - $headerlen - 8;
            if ($bodylen < 1) {
                return null;
            }
            if ($method !== 8) {
                // Deflate (8) is the only supported compression method.
                return false;
            }

            $inflated = gzinflate(substr($data, $headerlen, $bodylen));
            if ($inflated === false) {
                return false;
            }
            if ($isize != strlen($inflated) || crc32($inflated) != $datacrc) {
                // Bad format! Length or CRC doesn't match!
                return false;
            }
            return $inflated;
        }
    }
    509. /*
    510. $url = "http://v.youku.com/v_show/id_XNjIxNjUyOTky.html";
    511. $obj = new VideoUrlparser();
    512. $data = $obj->parse($url);
    513. var_dump($data);
    514. */
    复制代码
    声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。
    专题推荐:php各种优酷,土豆,视频封装类
    上一篇:一个简单的数据库连接和文本缓存综合类 下一篇:自己动手写 PHP MVC 框架(40节精讲/巨细/新人进阶必看)

    相关文章推荐

    • PHP网站常见一些安全漏洞及防御方法• PHP Hyperf 3.0 发布!新功能速览• 详解PHP怎么实现旋转图片验证• 简单理解PHP超级全局变量• 一起聊聊PHP的路由与伪静态应用
    1/1

    PHP中文网