Home > Backend Development > PHP Tutorial > PHP web page capture

PHP web page capture

WBOY
Release: 2016-07-25 08:43:39
Original
994 people have browsed it
  <?php
  // The listing lost its opening tag during extraction (the closing tag is
  // still present at the end of the script); without it none of the code
  // below is parsed as PHP.

  // Declare the response as UTF-8 HTML. header() must run before any output.
  header('Content-Type:text/html;charset=utf-8');
  /**
   * A class for grabbing pictures.
   *
   * Starting from one page, it downloads every image referenced by an <img>
   * tag, follows the page's <a href> links and repeats the process on each
   * linked page. Progress messages are echoed as the crawl proceeds.
   *
   * The lists of crawled pages and saved images are static, so they are shared
   * by every instance for the lifetime of the script.
   */
  class download_image {
      /** @var string Prefix (normally a directory ending in "/") prepended to saved file names. */
      public $_save_path = NULL;

      /** @var int Images whose byte length is not below this value are skipped. */
      public $_limit_size = NULL;

      /** @var array Absolute URLs of the images that were saved successfully. */
      public static $_img_url_old = array();

      /** @var array Absolute URLs of the pages that have already been crawled. */
      public static $_a_page_url = array();

      /**
       * @param string $_save_path  Prefix prepended to every saved file name.
       * @param int    $_limit_size Upper bound (exclusive) on the image size in bytes.
       */
      public function __construct( $_save_path, $_limit_size ) {
          $this->_save_path  = $_save_path;
          $this->_limit_size = $_limit_size;
      }

      /**
       * Crawl $site_url and every page reachable from it, saving their images.
       *
       * Pages are processed from a work queue instead of by recursion, so the
       * call depth stays constant however many pages are discovered. A page is
       * recorded in self::$_a_page_url only when it is actually visited, which
       * is what prevents revisiting it (and therefore infinite loops).
       *
       * @param string $site_url Absolute URL of the first page.
       * @return false|null false when $site_url is empty, otherwise null.
       */
      public function get_all_page_image( $site_url ) {
          if ( $site_url == '' ) {
              return false;
          }

          $link_pattern = '#<a[^>]+href=[\'" ]?([^ \'"?]+)[\'" >]#iU';
          $pending      = array( $site_url );

          while ( $pending ) {
              $page_url = array_shift( $pending );
              if ( in_array( $page_url, self::$_a_page_url, true ) ) {
                  continue; // Already crawled.
              }
              self::$_a_page_url[] = $page_url;

              // Best effort: an unreachable page is skipped, not fatal.
              $content = @file_get_contents( $page_url );
              if ( $content === false ) {
                  continue;
              }

              // Hand over the markup so the page is not downloaded twice.
              $this->download_the_page_image( $page_url, $content );

              $all_url = array();
              preg_match_all( $link_pattern, $content, $all_url, PREG_SET_ORDER );
              foreach ( $all_url as $val ) {
                  $link = $this->resolve_url( $page_url, $val[1] );
                  if ( $link !== NULL
                      && ! in_array( $link, self::$_a_page_url, true )
                      && ! in_array( $link, $pending, true )
                  ) {
                      $pending[] = $link;
                  }
              }
          }
      }

      /**
       * Download all images linked from one page.
       *
       * @param string      $site_url Absolute URL of the page.
       * @param string|null $content  Markup of the page when the caller has
       *                              already fetched it; when omitted the page
       *                              is fetched here.
       */
      public function download_the_page_image( $site_url, $content = NULL ) {
          if ( $content === NULL ) {
              $content = @file_get_contents( $site_url );
          }

          // Capture group 1 is the src value up to the first quote, space or "?".
          $img_pattern = '#<img[^>]+src=[\'" ]?([^ \'"?]+)[\'" >]#iU';
          $img_out     = array();
          // A failed fetch yields false; casting makes that "no images found".
          preg_match_all( $img_pattern, (string) $content, $img_out, PREG_SET_ORDER );

          echo "\n\n    " . $site_url . 'Total found' . count( $img_out ) . "Picture\n\n    ";

          foreach ( $img_out as $val ) {
              $this->save_one_image( $site_url, $val[1] );
          }
      }

      /**
       * Fetch one image and store it under the save path.
       *
       * The file name is an md5 of the current microtime followed by the
       * image's base name. The outcome is echoed; nothing is returned.
       *
       * @param string $site_url Absolute URL of the page that references the image.
       * @param string $img_url  Image address as written in the page (absolute or relative).
       */
      public function save_one_image( $site_url, $img_url ) {
          $resolved = $this->resolve_url( $site_url, $img_url );
          if ( $resolved === NULL ) {
              // Not something that can be fetched over HTTP (data:, mailto:, empty, ...).
              echo $img_url . "Picture saving failed!\n    ";
              return;
          }
          $img_url = $resolved;

          if ( in_array( $img_url, self::$_img_url_old, true ) ) {
              echo $img_url . "This image has been captured!\n    ";
              return;
          }

          // Get the image content and write it into a string.
          $img_data = @file_get_contents( $img_url );
          if ( $img_data === false ) {
              echo $img_url . "Picture saving failed!\n    ";
              return;
          }

          if ( strlen( $img_data ) >= $this->_limit_size ) {
              echo $img_url . "The image size is within the limit outside!\n    ";
              return;
          }

          $pic_name = basename( $img_url ); // Get the picture name.
          $img_boo  = @file_put_contents( $this->_save_path . md5( microtime() ) . $pic_name, $img_data );
          if ( $img_boo ) {
              echo $img_url . "The picture was saved successfully!\n    ";
              self::$_img_url_old[] = $img_url;
          } else {
              echo $img_url . "Picture saving failed!\n    ";
          }
      }

      /**
       * Turn a link found in a page into an absolute http(s) URL.
       *
       * Handles absolute, protocol-relative ("//host/x"), root-relative ("/x")
       * and document-relative ("x") links. "../" segments are not collapsed.
       *
       * @param string $base_url Absolute URL of the page containing the link.
       * @param string $link     The raw href/src value.
       * @return string|null The absolute URL, or null when the link is empty,
       *                     a pure fragment, uses another scheme, or
       *                     $base_url has no scheme/host to resolve against.
       */
      private function resolve_url( $base_url, $link ) {
          $link = trim( $link );

          // A fragment only addresses a position inside the same document.
          $hash = strpos( $link, '#' );
          if ( $hash !== false ) {
              $link = substr( $link, 0, $hash );
          }
          if ( $link === '' ) {
              return NULL;
          }

          if ( preg_match( '#^https?://#i', $link ) ) {
              return $link;
          }

          $base = parse_url( $base_url );
          if ( ! is_array( $base ) || empty( $base['scheme'] ) || empty( $base['host'] ) ) {
              return NULL;
          }

          if ( strncmp( $link, '//', 2 ) === 0 ) {
              return $base['scheme'] . ':' . $link;
          }

          // Any other "scheme:" prefix (javascript:, mailto:, data:, ...) is not crawlable.
          if ( preg_match( '#^[a-z][a-z0-9+.\-]*:#i', $link ) ) {
              return NULL;
          }

          $origin = $base['scheme'] . '://' . $base['host'];
          if ( isset( $base['port'] ) ) {
              $origin .= ':' . $base['port'];
          }

          if ( $link[0] === '/' ) {
              return $origin . $link;
          }

          // Document-relative: resolve against the directory of the base page.
          $path = isset( $base['path'] ) ? $base['path'] : '/';
          $dir  = substr( $path, 0, strrpos( $path, '/' ) + 1 );

          return $origin . $dir . $link;
      }
  }
  // A crawl can run far longer than the default limit, so lift the execution time limit.
  set_time_limit(0);

  // Save into "surces_Img/" and skip any image of 100 MiB or more.
  $download_images = new download_image('surces_Img/', 1024 * 1024 * 100);

  // Start crawling from this page.
  $download_images->get_all_page_image('http://www.baidu.com/');
  ?>
Copy code

Snapshot, PHP


Related labels:
source:php.cn
Statement of this Website
The content of this article was voluntarily contributed by users, and the copyright belongs to the original author. This site assumes no legal responsibility for it. If you find any content that you suspect is plagiarized or infringing, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template