首页 > 后端开发 > php教程 > 网页快照功能_PHP教程

网页快照功能_PHP教程

WBOY
发布: 2016-07-20 11:05:24
原创
1177 人浏览过

//====================================================
//        FileName:    snap.class.php
//        Summary:    网页快照类
//        Author:        millken(迷路林肯)
//        LastModifed:2007-06-29
//        copyright (c)2007 [email]millken@gmail.com[/email]
//====================================================
class snap{
    var $dir;
    var $log;
    var $contents;
    var $filename;
    var $host;
    var $name;
    var $data_ts;
    var $ttl;
    var $url;
    var $ts;
    function snap(){
        $this->log = "New snap() object instantiated.
n";  
        $this->dir = dirname(__FILE__)."/";
    }
    function fetch($url="",$ttl=10){
        $this->log .= "--------------------------------
fetch() called
n";
        $this->log .= "url: ".$url."
n";
        $hosts = parse_url($url);
        $this->host = $hosts['scheme'].'://'.$hosts['host'].'/';
        if (!$url) {
            $this->log .= "OOPS: You need to pass a URL!
";
            return false;
        }
        $this->ttl = $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir.$this->name;
        $this->log .= "Filename: ".$this->filename."
";
        $this->getFile_ts();
        $this->file_get_content();

    }
    function file_get_content(){
        ob_start();
        $this->ts = time() - $this->data_ts;
        if($this->data_ts 0 && $this->ts ttl){
            $this->log .= "cache has expired
";
            @readfile($this->filename);  
            $this->contents = ob_get_contents();
            ob_end_clean();
        }else{
            $this->log .= "cache hasn't expired
";       
            @readfile($this->url);  
            $this->contents = ob_get_contents();
            ob_end_clean();
            $this->saveToCache();
        }
        return true;
    }
    function saveToCache(){
        $this->log .= "saveToCache() called
";
        //create file pointer
        if (!$fp=@fopen($this->filename,"w")) {
            $this->log .= "Could not open ".$this->filename."
";
            return false;
        }
        $this->contents = $this->formaturl($this->contents,$this->host);
        $this->contents = preg_replace("'<script>]*?>.*?</script>'si","",$this->contents);
        //write to file
        if (!@fwrite($fp,$this->contents)) {
            $this->log .= "Could not write to ".$this->filename."
";
            fclose($fp);
            return false;
        }
        //close file pointer
        fclose($fp);
        return true;
    }
    function getFile_ts(){
        $this->log .= "getFile_ts() called
";
        if (!file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->log .= $this->filename." does not exist
";
            return false;
        }
        $this->data_ts = filemtime($this->filename);
        return true;
    }
    function formaturl($l1,$l2){
    if (preg_match_all("/(网页快照功能_PHP教程]+src="([^"]+)"[^>]*>)|(]+href="([^"]+)"[^>]*>)|(]+href="([^"]+)"[^>]*>)|(网页快照功能_PHP教程]+src='([^']+)'[^>]*>)|(]+href='([^']+)'[^>]*>)/i",$l1,$regs)){
      foreach($regs[0] as $num => $url){
       $l1 = str_replace($url,$this->lIIIIl($url,$l2),$l1);
      }
    }
    return     $l1;
    }

    function lIIIIl($l1,$l2){
    if(preg_match("/(.*)(href|src)=(.+?)( |/>|>).*/i",$l1,$regs)){$I2 = $regs[3];}
    if(strlen($I2)>0){
      $I1 = str_replace(chr(34),"",$I2);
      $I1 = str_replace(chr(39),"",$I1);
    }else{return $l1;}
    $url_parsed = parse_url($l2);
    $scheme      = $url_parsed["scheme"];if($scheme!=""){$scheme = $scheme."://";}
    $host      = $url_parsed["host"];  
    $l3       = $scheme.$host;
    if(strlen($l3)==0){return $l1;}
    $path      = dirname($url_parsed["path"]);if($path[0]==""){$path="";}
    $pos      = strpos($I1,"#");
    if($pos>0) $I1 = substr($I1,0,$pos);
    //判断类型
    if(preg_match("/^(http|https|ftp):(//|\)(([w/+-~`@:%])+.)+([w/.=?+-~`@':!%#]|(&)|&)+/i",$I1)){return $l1; }//http开头的url类型要跳过
    elseif($I1[0]=="/"){$I1 = $l3.$I1;}//绝对路径
    elseif(substr($I1,0,3)=="../"){//相对路径
          while(substr($I1,0,3)=="../"){
       $I1 = substr($I1,strlen($I1)-(strlen($I1)-3),strlen($I1)-3);
       if(strlen($path)>0){
        $path = dirname($path);
       }
      }
      $I1 = $l3.$path."/".$I1;
    }
    elseif(substr($I1,0,2)=="./"){
      $I1 = $l3.$path.substr($I1,strlen($I1)-(strlen($I1)-1),strlen($I1)-1);
    }
    elseif(strtolower(substr($I1,0,7))=="mailto:"||strtolower(substr($I1,0,11))=="java script:"){
      return $l1;
    }else{
      $I1 = $l3.$path."/".$I1;
    }
    return str_replace($I2,""$I1"",$l1);
    }
}
?>
 

用法test.php:

 require_once(dirname(__FILE__).'/snap.class.php');
$h = new snap();
$h->fetch($_GET['url']);
//echo $h->log;
echo $h->contents;
?>


www.bkjia.comtruehttp://www.bkjia.com/PHPjc/445124.htmlTechArticle?PHP //==================================================== // FileName: snap.class.php // Summary: 网页快照类 // Author: millken(迷路林肯) // LastModifed:2007-06-29 // co...
相关标签:
来源:php.cn
本站声明
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn
热门教程
更多>
最新下载
更多>
网站特效
网站源码
网站素材
前端模板