还是由于工作上的需要,做了这个分享链接功能
<!doctype html>
<html lang="zh-Hans">
<head>
  <meta charset="utf-8">
  <title>分享链接 php+ajax实现内容获取与动态修改</title>
  <!--
    parse.js is loaded synchronously (no defer/async) on purpose: the inline
    script at the end of the body calls addNewThing() and parseAsynch(),
    which parse.js defines, so it must have executed by then.
  -->
  <script src="/test/parseurl/parse.js"></script>
  <style>
    /* Layout helpers for the input row and the generated preview. */
    .float { float: left; padding: 5px; }
    .float2 { float: left; padding: 1px 5px; }

    /* URL text box. The original declared a misspelled "heigth: 33px", which
       browsers ignore; it is dropped so the rendered size stays what it was. */
    .inputs { width: 350px; border: 1px solid #C7E0E7; padding: 8px 5px; font-size: 13px; }

    /* In-place editors created by parse.js when the title/summary is clicked. */
    .input_small { width: 240px; }

    .buttons { border: medium none; background-color: green; height: 33px; width: 50px; cursor: pointer; }

    /* Every generated preview row starts on a new line. */
    .clearboth { clear: both; float: left; }
    .title { font-size: 16px; width: 240px; line-height: 20px; }
    .content { font-size: 12px; width: 240px; line-height: 16px; }
    .content1 { font-size: 12px; width: 240px; line-height: 20px; }
    .clearboth img { width: 120px; height: 100px; }

    /* Hover highlight hinting that the title/summary can be edited. */
    .backovercolor { background-color: #FFFF99; }
  </style>
</head>
<body>
  <div>
    <div class="float">
      <!-- No visible label exists in this layout, so the field is named via aria-label. -->
      <input class="inputs" id="parseurl" name="parseurl" type="text" aria-label="要分享的网址">
    </div>
    <div class="float">
      <input class="buttons" id="parse" name="parse" type="button" value="获取">
    </div>
    <!-- parse.js renders the fetched preview here; aria-live announces the async update. -->
    <div id="append" aria-live="polite"></div>
  </div>
  <script>
    // Bind the click handler of the fetch button (both functions come from parse.js).
    addNewThing(document.getElementById('parse'), 'click', parseAsynch);
  </script>
</body>
</html>
parse.js 此文件内容比较多,涉及到ajax请求,页面元素的动态生成,对于json的数据处理,里面的注释应该可以让大家看懂了
js封装的不太好,对JS比较熟悉精通的朋友可以多给点意见
/*
 * parse.js - client side of the "share a link" preview widget.
 *
 * Sends the URL typed by the user to the server-side parser, then renders the
 * returned title, summary and images, lets the user edit title/summary in
 * place and page through the images.
 *
 * Globals kept for backward compatibility: parseUrl, limitTime, clearTo, imgs,
 * Ajax, addNewThing, parseAsynch, fromTextToInput, fromInputToText,
 * fromTextToTextarea, fromTextareaToText, addNewObj, prevImg, nextImg.
 * prevImg/nextImg must stay global: the generated pager calls them from
 * inline onclick attributes.
 */
var parseUrl = '/test/parseurl/parse.php'; // server-side endpoint that fetches and parses the page
var limitTime = 20;                        // maximum time to wait for a response, in seconds
var clearTo = null;                        // timer handle of the most recently started request
var imgs = [];                             // image URLs of the page currently previewed

/**
 * Escapes the HTML-significant characters of a value so that it can be
 * inserted through innerHTML as plain text.
 *
 * @param {*} text value to escape; null/undefined become an empty string
 * @returns {string} the escaped text
 */
function escapeHtml(text) {
    return String(text == null ? '' : text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

/** Returns the text of an element without any markup (innerText is the old-IE fallback). */
function getNodeText(node) {
    if (typeof node.textContent == 'string') {
        return node.textContent;
    }
    return node.innerText || '';
}

/** Replaces the content of an element with a single text node, so markup is never interpreted. */
function setNodeText(node, text) {
    node.innerHTML = '';
    node.appendChild(document.createTextNode(text));
}

/**
 * Normalises the "pic" member of the server response into a plain array of
 * non-empty strings. The server may send a JSON object instead of an array
 * when its PHP array has gaps in the keys, so both shapes are accepted.
 */
function toImageList(pics) {
    var list = [];
    if (!pics || typeof pics != 'object') {
        return list;
    }
    for (var key in pics) {
        if (Object.prototype.hasOwnProperty.call(pics, key) && typeof pics[key] == 'string' && pics[key]) {
            list.push(pics[key]);
        }
    }
    return list;
}

var Ajax = function () {
    /**
     * Performs an XMLHttpRequest.
     *
     * @param {string} url target URL
     * @param {Object} opt options:
     *        async    - false for a synchronous request (default asynchronous)
     *        method   - 'GET' (default) or 'POST', case-insensitive
     *        data     - already encoded query/body string
     *        dataType - 'html' (default), 'json' or 'xml', case-insensitive
     *        success  - called with the decoded response on a 2xx status
     *        failure  - called with the raw response text on any other status,
     *                   or when a JSON response cannot be parsed
     * @returns {Object|boolean} the request object, or false when the browser
     *          offers no XMLHttpRequest implementation
     */
    function request(url, opt) {
        function fn() {}
        opt = opt || {};
        var async = opt.async !== false,
            method = (opt.method || 'GET').toUpperCase(),
            data = opt.data || null,
            dataType = (opt.dataType || 'html').toUpperCase(),
            success = opt.success || fn,
            failure = opt.failure || fn;

        if (method == 'GET' && data) {
            url += (url.indexOf('?') == -1 ? '?' : '&') + data;
            data = null;
        }

        var xhr = _createHttpRequest();
        if (!xhr) {
            failure('');
            return false;
        }

        /*
         * Watchdog: if the request has not completed within limitTime seconds
         * it is aborted and the user is told to retry. The handle is local to
         * this request, so overlapping requests cannot orphan each other's
         * timer; clearTo mirrors the latest handle for older callers.
         */
        var timer = setTimeout(function () {
            xhr.abort();
            alert("系统繁忙,请重刷新页面或稍后再试...");
        }, limitTime * 1000);
        clearTo = timer;

        xhr.onreadystatechange = function () {
            _onStateChange(xhr, success, failure, dataType, timer);
        };

        xhr.open(method, url, async);
        if (method == 'POST') {
            xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded;');
        }
        xhr.send(data);
        return xhr;
    }

    /** Creates the request object, falling back to ActiveX for legacy IE; false when unavailable. */
    function _createHttpRequest() {
        if (window.XMLHttpRequest) {
            return new XMLHttpRequest();
        }
        if (window.ActiveXObject) {
            var versions = ['Microsoft.XMLHTTP', 'MSXML.XMLHTTP', 'Msxml2.XMLHTTP.7.0', 'Msxml2.XMLHTTP.6.0',
                            'Msxml2.XMLHTTP.5.0', 'Msxml2.XMLHTTP.4.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP'];
            for (var i = 0, icount = versions.length; i < icount; i++) {
                try {
                    var candidate = new ActiveXObject(versions[i]);
                    if (candidate) {
                        return candidate;
                    }
                } catch (e) {
                    // this ProgID is not installed, try the next one
                }
            }
        }
        return false;
    }

    /** Decodes a JSON document; throws when the text is not valid JSON. */
    function _parseJson(text) {
        if (typeof JSON != 'undefined' && JSON.parse) {
            return JSON.parse(text);
        }
        // SECURITY: legacy fallback for browsers without a native JSON object.
        // eval executes whatever the server returned, so it must only ever see
        // responses from our own trusted endpoint.
        return eval('(' + text + ')');
    }

    /** Dispatches the completed request to success/failure and stops its watchdog timer. */
    function _onStateChange(xhr, success, failure, dataType, timer) {
        if (xhr.readyState != 4) {
            return;
        }
        clearTimeout(timer);

        var s = xhr.status;
        if (s < 200 || s >= 300) {
            failure(xhr.responseText);
            return;
        }

        var resCont;
        if (dataType == 'JSON') {
            try {
                resCont = _parseJson(xhr.responseText);
            } catch (e) {
                // malformed JSON is a failed request, not an uncaught exception
                failure(xhr.responseText);
                return;
            }
        } else if (dataType == 'XML') {
            // the server has to send an XML content type for responseXML to be populated
            resCont = xhr.responseXML;
        } else {
            resCont = xhr.responseText;
        }
        success(resCont);
    }

    return {request: request};
}();

/** Cross-browser event binding (attachEvent is the legacy IE fallback). */
function addNewThing(obj, ename, func) {
    if (document.addEventListener) {
        obj.addEventListener(ename, func, false);
    } else {
        obj.attachEvent('on' + ename, func);
    }
}

/**
 * Click handler of the fetch button: asks the server to parse the URL in the
 * #parseurl field and renders the preview when the answer arrives.
 *
 * @returns {boolean|undefined} false when the field does not hold an http(s) URL
 */
function parseAsynch() {
    var newurl = document.getElementById('parseurl').value.replace(/^\s+|\s+$/g, '');
    if (!/^https?:\/\//i.test(newurl)) {
        return false;
    }

    Ajax.request(parseUrl, {
        // encode the value, otherwise "&", "+" or "#" in the target URL corrupt the POST body
        data: 'url=' + encodeURIComponent(newurl),
        dataType: 'json',
        method: 'post',
        success: function (data) {
            renderPreview(data, newurl);
        }
    });
}

/**
 * Renders the parsed page (first image, title, URL, summary, image pager)
 * into #append. Title, URL and summary come from a remote page and are
 * therefore escaped before being inserted as HTML.
 */
function renderPreview(data, pageUrl) {
    data = data || {};
    document.getElementById("append").innerHTML = ''; // drop the previous preview
    imgs = toImageList(data.pic);

    // left column: the first image
    addNewObj('div', 'append', '', 'clearboth', 'image_display');
    if (imgs.length) {
        var firstImg = document.createElement('img');
        firstImg.src = imgs[0];
        firstImg.id = 'change_pic';
        firstImg.alt = '';
        document.getElementById('image_display').appendChild(firstImg);
    }

    // right column: title, URL, summary
    addNewObj('div', 'append', '', 'float2', 'title_display');

    addNewObj('span', 'title_display', escapeHtml(data.title), 'clearboth title', 'parse_title');
    var titleObj = document.getElementById('parse_title');
    titleObj.onclick = function () {
        if (!document.getElementById('parse_title_input')) {
            fromTextToInput(this);
        }
    };
    titleObj.onmouseover = function () { this.className = 'clearboth title backovercolor'; };
    titleObj.onmouseout = function () { this.className = 'clearboth title'; };

    addNewObj('span', 'title_display', escapeHtml(pageUrl), 'clearboth content1');

    addNewObj('span', 'title_display', escapeHtml(data.content), 'clearboth content', 'parse_content');
    var contentObj = document.getElementById('parse_content');
    contentObj.onclick = function () {
        if (!document.getElementById('parse_content_textarea')) {
            fromTextToTextarea(this);
        }
    };
    contentObj.onmouseover = function () { this.className = 'clearboth content backovercolor'; };
    contentObj.onmouseout = function () { this.className = 'clearboth content'; };

    // pager, only when there is more than one image
    if (imgs.length > 1) {
        var pageContent = "<a href=\"javascript:;\" onclick=\"prevImg();\">&lt;&lt;</a>";
        pageContent += " <span id=\"pregnum\">1</span>-<span>" + imgs.length + "</span> ";
        pageContent += "<a href=\"javascript:;\" onclick=\"nextImg();\">&gt;&gt;</a>";
        addNewObj('span', 'title_display', pageContent, 'clearboth content');
    }
}

/** Swaps the text of an element for a text input holding that text (id: "<element id>_input"). */
function fromTextToInput(obj) {
    var val = getNodeText(obj);
    var newObj = document.createElement('input');
    newObj.id = obj.id + '_input';
    newObj.type = 'text';
    newObj.className = 'input_small';
    newObj.onblur = function () {
        fromInputToText(this);
    };
    newObj.value = val;
    obj.innerHTML = '';
    obj.appendChild(newObj);
    newObj.focus();
}

/** Blur handler of the title input: writes the edited value back as plain text. */
function fromInputToText(obj) {
    var val = obj.value;
    var holder = obj.parentNode;
    holder.onclick = function () {
        if (!document.getElementById(this.id + '_input')) {
            fromTextToInput(this);
        }
    };
    // removes the input; the value is inserted as text so typed markup is not interpreted
    setNodeText(holder, val);
}

/** Swaps the text of an element for a textarea holding that text (id: "<element id>_textarea"). */
function fromTextToTextarea(obj) {
    var val = getNodeText(obj);
    var newObj = document.createElement('textarea');
    newObj.id = obj.id + '_textarea';
    newObj.rows = 2;
    newObj.className = 'input_small';
    newObj.onblur = function () {
        fromTextareaToText(this);
    };
    newObj.value = val;
    obj.innerHTML = '';
    obj.appendChild(newObj);
    newObj.focus();
}

/** Blur handler of the summary textarea: writes the edited value back as plain text. */
function fromTextareaToText(obj) {
    var val = obj.value;
    var holder = obj.parentNode;
    holder.onclick = function () {
        if (!document.getElementById(this.id + '_textarea')) {
            fromTextToTextarea(this);
        }
    };
    // removes the textarea; the value is inserted as text so typed markup is not interpreted
    setNodeText(holder, val);
}

/**
 * Creates an element and appends it to the element with id parentObjTag.
 *
 * @param {string} newObjTag    tag name of the element to create
 * @param {string} parentObjTag id of the parent element
 * @param {string} content      HTML content; callers must escape untrusted text
 * @param {string} classname    class attribute (optional)
 * @param {string} idname       id attribute (optional)
 */
function addNewObj(newObjTag, parentObjTag, content, classname, idname) {
    var appendObjs = document.getElementById(parentObjTag);
    if (!appendObjs) {
        return;
    }
    var newObj = document.createElement(newObjTag);
    if (content) newObj.innerHTML = content;
    if (classname) newObj.className = classname;
    if (idname) newObj.id = idname;
    appendObjs.appendChild(newObj);
}

/**
 * Shows the image at the given 1-based position and updates the pager counter.
 *
 * @returns {boolean|undefined} false when the position is out of range or the pager is not rendered
 */
function showImgAt(pos) {
    var counter = document.getElementById('pregnum');
    var picObj = document.getElementById('change_pic');
    if (!counter || !picObj || isNaN(pos) || pos < 1 || pos > imgs.length) {
        return false;
    }
    var src = imgs[pos - 1];
    // preload first; IE6 only displays the swapped image reliably this way
    var img = new Image();
    img.onload = function () {
        picObj.src = src;
    };
    img.src = src;
    counter.innerHTML = pos;
}

/** Reads the 1-based position currently shown by the pager counter (NaN when there is no pager). */
function currentImgPos() {
    var counter = document.getElementById('pregnum');
    return counter ? parseInt(counter.innerHTML, 10) : NaN;
}

/** Pager: shows the previous image; returns false when already on the first one. */
function prevImg() {
    return showImgAt(currentImgPos() - 1);
}

/** Pager: shows the next image; returns false when already on the last one. */
function nextImg() {
    return showImgAt(currentImgPos() + 1);
}
parse.php 主要是用到curl对页面的请求,这个比file,file_get_contents去获取页面要稳定得多,这得益于它的模拟浏览器功能异常的强大
还有几点:
1.对于抓取回来的页面要进一步的过滤
2.由于抓取回来的页面的编码有些不同,所以这里需要先获取到页面的编码,然后再通过iconv来转换编码(在此用的是mb_convert_encoding,这是由于公司服务器配置的环境iconv函数用不了)
3.对于获取的图片的路径的补全
好了,源码贴上
<?php
/**
 * parse.php - fetches the page whose address is POSTed as "url" and answers
 * with a JSON document {title, content, pic}:
 *   title   - text of the <title> element
 *   content - up to 120 bytes of tag-free body text taken from the middle of the page
 *   pic     - list of absolute http(s) URLs of the images found in the body
 *
 * An invalid URL or a failed download yields the same structure with empty values.
 *
 * SECURITY: this script downloads arbitrary URLs on behalf of the visitor.
 * Only http/https are allowed, but internal hosts are still reachable;
 * restrict access to the script or filter the target hosts before exposing
 * it publicly.
 */

/** Sends the JSON answer and ends the script. */
function parseurl_respond($title, $content, $pics)
{
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
    }
    // array_values: the client expects a JSON array; a list with gaps in its
    // keys would be encoded as an object
    echo json_encode(array('title' => $title, 'content' => $content, 'pic' => array_values($pics)));
    exit;
}

/** Returns the charset declared in a <meta> element of the page, or 'utf-8' when none is declared. */
function parseurl_detect_charset($html)
{
    if (preg_match('/<meta[^>]+charset\s*=\s*["\']?\s*([a-z0-9_:.\-]+)/i', $html, $m)) {
        return $m[1];
    }
    return 'utf-8';
}

/**
 * Turns the src of an image into an absolute URL.
 *
 * @param string $src    value of the src attribute, already trimmed
 * @param string $scheme scheme of the page (http or https)
 * @param string $origin scheme://host[:port] of the page
 * @param string $dir    absolute URL of the directory of the page, with trailing slash
 * @return string the absolute URL, or '' when the image must be skipped
 */
function parseurl_absolute_url($src, $scheme, $origin, $dir)
{
    if ($src === '') {
        return '';
    }
    if (preg_match('#^https?://#i', $src)) {
        return $src;                       // already absolute
    }
    if (strpos($src, '//') === 0) {
        return $scheme . ':' . $src;       // protocol-relative
    }
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $src)) {
        return '';                         // data:, javascript:, ... cannot be resolved against the page
    }
    if ($src[0] === '/') {
        return $origin . $src;             // relative to the site root
    }
    return $dir . $src;                    // relative to the directory of the page
}

// --- validate the requested URL ---------------------------------------------
$url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
$urlinfo = parse_url($url);
$scheme = (is_array($urlinfo) && isset($urlinfo['scheme'])) ? strtolower($urlinfo['scheme']) : '';
if (!is_array($urlinfo) || empty($urlinfo['host']) || !in_array($scheme, array('http', 'https'), true)) {
    parseurl_respond('', '', array());
}

// base addresses used to complete relative image paths
$origin = $scheme . '://' . $urlinfo['host'] . (isset($urlinfo['port']) ? ':' . $urlinfo['port'] : '');
$path = isset($urlinfo['path']) ? $urlinfo['path'] : '/';
$lastSlash = strrpos($path, '/');
$dir = $origin . ($lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1));

// --- download the page ------------------------------------------------------
// request headers copied from a real browser so that sites answer as usual
$headers = array(
    'User-Agent:Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0',
    'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset:GB2312,utf-8;q=0.7,*;q=0.7',
    'Cache-Control:max-age=0',
    'Accept-Language:zh-cn,zh;q=0.5'
);

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// stay below the 20 second limit of the client-side request
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
if (defined('CURLOPT_PROTOCOLS')) {
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
}
$cont = curl_exec($ch);
curl_close($ch);

if (!is_string($cont) || $cont === '') {
    parseurl_respond('', '', array());
}

// --- clean up and convert to UTF-8 ------------------------------------------
// remove the elements that would pollute the extracted text
$striparr = array(
    "/\<script([^>]*?)\>(.*?)\<\/script>/si", // scripts
    "/\<\!\-\-(.*?)\-\-\>/is",                // comments
    "/\<style([^>]*?)\>(.*?)\<\/style>/is",   // style sheets
);
$stripped = preg_replace($striparr, array('', '', ''), $cont);
if (is_string($stripped)) {
    $cont = $stripped; // preg_replace returns null on a PCRE error; keep the raw page then
}

// mb_convert_encoding is used because iconv is not available on the target server
$charset = parseurl_detect_charset($cont);
try {
    $converted = @mb_convert_encoding($cont, 'utf-8', $charset);
} catch (ValueError $e) {
    $converted = false; // PHP 8 throws for an unknown encoding name
}
if (is_string($converted)) {
    $cont = $converted;
}

// --- title and summary ------------------------------------------------------
$match = array();
$title = preg_match("/<title\>(.*?)\<\/title\>/si", $cont, $match) ? trim($match[1]) : '';

$match = array();
$body = preg_match("/\<body([^>]*?)\>(.*?)\<\/body\>/Usi", $cont, $match) ? $match[2] : $cont;

// strip tags and line breaks, then take a piece from the middle of the text;
// mb_strcut cuts on character boundaries, so no multi-byte character is split
$content = str_replace(array("\r\n", "\n", "\r", "\t"), '', strip_tags($body));
$middlepos = (int) (strlen($content) / 2);
$content = mb_strcut($content, $middlepos, 120, 'utf-8');

// --- images -----------------------------------------------------------------
$matchpics = array();
preg_match_all("/<img([^\>]*?)src=\"(.*?)\"([^\>]*?)[\/]{0,1}>/i", $body, $matchpics);

$piclist = array();
if (!empty($matchpics[2]) && is_array($matchpics[2])) {
    foreach ($matchpics[2] as $src) {
        $absolute = parseurl_absolute_url(trim($src), $scheme, $origin, $dir);
        if ($absolute !== '') {
            $piclist[] = $absolute;
        }
    }
}

parseurl_respond($title, $content, $piclist);
?>
需要这个功能的朋友还是zero
俺看不懂
可以看看演示的效果啊,地址:
http://www.onlypo.com/archives/16
看来大家只比较关心
这种类型的网站了
一忠保洁
不过需要提醒一下,你使用 json 返回数据就称不上是 ajax 了,只能叫 ajaj
AJAX Asynchronous JavaScript and XML
ajaj Asynchronous JavaScript and JSON
我去看了演示地址 怎么什么都获取不到?浏览器问题?
怎麼樣看到想要的,效果,可以獲取數據庫中的數據否?
我去看了演示地址 怎么什么都获取不到?浏览器问题?
服务器在国外,你打http://www.facebook.com,或是其它站点,好像baidu.com服务器上访问不了
http://www.csdn.net等是可以的
怎麼樣看到想要的,效果,可以獲取數據庫中的數據否?
那是当然不可能的,只能获取到页面上的数据
不过需要提醒一下,你使用 json 返回数据就称不上是 ajax 了,只能叫 ajaj
AJAX Asynchronous JavaScript and XML
ajaj Asynchronous JavaScript and JSON
嗯,没有注意这点,说的有道理,我是觉得返回一个XML有许多不必要的内容,所以没有返回XML
不错。
不错不错
Ajax实例:获得站点文件内容
http://3aj.cn/article/65.html