Principle: read the source of a page on a site, then use a regular expression to parse that source and extract all of the links it contains.
<?php
/**********qiushuiwuhen(2002-5-20)*************/
/*
 * Link extractor.
 *
 * Fetches the page at $url, finds every href= attribute with a regular
 * expression and prints one numbered line per link, resolving links that
 * carry no scheme against the page's site root or directory.
 *
 * Input:  $url - page to scan; a default is used when it is empty/unset.
 * Output: lines of the form "link<N>:<resolved url>" written with echo.
 *
 * NOTE(review): $url is opened as-is with fopen(). If it can come from
 * request input, restrict it to http/https before using this script.
 * NOTE(review): links are echoed unescaped; apply htmlspecialchars() if the
 * output is rendered as HTML.
 */

// Default page to scan when the caller has not provided one.
if (empty($url)) {
    $url = "http://www.csdn.net/expert/";
}

// Locate the first "/" after the "scheme://" prefix. The length guard avoids
// the ValueError PHP 8 raises when the offset lies beyond the end of the string.
$pathStart = strlen($url) >= 8 ? strpos($url, "/", 8) : false;

if ($pathStart === false) {
    // URL has no path component (e.g. "http://example.com").
    $site = $url;        // site root
    $base = $url . "/";  // directory of the page
} else {
    $site = substr($url, 0, $pathStart);              // site root, no trailing slash
    $base = substr($url, 0, strrpos($url, "/") + 1);  // directory of the page, with trailing slash
}

$fp = fopen($url, "r");
if ($fp === false) {
    // fopen() has already raised a warning; report and skip the scan.
    echo "Cannot open " . $url . "\n";
} else {
    $contents = stream_get_contents($fp);
    fclose($fp);
    if ($contents === false) {
        $contents = "";
    }

    // href value, optionally wrapped in single or double quotes; the value
    // ends at the first quote, whitespace or ">" character.
    $pattern = '~href\s*=\s*["\']?([^\s"\'>]+)~i';
    $regArr = array();
    preg_match_all($pattern, $contents, $regArr, PREG_SET_ORDER);

    foreach ($regArr as $i => $match) {
        $link = $match[1];
        $label = "link" . ($i + 1) . ":";

        if (strpos($link, "://") === false) {
            // No scheme: the link is relative to this site.
            if (substr($link, 0, 1) == "/") {
                echo $label . $site . $link . "\n";  // relative to the site root
            } else {
                echo $label . $base . $link . "\n";  // relative to the current directory
            }
        } else {
            echo $label . $link . "\n";              // already an absolute URL
        }
    }
}
?>