PHP 小方法之 仿百度蜘蛛采集

时间:2023-03-08 21:43:25
if(!function_exists('_GetContent')){
/**
 * Fetch a URL with cURL while presenting the request as Baiduspider.
 *
 * The request carries a Baiduspider User-Agent plus X-FORWARDED-FOR and
 * CLIENT-IP headers set to a fixed address. Response headers are not
 * included in the returned body.
 *
 * On failure the error text is echoed to the output (callers of the original
 * implementation may rely on that) and null is returned.
 *
 * @param string $url             Address to fetch.
 * @param int    $timeout         Connect timeout in seconds (default 15, as before).
 * @param int    $transferTimeout Limit in seconds for the whole transfer; 0 means
 *                                no limit, which is the original behaviour.
 *
 * @return string|null Response body on success, null when the transfer failed.
 */
function _GetContent( $url, $timeout = 15, $transferTimeout = 0 ){
    // Address sent in the spoofed client-IP headers (a Baidu spider IP per the original author).
    $ip = '220.181.108.91';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // 0 disables the overall transfer limit; only the connect phase is bounded by default.
    curl_setopt($ch, CURLOPT_TIMEOUT, $transferTimeout);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // Spoof the client IP as seen by servers that trust these headers.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array('X-FORWARDED-FOR:'.$ip, 'CLIENT-IP:'.$ip));
    // Spoof the Baiduspider User-Agent.
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)");
    // Return the body as a string instead of printing it, without response headers.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0);

    $content = curl_exec($ch);
    if ($content !== false) {
        return $content;
    }

    // curl_errno() already returns an int, so compare it strictly.
    // 28 is CURLE_OPERATION_TIMEDOUT; it gets a friendlier message than cURL's own text.
    if (curl_errno($ch) === 28) {
        $error = '访问目标地址超时';
    } else {
        $error = curl_error($ch);
    }
    echo $error;

    return null;
}
}