curl模拟自动登录&采集网页数据

时间:2023-03-09 10:06:59
curl模拟自动登录&采集网页数据
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>模拟登录测试</title>
</head> <body>
<?php
// Simulated login with cURL followed by an authenticated page fetch.
//
// Step 1: POST the credentials to $url and read the response headers to pick
//         up (a) the id carried by the redirect Location and (b) the cookies
//         set by the server.
// Step 2: GET the page built from that id, sending the cookies back, strip
//         the HTML tags and print the text plus the value found after the
//         "当前用户" marker.
//
// Every value that originates from the remote server is HTML-escaped before
// it is echoed; strip_tags() alone is not an XSS filter.

// ENT_SUBSTITUTE keeps htmlspecialchars() from returning an empty string when
// the remote text contains bytes that are not valid UTF-8.
$escFlags = ENT_QUOTES | ENT_SUBSTITUTE;

// ---- Step 1: login request -------------------------------------------------
$url = "";
$User_Agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
$post_data = "muser=***&passwd=***";
$refer = "http://***/";
// Redirect target prefix; whatever follows it on the Location line is the id.
$locationMarker = "http://***/default.aspx?id=";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);                 // target URL
curl_setopt($ch, CURLOPT_HEADER, true);              // include response headers in the returned string
curl_setopt($ch, CURLOPT_USERAGENT, $User_Agent);    // browser identification
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);      // return the response instead of printing it
curl_setopt($ch, CURLOPT_TIMEOUT, 5);                // give up after 5 seconds
curl_setopt($ch, CURLOPT_REFERER, $refer);           // Referer header
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);         // follow redirects
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);    // request body (makes this a POST)
$content = curl_exec($ch);

// Capture everything that needs the handle before closing it.
$loginError = '';
$headerSize = 0;
if ($content === false) {
    $loginError = curl_error($ch);
} else {
    // Total size of all header blocks (including those of followed
    // redirects), so the body is never mistaken for headers.
    $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
}
curl_close($ch);

$id = null;
$cookie = '';
if ($content !== false) {
    $cookieJar = array(); // cookie name => "name=value"; later headers override earlier ones
    $headArr = explode("\r\n", (string) substr($content, 0, $headerSize));
    foreach ($headArr as $loop) {
        if (stripos($loop, 'Location:') === 0) {
            // Take the text after the marker instead of a fixed byte offset,
            // so the extraction does not depend on the host name length.
            $markerPos = strpos($loop, $locationMarker);
            if ($markerPos !== false) {
                $id = trim(substr($loop, $markerPos + strlen($locationMarker)));
            }
        } elseif (stripos($loop, 'Set-Cookie:') === 0) {
            // Keep only the leading name=value pair; attributes such as
            // path/expires/HttpOnly must not be sent back to the server.
            $pair = trim(substr($loop, strlen('Set-Cookie:')));
            $semi = strpos($pair, ';');
            if ($semi !== false) {
                $pair = trim(substr($pair, 0, $semi));
            }
            $eq = strpos($pair, '=');
            if ($eq !== false && $eq > 0) {
                $cookieJar[trim(substr($pair, 0, $eq))] = $pair;
            }
        }
    }
    $cookie = implode('; ', $cookieJar);
}

if ($content === false) {
    echo "Login request failed: " . htmlspecialchars($loginError, $escFlags, 'UTF-8') . "<br />";
} elseif ($id === null || $id === '') {
    echo "Login failed: no redirect to the expected page was found in the response headers.<br />";
} else {
    $rurl = "****?id=" . $id;
    echo "edengUrl:<br>" . htmlspecialchars($rurl, $escFlags, 'UTF-8');
    echo "<br />";
    echo "setcookie:<br>" . htmlspecialchars($cookie, $escFlags, 'UTF-8') . "<br />";

    // ---- Step 2: fetch the page with the session cookies -------------------
    $url = $rurl;
    $refer = "http://****/";
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);               // target URL
    if ($cookie !== '') {
        curl_setopt($ch, CURLOPT_COOKIE, $cookie);     // send the cookies obtained at login
    }
    curl_setopt($ch, CURLOPT_USERAGENT, $User_Agent);  // browser identification
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);              // give up after 5 seconds
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);    // return the response instead of printing it
    curl_setopt($ch, CURLOPT_REFERER, $refer);         // Referer header
    $content = curl_exec($ch);
    $fetchError = ($content === false) ? curl_error($ch) : '';
    curl_close($ch);

    echo "<br />";
    echo "<br />解析:";
    echo "<br />";
    if ($content === false) {
        echo "Page request failed: " . htmlspecialchars($fetchError, $escFlags, 'UTF-8') . "<br />";
    } else {
        $content = strip_tags($content); // drop the HTML tags
        // double_encode=false keeps entities already present in the page
        // (e.g. &nbsp;) rendering as before while escaping everything else.
        echo htmlspecialchars($content, $escFlags, 'UTF-8', false);
        echo "<br />";
        echo "<br />";

        // NOTE(review): the marker is located in the trimmed text but the
        // offset is applied to the untrimmed text. The constants 28 and 9
        // were presumably tuned against the real page with that mismatch in
        // place, so the computation is kept as it was - confirm before
        // changing it. Both are byte counts, not character counts.
        $markerPos = strpos(trim($content), "当前用户");
        if ($markerPos === false) {
            // Previously false + 28 silently became offset 28 and printed garbage.
            echo "Marker not found in the page; the login was probably not accepted.<br />";
        } else {
            $sk = $markerPos + 28;
            $name = trim((string) substr($content, $sk, 9));
            $arr = explode(" ", $name);
            echo $sk . "<br />" . htmlspecialchars($arr[0], $escFlags, 'UTF-8') . "<br />";
        }
    }
}
?>
</body>
</html>