//2016/09/16///
//by xbw///
![](https://img.laitimes.com/img/_0nNw4CM6IyYiwiM6ICdiwiIyVGduV2QvwVe0lmdhJ3ZvwFM38CXlZHbvN3cpR2Lc1TPB10QGtWUCpEMJ9CXsxWam9CXwADNvwVZ6l2c052bm9CXUJDT1wkNhVzLcRnbvZ2LcZXUYpVd1kmYr50MZV3YyI2cKJDT29GRjBjUIF2LcRHelR3LcJzLctmch1mclRXY39DM0QTOwMzM4EjNxkDM2EDMy8CX0Vmbu4GZzNmLn9Gbi1yZtl2Lc9CX6MHc0RHaiojIsJye.jpg)
伺服器端的抓取程式（PHP 源碼）：
<?php
/**
 * Crawls the autohome.com.cn article listing, pages 60 down to 1, and stores
 * every article whose URL is not yet in the database.
 *
 * For each listing page the script extracts, per article: the link URL, the
 * thumbnail URL, the headline (<h3>) and the summary (<p>). An article is
 * inserted only when no row with the same URL exists.
 *
 * Connection settings ($mysql_server_name, $mysql_username, $mysql_password,
 * $mysql_database) are expected to come from db_config.php.
 *
 * NOTE(review): this still relies on the legacy mysql_* extension (removed in
 * PHP 7), matching the original script; migrating to mysqli/PDO prepared
 * statements is the long-term fix.
 */
set_time_limit(0); // the crawl fetches 60 remote pages; lift the execution time limit

require("db_config.php");

$conn = mysql_connect($mysql_server_name, $mysql_username, $mysql_password) or die("error connecting");
mysql_query("set names 'gb2312'", $conn);
mysql_select_db($mysql_database, $conn) or die("error selecting database");

// The duplicate check and the insert must target the same table, otherwise
// freshly inserted rows are never recognised as duplicates.
// NOTE(review): the original checked `news` but inserted into `newsa`;
// `newsa` (the table actually written to) is used for both — confirm.
$table = 'newsa';

// One listing entry: link -> thumbnail -> <h3> headline -> <p> summary.
// The href group must be [^"]+ (anything up to the closing quote).
$pattern = '/<a href="(?<grp0>[^"]+)">[\s]+<div cl[^<]+pic"><img src="(?<grp1>[^"]+)"><\/div>[^<]*<h3>(?<grp2>[^<]+)<\/h3>[\S\s]+?<p>(?<grp3>[^<]+)<\/p>/';

for ($page = 60; $page > 0; $page--) {
    $str = file_get_contents('http://www.autohome.com.cn/all/' . $page . '/#liststart');
    if ($str === false) {
        // Network/HTTP failure: skip this page instead of matching against `false`.
        echo 'page ' . $page . ' could not be fetched' . "<BR>";
        continue;
    }

    $isMatched = preg_match_all($pattern, $str, $matches);
    if (!$isMatched) {
        // 0 = no entries on this page, false = PCRE error; nothing to store either way.
        continue;
    }

    for ($i = 0; $i < $isMatched; $i++) {
        // Scraped text is untrusted and may contain quotes: escape before building SQL.
        $url     = mysql_real_escape_string($matches['grp0'][$i], $conn);
        $pic     = mysql_real_escape_string($matches['grp1'][$i], $conn);
        $title   = mysql_real_escape_string($matches['grp2'][$i], $conn);
        $content = mysql_real_escape_string($matches['grp3'][$i], $conn);

        $check = mysql_query("SELECT count(id) FROM $table WHERE url='$url'", $conn);
        if ($check === false) {
            echo 'duplicate check failed: ' . mysql_error($conn) . "<BR>";
            continue;
        }
        $rowz = mysql_fetch_row($check);
        echo $rowz[0]."--------------";

        if ($rowz[0] == 0) {
            $result = "INSERT INTO $table(title,content,url,pic,time) VALUES('$title','$content','$url','$pic',NOW())";
            if (mysql_query($result, $conn)) {
                echo '該資料抓取成功'."<BR>";
            } else {
                // Only report success when the row was really written.
                echo 'insert failed: ' . mysql_error($conn) . "<BR>";
            }
        } else {
            echo '該資料已存在'."<BR>";
        }
    }
}
?>
存儲到資料庫裡
我已經抓了10萬條資料了。。。。。。
汽車之家真心不錯。。。。。。。。
需要資料的可以留下郵箱