转自互联网
代码未测试
=================
利用 cURL 库的 curl_multi 接口一次并发抓取多个页面的网页抓取程序(常被称为“多线程”抓取,实际上是在单个进程内并发处理多个请求)
// Fetch several pages concurrently with the cURL multi interface.
//
// Input:  $connomains - array of URLs, expected to be defined before this
//         point, for example:
//         $connomains=array("http://www.17buyhost.com","http://www.17buyhost.cn","http://www.592wife.cn");
// Output: $res - response bodies keyed like $connomains; a URL whose
//         handle reported an error has no entry.
if (!isset($connomains) || !is_array($connomains)) {
    // Nothing to fetch; avoids iterating an undefined variable below.
    $connomains = array();
}
$conn = array();
$res = array(); // initialised so later code can iterate it even if every transfer fails

$mh = curl_multi_init();
foreach ($connomains as $i => $url) {
    $handle = curl_init($url);
    if ($handle === false) {
        print "Curl 句柄错误$i: curl_init failed\n";
        continue;
    }
    // Return the body as a string instead of echoing it: fetched pages are
    // normally stored in a file or database, not shown in the browser.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_multi_add_handle($mh, $handle);
    $conn[$i] = $handle;
}

// Start the transfers.
do {
    $mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);

while ($active and $mrc == CURLM_OK) {
    // Wait for network activity. curl_multi_select() may return -1 (select
    // failure, or no descriptors to wait on yet); in that case pause
    // briefly instead of spinning, but still drive the transfers below so
    // that $active can change and the loop can finish.
    if (curl_multi_select($mh) == -1) {
        usleep(1000);
    }
    // Pull in any new data, or at least handle timeouts.
    do {
        $mrc = curl_multi_exec($mh, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);
}

if ($mrc != CURLM_OK) {
    print "Curl 多线程读取错误!$mrc\n";
}

// Collect the results and release every handle that was actually created.
foreach ($conn as $i => $handle) {
    if (($err = curl_error($handle)) == '') {
        $res[$i] = curl_multi_getcontent($handle);
    } else {
        print "Curl 句柄错误$i: $err\n";
    }
    curl_multi_remove_handle($mh, $handle);
    curl_close($handle);
}
curl_multi_close($mh);
// 将接收到的数据写入文本文件。
// Write every fetched page body, back to back, into yds.txt.
// $res is expected to hold one page body per element; if it was never set,
// the file is still created (or truncated) and left empty.
$fp = fopen("yds.txt", "w");
if ($fp === false) {
    // fopen() returns false on failure; writing to or closing a
    // non-resource would only produce further errors.
    print "无法打开文件 yds.txt 进行写入\n";
} else {
    $pages = (isset($res) && is_array($res)) ? $res : array();
    foreach ($pages as $str) {
        fputs($fp, $str);
    }
    fclose($fp);
}