PHP采集网页部分数据的问题
(http://sale.gb168.cn/Saleagent/Customer/Shopping/StandardDetails.aspx?StandNo=GB/T20...
(http://sale.gb168.cn/Saleagent/Customer/Shopping/StandardDetails.aspx?StandNo=GB/T 20819.1-2007)括号中是全部网址 采集这个网页的数据
我的函数如下:
/**
 * Fetch the page at the given URL and return its HTML with whitespace compacted.
 *
 * Surrounding whitespace is trimmed from the URL and embedded spaces are
 * percent-encoded before the request is sent. cURL passes the URL through
 * as given, so a raw space (e.g. "...?StandNo=GB/T 20819.1-2007") ends up in
 * the HTTP request line, which servers typically reject with 400 Bad Request.
 *
 * @param string $url Address of the page to fetch; may contain literal spaces.
 * @return string|false The compacted HTML, or false when the transfer or the
 *                      whitespace clean-up fails.
 */
function get_content($url){
    // A literal space is not a valid URL character; encode it as %20.
    $url = str_replace(' ', '%20', trim($url));

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)');
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    // The handle is no longer needed whether or not the transfer succeeded.
    curl_close($ch);

    if (false === $html) {
        return false;
    }

    // Patterns are applied in order: drop whitespace right after '>',
    // drop whitespace right before '<', then collapse every remaining
    // whitespace run to a single space. Because \s already matches \r, \n
    // and \t, no separate removal of those characters is needed afterwards.
    $html = preg_replace(
        array('/>(\s+)/', '/(\s+)</', '/(\s+)/'),
        array('>', '<', ' '),
        $html
    );

    // preg_replace() yields null on a PCRE error; report that as a failure.
    if (null === $html) {
        return false;
    }

    return $html;
}
执行到curl_setopt($ch, CURLOPT_URL, $url);没问题能输出$ch
但是执行下一句 $html = curl_exec($ch); 时,就不能输出$html,返回的是 Bad Request
请问是什么原因啊? 展开
我的函数如下:
/**
 * Fetch the page at the given URL and return its HTML with whitespace compacted.
 *
 * Surrounding whitespace is trimmed from the URL and embedded spaces are
 * percent-encoded before the request is sent. cURL passes the URL through
 * as given, so a raw space (e.g. "...?StandNo=GB/T 20819.1-2007") ends up in
 * the HTTP request line, which servers typically reject with 400 Bad Request.
 *
 * @param string $url Address of the page to fetch; may contain literal spaces.
 * @return string|false The compacted HTML, or false when the transfer or the
 *                      whitespace clean-up fails.
 */
function get_content($url){
    // A literal space is not a valid URL character; encode it as %20.
    $url = str_replace(' ', '%20', trim($url));

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)');
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_URL, $url);
    $html = curl_exec($ch);
    // The handle is no longer needed whether or not the transfer succeeded.
    curl_close($ch);

    if (false === $html) {
        return false;
    }

    // Patterns are applied in order: drop whitespace right after '>',
    // drop whitespace right before '<', then collapse every remaining
    // whitespace run to a single space. Because \s already matches \r, \n
    // and \t, no separate removal of those characters is needed afterwards.
    $html = preg_replace(
        array('/>(\s+)/', '/(\s+)</', '/(\s+)/'),
        array('>', '<', ' '),
        $html
    );

    // preg_replace() yields null on a PCRE error; report that as a failure.
    if (null === $html) {
        return false;
    }

    return $html;
}
执行到curl_setopt($ch, CURLOPT_URL, $url);没问题能输出$ch
但是执行下一句 $html = curl_exec($ch); 时,就不能输出$html,返回的是 Bad Request
请问是什么原因啊? 展开
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询