在用curl抓取網頁內容的時候,經常需要知道網頁返回的響應頭信息和請求的相關信息;特別是在請求過程中存在重定向的時候,獲取響應頭信息對分析請求內容很有幫助。
下面就是一個請求中存在重定向的例子,我們的目的是要獲取最終實際請求的url地址
// Demo: request a URL that redirects, let cURL follow the redirect chain,
// then print the final response headers and cURL's transfer statistics.
$url = 'http://www.appchina.com/market/r/489267/com.appshare.android.ilisten.vapk?c=aplus.direct&uid=gAJ9cQEu1TlyZxsXN-aB4RaanvFL6t6Bj-vj0rIBs&p=aplus.detail&m=redirect';

$ch = curl_init();

// All request options are applied in one call, in the same order as before.
curl_setopt_array($ch, array(
    CURLOPT_URL            => $url,
    // CURLOPT_POST        => 1,
    // CURLOPT_POSTFIELDS  => $params,
    CURLOPT_HEADER         => 1, // include the response headers in the result
    CURLOPT_NOBODY         => 1, // do not return the response body
    // CURLOPT_MAXREDIRS   => 1, // upper bound on the number of redirects followed
    CURLOPT_RETURNTRANSFER => 1, // hand the response back instead of printing it
    CURLOPT_FOLLOWLOCATION => 1, // re-request whenever the response carries a Location header
));

$content = curl_exec($ch);
$rinfo   = curl_getinfo($ch);

// Response headers first, then a separator, then the transfer info array.
echo $content, "</br>", "<hr>";
print_r($rinfo);
下面是輸出的結果
HTTP/1.1 200 OKServer: nginxDate: Sat, 22 Dec 2012 06:17:44 GMTContent-Type: application/vnd.android.package-archiveConnection: closeLast-Modified: Mon, 03 Dec 2012 16:00:00 GMTExpires: Tue, 03 Dec 2013 16:00:00 GMTCache-Control: max-age=31536000Content-Length: 2142149 Array( [url] => http://www.d.appchina.com/McDonald/r/489267/com.appshare.android.ilisten.vapk?c=aplus.direct&uid=gAJ9cQEu1TlyZxsXN-aB4RaanvFL6t6Bj-vj0rIBs&p=aplus.detail&m=redirect [content_type] => application/vnd.android.package-archive [http_code] => 200 [header_size] => 289 [request_size] => 196 [filetime] => -1 [ssl_verify_result] => 0 [redirect_count] => 0 [total_time] => 0.171621 [namelookup_time] => 0.135256 [connect_time] => 0.152913 [pretransfer_time] => 0.152916 [size_upload] => 0 [size_download] => 0 [speed_download] => 0 [speed_upload] => 0 [download_content_length] => 2142149 [upload_content_length] => 0 [starttransfer_time] => 0.171582 [redirect_time] => 0 [certinfo] => Array ( ))
可以看到,經過遞歸請求後最終得到一個200的response,但是這種方式不能得到最後一次請求的url,也就是最終實際請求的url;要想得到這個url,就需要遞歸地分析每次請求返回的response。
下面是我寫的一個獲取最後一次請求url的遞歸函數
$url = 'http://www.appchina.com/market/r/489267/com.appshare.android.ilisten.vapk?c=aplus.direct&uid=gAJ9cQEu1TlyZxsXN-aB4RaanvFL6t6Bj-vj0rIBs&p=aplus.detail&m=redirect';

$realUrl = getRedirectLocation($url);
echo "</br>--->", $realUrl;

/**
 * Follow a chain of HTTP redirects by hand and return the last URL requested.
 *
 * Each hop is requested with a plain GET and without CURLOPT_FOLLOWLOCATION;
 * the next hop is taken from CURLINFO_REDIRECT_URL, i.e. the redirect target
 * as libcurl itself reports it, instead of a hand-written regular expression
 * run over the raw headers and body.
 *
 * Every URL visited is echoed followed by "</br>" so the chain is visible.
 *
 * @param string $url          URL to start from.
 * @param int    $maxRedirects Maximum number of redirects to follow; guards
 *                             against redirect loops. Defaults to 10.
 * @return string The last URL that was requested: the final destination, or
 *                the URL at which the transfer failed or the redirect limit
 *                was reached.
 */
function getRedirectLocation($url, $maxRedirects = 10)
{
    echo $url, "</br>";

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, 3); // a single request may take at most 3 seconds
    // CURLOPT_NOBODY is deliberately left off: per the original author, enabling
    // it means the real URL is not obtained when a 302 redirect is encountered.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // keep the response out of the output

    $succeeded = (curl_exec($ch) !== false);
    $next      = $succeeded ? curl_getinfo($ch, CURLINFO_REDIRECT_URL) : '';

    // Drop the handle before recursing so only one is alive per call chain.
    unset($ch);

    // Stop on a failed transfer, when no redirect was announced, or when the
    // redirect budget is used up.
    if ($maxRedirects > 0 && is_string($next) && $next !== '') {
        return getRedirectLocation($next, $maxRedirects - 1);
    }

    return $url;
}