在 C# 中獲取指定網頁的 HTML 源代碼,可以使用 WebClient、WebRequest、HttpWebRequest 三種方式來實現。
當然也可以使用 WebBrowser 控件,在此就不研究 WebBrowser 如何獲取了。
WebClient
/// <summary>
/// Fetches the resource at <paramref name="url"/> through a <see cref="WebClient"/>
/// and returns the whole response body as a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body, decoded with the "utf-8" encoding.</returns>
private string GetWebClient(string url)
{
    // The using statements dispose the client, the stream and the reader
    // even when opening or reading throws, so no connection is left open.
    using (WebClient myWebClient = new WebClient())
    using (Stream myStream = myWebClient.OpenRead(url))
    using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return sr.ReadToEnd();
    }
}
WebRequest
/// <summary>
/// Fetches the resource at <paramref name="url"/> through a generic
/// <see cref="WebRequest"/> and returns the whole response body as a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body, decoded with the "utf-8" encoding.</returns>
private string GetWebRequest(string url)
{
    Uri uri = new Uri(url);
    WebRequest myReq = WebRequest.Create(uri);

    // The using statements release the response, its stream and the reader
    // even when reading throws; the original closed them only on success.
    using (WebResponse result = myReq.GetResponse())
    using (Stream receviceStream = result.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return readerOfStream.ReadToEnd();
    }
}
HttpWebRequest
/// <summary>
/// Fetches the resource at <paramref name="url"/> through an
/// <see cref="HttpWebRequest"/> that sends User-Agent, Accept and
/// Accept-Language headers, and returns the whole response body as a string.
/// </summary>
/// <param name="url">HTTP(S) address of the page to download.</param>
/// <returns>The response body, decoded with the "utf-8" encoding.</returns>
private string GetHttpWebRequest(string url)
{
    Uri uri = new Uri(url);
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri);

    // UserAgent takes only the header VALUE; the header name must not be part
    // of it, and the parenthesised comment has to be closed.
    myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
    myReq.Accept = "*/*";
    myReq.KeepAlive = true;
    myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

    // The using statements release the response, its stream and the reader
    // even when reading throws; the original closed them only on success.
    using (HttpWebResponse result = (HttpWebResponse)myReq.GetResponse())
    using (Stream receviceStream = result.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return readerOfStream.ReadToEnd();
    }
}
注意:代碼中的 "utf-8" 應與指定網頁的實際編碼一致(例如網頁使用 gb2312 時應改為 "gb2312"),否則獲取到的內容會出現亂碼。
總結
可以看到 HttpWebRequest 方式最複雜,但卻提供了更多的選擇性。
有的網站會檢測客戶端的 UserAgent,如 163.com;如果使用 WebClient 或 WebRequest 方式獲取,得到的將是錯誤提示頁面的內容。
而通過HttpWebRequest 就沒問題。
源碼下載:http://files.cnblogs.com/zjfree/GetHTML.rar
測試環境:WIN2003 + VS2005 + C# + winForm
歡迎轉載,轉載請注明:轉載自[ http://hovertree.com/ ]