程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> .NET網頁編程 >> C# >> C#入門知識 >> 支持Cookie並開放了一些特殊設置項的HttpWebClient,webclientcookie

支持Cookie並開放了一些特殊設置項的HttpWebClient,webclientcookie

編輯:C#入門知識

支持Cookie並開放了一些特殊設置項的HttpWebClient,webclientcookie


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Collections.Specialized;
using System.Web;

namespace Common.Helpers
{
    /// <summary>
    /// A <see cref="WebClient"/> that attaches a <see cref="System.Net.CookieContainer"/> to HTTP
    /// requests, exposes a few extra request settings, and remembers the last response it received.
    /// </summary>
    public class HttpWebClient : WebClient
    {
        #region Public properties
        /// <summary>
        /// User-Agent string sent with each request. The constructors default it to
        /// <see cref="UserAgentValues.FireFox"/>; when null or empty no User-Agent header is set by this class.
        /// </summary>
        public string UserAgent { get; set; }

        /// <summary>
        /// Cookie container assigned to every <see cref="HttpWebRequest"/> created by this client.
        /// </summary>
        public CookieContainer CookieContainer { get; set; }

        /// <summary>
        /// True if POST requests should expect a 100-Continue response; otherwise false.
        /// The constructors set this to false.
        /// </summary>
        public bool Expect100Continue { get; set; }

        private WebResponse m_LastWebResponse = null;

        /// <summary>
        /// The most recent response object returned by <c>GetWebResponse</c>; null until a request completes.
        /// </summary>
        public WebResponse LastWebResponse
        {
            get { return this.m_LastWebResponse; }
        }

        private int m_Timeout = 120000;

        /// <summary>
        /// Request timeout in milliseconds. Defaults to 120000 (120 seconds).
        /// </summary>
        public int Timeout
        {
            get { return m_Timeout; }
            set { m_Timeout = value; }
        }

        private HttpWebClientSetting m_HttpWebClientSetting = null;

        /// <summary>
        /// Extra settings applied to HTTP requests. Never null: the getter creates a default instance
        /// on first use and the setter replaces a null value with a default instance.
        /// </summary>
        public HttpWebClientSetting HttpWebClientSetting
        {
            get
            {
                if (m_HttpWebClientSetting == null)
                {
                    m_HttpWebClientSetting = new HttpWebClientSetting();
                }
                return m_HttpWebClientSetting;
            }
            set
            {
                m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
            }
        }

        /// <summary>
        /// Optional callback invoked with every <see cref="HttpWebRequest"/> after this class has
        /// configured it, so callers can customise the request further. Defaults to null.
        /// </summary>
        public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
        #endregion

        #region Constructors
        /// <summary>
        /// Creates a client with a new, empty cookie container.
        /// </summary>
        public HttpWebClient()
            : this(new CookieContainer())
        {
        }

        /// <summary>
        /// Creates a client that uses the supplied cookie container.
        /// </summary>
        /// <param name="cookieContainer">Cookie container assigned to each HTTP request.</param>
        public HttpWebClient(CookieContainer cookieContainer)
        {
            this.CookieContainer = cookieContainer;
            this.UserAgent = UserAgentValues.FireFox;
            this.Expect100Continue = false;
        }
        #endregion

        #region Overrides: request creation with CookieContainer support
        /// <summary>
        /// Creates the request for <paramref name="address"/> and applies the User-Agent, timeout,
        /// cookie container, 100-Continue flag and auto-redirect setting, then hands HTTP requests
        /// to <see cref="PrepareProcessWebRequest"/> when that callback is set.
        /// </summary>
        /// <param name="address">The URI to request.</param>
        /// <returns>The configured request object.</returns>
        protected override WebRequest GetWebRequest(Uri address)
        {
            if (!string.IsNullOrEmpty(this.UserAgent))
            {
                // Assign instead of Add so that a User-Agent already present in Headers
                // is replaced rather than appended to.
                this.Headers[HttpRequestHeader.UserAgent] = this.UserAgent;
            }

            WebRequest request = base.GetWebRequest(address);
            request.Timeout = this.Timeout;

            HttpWebRequest httpRequest = request as HttpWebRequest;
            if (httpRequest != null)
            {
                httpRequest.CookieContainer = this.CookieContainer;
                httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue;

                // The HttpWebClientSetting getter never returns null, so no null check is needed.
                httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;

                // Give the caller the last word on the request.
                if (this.PrepareProcessWebRequest != null)
                {
                    this.PrepareProcessWebRequest(httpRequest);
                }
            }

            return request;
        }
        #endregion

        #region Overrides: expose the response object
        /// <summary>
        /// Gets the response for a synchronous request and records it in <see cref="LastWebResponse"/>.
        /// </summary>
        /// <param name="request">The request to get the response for.</param>
        /// <returns>The response returned by the base implementation.</returns>
        protected override WebResponse GetWebResponse(WebRequest request)
        {
            WebResponse response = base.GetWebResponse(request);
            this.m_LastWebResponse = response;
            return response;
        }

        /// <summary>
        /// Gets the response for an asynchronous request and records it in <see cref="LastWebResponse"/>,
        /// so the property is also kept up to date when the asynchronous methods are used.
        /// </summary>
        /// <param name="request">The request to get the response for.</param>
        /// <param name="result">The asynchronous result of the pending request.</param>
        /// <returns>The response returned by the base implementation.</returns>
        protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
        {
            WebResponse response = base.GetWebResponse(request, result);
            this.m_LastWebResponse = response;
            return response;
        }
        #endregion

        #region (public) PostData overloads taking a NameValueCollection
        /// <summary>
        /// POSTs form data to a URL and returns the response body as a string.
        /// </summary>
        /// <remarks>
        /// WebClient.UploadValues is deliberately not used: per the original author's note
        /// (ZhangQingFeng, 2015-05-12), it always URL-encodes with UTF-8 regardless of the configured
        /// encoding, so the form body is built here and sent with UploadData instead.
        /// As a side effect this method sets <c>Encoding</c> to the request encoding.
        /// </remarks>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <param name="encoding">Encoding used to URL-encode the form fields; UTF-8 when null.</param>
        /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
        {
            Encoding requestEncoding = encoding ?? Encoding.UTF8;

            this.Encoding = requestEncoding;
            // Assign instead of Add so that a Content-Type already present in Headers is replaced.
            this.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";

            string body = BuildFormBody(data, requestEncoding);

            // The URL-encoded body is sent as ASCII bytes, exactly as the original code did.
            byte[] responseBytes = this.UploadData(url, "POST", Encoding.ASCII.GetBytes(body));

            return (responseEncoding ?? requestEncoding).GetString(responseBytes);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using the same encoding for the
        /// request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <param name="encoding">Encoding used for both the request and the response.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data, Encoding encoding)
        {
            return PostData(url, data, encoding, null);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using this client's
        /// <c>Encoding</c> for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data)
        {
            return PostData(url, data, this.Encoding);
        }

        /// <summary>
        /// Builds an application/x-www-form-urlencoded body ("name=value" pairs joined with "&amp;").
        /// </summary>
        /// <param name="data">The form fields; null or empty yields an empty string.</param>
        /// <param name="encoding">Encoding used for URL-encoding; UTF-8 when null.</param>
        /// <returns>The encoded form body.</returns>
        private static string BuildFormBody(NameValueCollection data, Encoding encoding)
        {
            if (data == null || data.Count == 0)
            {
                return string.Empty;
            }

            Encoding effectiveEncoding = encoding ?? Encoding.UTF8;
            StringBuilder body = new StringBuilder();
            string delimiter = string.Empty;

            foreach (string name in data.AllKeys)
            {
                body.Append(delimiter);
                body.Append(HttpUtility.UrlEncode(name, effectiveEncoding));
                body.Append("=");
                body.Append(HttpUtility.UrlEncode(data[name], effectiveEncoding));
                delimiter = "&";
            }

            return body.ToString();
        }
        #endregion

        #region (public) PostData overloads taking a Dictionary
        /// <summary>
        /// POSTs the entries of a dictionary as form data and returns the response body.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <param name="encoding">Encoding used to URL-encode the form fields; UTF-8 when null.</param>
        /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
        {
            NameValueCollection postData = new NameValueCollection();
            if (data != null)
            {
                foreach (var item in data)
                {
                    postData.Add(item.Key, item.Value);
                }
            }
            return PostData(url, postData, encoding, responseEncoding);
        }

        /// <summary>
        /// POSTs the entries of a dictionary as form data and returns the response body, using the
        /// same encoding for the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <param name="encoding">Encoding used for both the request and the response.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
        {
            return PostData(url, data, encoding, null);
        }

        /// <summary>
        /// POSTs the entries of a dictionary as form data and returns the response body, using this
        /// client's <c>Encoding</c> for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data)
        {
            return PostData(url, data, this.Encoding);
        }
        #endregion

        #region Helper types
        /// <summary>
        /// Predefined browser User-Agent strings.
        /// </summary>
        public class UserAgentValues
        {
            public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
            public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
            public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
        }
        #endregion
    }

    /// <summary>
    /// Settings object for <see cref="HttpWebClient"/>.
    /// </summary>
    public class HttpWebClientSetting
    {
        private bool m_AllowAutoRedirect = true;

        /// <summary>
        /// Whether the client follows redirect responses automatically. When true the caller receives
        /// the content after redirection; when false the caller receives the redirect response itself.
        /// Defaults to true.
        /// </summary>
        public bool AllowAutoRedirect
        {
            get { return m_AllowAutoRedirect; }
            set { m_AllowAutoRedirect = value; }
        }
    }
}

在做頁面抓取的過程中,發現自帶的WebClient不夠靈活,因此做了一個實現。

 

關於在PostData方法中不使用UploadValues()方法的原因:

1.查看微軟的源代碼實現時發現,無論設置請求時的Encoding是否為GB2312,在使用WebClient的UploadValues()上傳內容時,其內在都是使用UTF-8編碼進行UrlEncode,因此傳到服務端中的數據中若包含有中文時則一定會亂碼,因此重寫PostData以規避此問題。

 

關於HttpWebClientSetting中的AllowAutoRedirect屬性:

在WebClient發起請求時,若響應為重定向,WebClient默認會自動跟隨重定向,因此該類提供此設置項以控制訪問時是否自動重定向。(自動重定向後訪問新地址時,請求中的Referer頭會被置空;若將AllowAutoRedirect設置為false,再手動從Response.Headers中取出Location地址,設置Referer頭後再訪問,則可真實模擬浏覽器訪問,從而避開一些網站的防抓取設置。)

 

關於HttpWebClient中的LastWebResponse屬性:

當存在多次重定向時,系統記錄了最後一次返回的響應對象,從該對象中取出ResponseUri屬性,則可以取到最後返回響應的頁面真實地址,從而為下一次設置請求的Referer頭作准備。

 

大約就是如此,後期如有Bug會繼續更新。

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved