支持Cookie並開放了一些特殊設置項的HttpWebClient,webclientcookie
1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.Net;
6 using System.IO;
7 using System.Collections.Specialized;
8 using System.Web;
9
namespace Common.Helpers
{
    /// <summary>
    /// A <see cref="WebClient"/> that adds cookie support, a configurable User-Agent,
    /// request timeout, Expect: 100-continue control, redirect control, and access to
    /// the most recent response object.
    /// </summary>
    public class HttpWebClient : WebClient
    {
        #region Public properties
        /// <summary>
        /// User-Agent string sent with each request. The constructors initialise it to
        /// <see cref="UserAgentValues.FireFox"/>. When null or empty, no User-Agent
        /// header is set by this class.
        /// </summary>
        public string UserAgent { get; set; }

        /// <summary>
        /// Cookie container assigned to every <see cref="HttpWebRequest"/> created by this client.
        /// </summary>
        public CookieContainer CookieContainer { get; set; }

        /// <summary>
        /// true if POST requests should wait for a 100-Continue response; otherwise false.
        /// The value is applied to the request's <see cref="ServicePoint"/>.
        /// </summary>
        public bool Expect100Continue { get; set; }

        private WebResponse m_LastWebResponse = null;
        /// <summary>
        /// The response object returned by the most recent request, or null if no
        /// response has been received yet.
        /// </summary>
        public WebResponse LastWebResponse { get { return this.m_LastWebResponse; } }

        private int m_Timeout = 120000;
        /// <summary>
        /// Request timeout in milliseconds. Defaults to 120000 (120 seconds).
        /// </summary>
        public int Timeout
        {
            get { return m_Timeout; }
            set { m_Timeout = value; }
        }

        private HttpWebClientSetting m_HttpWebClientSetting = null;
        /// <summary>
        /// Extra settings applied to each HTTP request. This property never returns null:
        /// a default instance is created on first read and when null is assigned.
        /// </summary>
        public HttpWebClientSetting HttpWebClientSetting
        {
            get
            {
                if (m_HttpWebClientSetting == null)
                {
                    m_HttpWebClientSetting = new HttpWebClientSetting();
                }
                return m_HttpWebClientSetting;
            }
            set
            {
                m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
            }
        }

        /// <summary>
        /// Optional callback invoked with every <see cref="HttpWebRequest"/> after this
        /// class has configured it, allowing callers to customise the request further.
        /// Defaults to null.
        /// </summary>
        public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
        #endregion

        #region Constructors
        /// <summary>
        /// Creates a client with a new, empty <see cref="CookieContainer"/>.
        /// </summary>
        public HttpWebClient()
            : this(new CookieContainer())
        {
        }

        /// <summary>
        /// Creates a client that uses the supplied cookie container, the Firefox
        /// User-Agent string, and with Expect: 100-continue disabled.
        /// </summary>
        /// <param name="cookieContainer">Cookie container to attach to every HTTP request.</param>
        public HttpWebClient(CookieContainer cookieContainer)
        {
            this.CookieContainer = cookieContainer;
            this.UserAgent = UserAgentValues.FireFox;
            this.Expect100Continue = false;
        }
        #endregion

        #region Overrides adding CookieContainer support
        /// <summary>
        /// Creates the request for <paramref name="address"/> and applies the User-Agent,
        /// timeout, cookie container, 100-continue and redirect settings of this client.
        /// </summary>
        /// <param name="address">The URI to request.</param>
        /// <returns>The configured request object.</returns>
        protected override WebRequest GetWebRequest(Uri address)
        {
            if (!string.IsNullOrEmpty(this.UserAgent))
            {
                // Assign instead of Add: Add appends to an existing value, so a header left
                // over from a failed request (or preset by the caller) would be duplicated.
                this.Headers[HttpRequestHeader.UserAgent] = this.UserAgent;
            }

            WebRequest request = base.GetWebRequest(address);
            request.Timeout = this.Timeout;

            HttpWebRequest httpRequest = request as HttpWebRequest;
            if (httpRequest != null)
            {
                httpRequest.CookieContainer = this.CookieContainer;
                httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue;

                // The HttpWebClientSetting getter never returns null, so no check is needed.
                httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;

                // Give the caller-supplied hook the last word on the request.
                if (this.PrepareProcessWebRequest != null)
                {
                    this.PrepareProcessWebRequest(httpRequest);
                }
            }

            return request;
        }
        #endregion

        #region Overrides exposing the response object
        /// <summary>
        /// Gets the response for a synchronous request and records it in
        /// <see cref="LastWebResponse"/>.
        /// </summary>
        /// <param name="request">The request to get the response for.</param>
        /// <returns>The response returned by the base implementation.</returns>
        protected override WebResponse GetWebResponse(WebRequest request)
        {
            WebResponse response = base.GetWebResponse(request);
            this.m_LastWebResponse = response;
            return response;
        }

        /// <summary>
        /// Gets the response for an asynchronous request and records it in
        /// <see cref="LastWebResponse"/>, so the property is also kept current
        /// for asynchronous calls.
        /// </summary>
        /// <param name="request">The request to get the response for.</param>
        /// <param name="result">The pending asynchronous operation.</param>
        /// <returns>The response returned by the base implementation.</returns>
        protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
        {
            WebResponse response = base.GetWebResponse(request, result);
            this.m_LastWebResponse = response;
            return response;
        }
        #endregion

        #region (public) PostData overloads taking a NameValueCollection
        /// <summary>
        /// POSTs form data to a URL and returns the response body as a string.
        /// </summary>
        /// <remarks>
        /// The form body is URL-encoded here and sent with UploadData instead of
        /// UploadValues because, per the original author's reading of the WebClient
        /// source (UploadValuesInternal), UploadValues always URL-encodes with UTF-8
        /// regardless of the configured encoding.
        /// History: ZhangQingFeng 2014-12-14 added; 2015-05-12 switched away from UploadValues.
        /// </remarks>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <param name="encoding">Encoding used to URL-encode the request; UTF-8 when null.</param>
        /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
        {
            // Resolve the fallback once so URL-encoding and response decoding can never see null.
            Encoding requestEncoding = encoding ?? Encoding.UTF8;

            this.Encoding = requestEncoding;
            // Assign instead of Add so repeated calls never accumulate duplicate values.
            this.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";

            // URL-encoded text is pure ASCII, so ASCII is sufficient for the wire bytes.
            byte[] requestBody = Encoding.ASCII.GetBytes(BuildFormBody(data, requestEncoding));
            byte[] responseBody = this.UploadData(url, "POST", requestBody);

            return (responseEncoding ?? requestEncoding).GetString(responseBody);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using the same
        /// encoding for the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <param name="encoding">Encoding used for both request and response; UTF-8 when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data, Encoding encoding)
        {
            return PostData(url, data, encoding, null);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using this client's
        /// current Encoding for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data)
        {
            return PostData(url, data, this.Encoding);
        }

        /// <summary>
        /// Builds an application/x-www-form-urlencoded body ("a=1&amp;b=2") from the given fields.
        /// </summary>
        /// <param name="data">The form fields; null yields an empty body.</param>
        /// <param name="encoding">Encoding used to URL-encode names and values; must not be null.</param>
        /// <returns>The URL-encoded form body.</returns>
        private static string BuildFormBody(NameValueCollection data, Encoding encoding)
        {
            StringBuilder body = new StringBuilder();
            if (data == null)
            {
                return body.ToString();
            }

            string delimiter = String.Empty;
            foreach (string name in data.AllKeys)
            {
                body.Append(delimiter);
                body.Append(HttpUtility.UrlEncode(name, encoding));
                body.Append("=");
                body.Append(HttpUtility.UrlEncode(data[name], encoding));
                delimiter = "&";
            }

            return body.ToString();
        }
        #endregion

        #region (public) PostData overloads taking a Dictionary
        /// <summary>
        /// POSTs form data to a URL and returns the response body as a string.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <param name="encoding">Encoding used to URL-encode the request; UTF-8 when null.</param>
        /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
        {
            NameValueCollection postData = new NameValueCollection();
            if (data != null)
            {
                foreach (var item in data)
                {
                    postData.Add(item.Key, item.Value);
                }
            }
            return PostData(url, postData, encoding, responseEncoding);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using the same
        /// encoding for the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <param name="encoding">Encoding used for both request and response; UTF-8 when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
        {
            return PostData(url, data, encoding, null);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using this client's
        /// current Encoding for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as an empty form.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data)
        {
            return PostData(url, data, this.Encoding);
        }
        #endregion

        #region Helper types
        /// <summary>
        /// Predefined browser User-Agent strings.
        /// </summary>
        public class UserAgentValues
        {
            public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
            public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
            public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
        }
        #endregion
    }

    /// <summary>
    /// Settings object for <see cref="HttpWebClient"/>.
    /// </summary>
    public class HttpWebClientSetting
    {
        private bool m_AllowAutoRedirect = true;
        /// <summary>
        /// Whether the client follows redirect responses automatically. When true the
        /// response obtained is the one after redirection; when false it is the original
        /// redirect response. Defaults to true.
        /// </summary>
        public bool AllowAutoRedirect
        {
            get { return m_AllowAutoRedirect; }
            set { m_AllowAutoRedirect = value; }
        }
    }
}
HttpWebClient
在做頁面抓取的過程中,發現自帶的WebClient不夠靈活,因此做了一個實現。
關於在PostData方法中不使用UploadValues()方法的原因:
1.查看微軟的源代碼實現時發現,無論請求時設置的Encoding是什麼(例如GB2312),WebClient的UploadValues()在上傳內容時,內部始終使用UTF-8編碼進行UrlEncode。因此,若服務端按GB2312等非UTF-8編碼解碼,提交的數據中包含的中文就會亂碼,所以重寫PostData,按指定的編碼自行進行UrlEncode以規避此問題。
關於HttpWebClientSetting中的AllowAutoRedirect屬性:
在WebClient發起請求時,若響應為重定向,WebClient默認會自動跟隨重定向,因此該類提供此設置項以控制訪問時是否自動重定向。自動重定向時,對重定向目標地址發起的第二次請求中的Referer頭會被置空;若將AllowAutoRedirect設置為false,再手動從響應頭(Response.Headers)中取出Location地址,設置好Referer頭後再訪問該地址,則可更真實地模擬浏覽器訪問,從而避開一些網站的防抓取設置。
關於HttpWebClient中的LastWebResponse屬性:
當存在多次重定向時,該屬性記錄的是最後一次返回的響應對象,讀取該對象的ResponseUri屬性,即可取到最後返回響應的頁面真實地址,從而為下一次請求設置Referer頭作准備。
大約就是如此,後期如有Bug會繼續更新。