本文采用 HttpClient 來模擬站點的登錄發帖回復,介紹 HttpClient 的用法和常見問題的解決方案。
HttpClient 是 Apache Jakarta Commons 下的子項目,可以用來提供支持 HTTP 協議的客戶端編程工具包,模擬瀏覽器的行為。雖然大部分的功能也可以使用較底層的 java.net.HttpURLConnection 來實現,但它提供了很多的方法來簡化網絡的訪問。例如:
* 實現了所有 HTTP 的方法( GET,POST 等)
* 支持 HTTPS 協議
* 支持代理服務器
* 自動維護 Cookies 等
我們知道, HTTP 協議是無狀態的,要維持會話,現在基本上都是采用基於 Cookies 的方式( Session 機制也是通過 Cookies 實現的),所以 HttpClient 的自動維護 Cookies 的方式對我們的登錄發帖回復非常有用(一般網站都需要先登錄再發帖回復)。
下面的例子都是采用 commons-httpclient-3.1.jar 包來實現的(雖然 commons-httpclient-4.0 已經發布,但是代碼發生了較大的重構,調用方式也發生了很大的改變)。
下載 jar 包的路徑為: http://hc.apache.org/downloads.cgi
由於 httpclient 使用了 Apache Jakarta Commons 下的子項目 logging 和 codec ,所以也需要在 http://commons.apache.org/ 下載這兩個包:
commons-logging.jar
commons-codec-1.3.jar
為了更好地理解代碼,設計的 UML 類圖如下:
方法調用的時序圖如下:
其中, BrowserContext 類代表浏覽器上下文對象,維護 HttpClient 鏈接和 Cookies 。 KaixinSitePost 是負責實現開心網的具體登錄發帖回復邏輯的類。
BrowserContext 的代碼如下:
Java代碼
/** */ /**
* 浏覽器進程上下文
*/
public class BrowserContext
{
private HttpClient client; // 注意:每個站點和每個用戶,對應一個單獨的BrowserContext對象
private Cookie[] cookies = new Cookie[ 0 ]; // 維護Cookies
private Proxyips proxyip = null ; // 當前的代理IP
private Siteusers user = null ; // 當前的登錄用戶
public Cookie[] getCookies() {
return cookies;
}
public void setCookies(Cookie[] cookies) {
this .cookies = cookies;
}
public void addCookie(Cookie c) {
if (cookies != null && cookies.length > 0 ) {
Cookie[] others = new Cookie[cookies.length + 1 ];
System.arraycopy(cookies, 0 , others, 0 , cookies.length);
others[others.length - 1 ] = c;
cookies = others;
} else {
cookies = new Cookie[ 1 ];
cookies[ 0 ] = c;
}
}
public Proxyips getProxyip() {
return proxyip;
}
public void setProxyip(Proxyips proxyip) {
this .proxyip = proxyip;
if ( this .proxyip != null ) {
client.getHostConfiguration().setProxy(proxyip.getIp(),proxyip.getPort());
client.getParams().setAuthenticationPreemptive( true );
// 如果代理需要密碼驗證,這裡設置用戶名密碼
// client.getState().setProxyCredentials(AuthScope.ANY, new UsernamePasswordCredentials("",""));
}
}
public HttpClient getClient() {
return client;
}
public Siteusers getUser() {
return user;
}
public void setUser(Siteusers user) {
this .user = user;
}
private BrowserContext(Site site) {
super ();
Protocol myhttps = new Protocol( " https " , new MySecureProtocolSocketFactory(), 443 );
Protocol.registerProtocol( " https " , myhttps);
client = new HttpClient();
client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
HttpConnectionManagerParams managerParams = client.getHttpConnectionManager().getParams();
// 設置連接超時時間(單位毫秒)
// managerParams.setConnectionTimeout(50000);
// 設置讀數據超時時間(單位毫秒)
// managerParams.setSoTimeout(120000);
initForSiteVisit(site.getSite(),site.getPort(),site.getCharset());
}
public BrowserContext(Site site,Proxyips proxyip) {
this (site);
this .setProxyip(proxyip);
}
private void initForSiteVisit(String siteurl, int port,String charset) {
client.getHostConfiguration().setHost(siteurl, port, " http " );
// 解決中文亂碼問題,和指定網站的頁面編碼一致
client.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset);
}
// 查看cookie信息
public void printCookies()
{
System.out.println( " ---------------Cookie---------------- " );
if (cookies != null ) {
for (Cookie c:cookies) {
System.out.println(c.getName() + " : " + c.getValue());
}
} else {
System.out.println( " 沒有設置Cookies " );
}
System.out.println( " ---------------Cookie---------------- " );
}
public void setCommonMethodRequestHeaders(HttpMethodBase method)
{
method.setRequestHeader( " Accept " , " */* " );
// method.setRequestHeader("Accept-Language", "zh-cn");
// method.setRequestHeader("Accept-Encoding", "gzip,deflate");
method.setRequestHeader( " User-Agent " , " Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;) " );
// 設置非常重要
method.setRequestHeader( " Connection " , " Keep-Alive " );
}
public String redirectToURL(String url) throws IOException
{
if (url != null ) {
try {
System.out.println( " 頁面重定向到: " + url);
String responseString = this .doCommonVisitWithURL(url);
// System.out.println(responseString);
return responseString;
} catch (IOException e) {
System.out.println( " 重定向: " + url + " 出錯 " );
}
} else {
System.out.println( " redirect url is null " );
}
return null ;
}
public String doCommonVisitWithURL(String url) throws IOException {
GetMethod get = new GetMethod(url);
return this .doGet(get);
}
public String doPost(ExpectContinueMethod post) throws IOException
{
if (post == null )
return null ;
try
{
if (getCookies() != null ) {
// printCookies();
client.getState().addCookies(cookies);
post.addRequestHeader( " Cookie " ,getCookies().toString());
// System.out.println(post.getRequestHeader("Cookie").getValue());
}
setCommonMethodRequestHeaders(post);
int statusCode = client.executeMethod(post);
cookies = client.getState().getCookies();
System.out.println(statusCode);
// System.out.println(post.getResponseHeader("Location"));
String responseString = post.getResponseBodyAsString();
System.out.println(responseString);
printCookies();
post.releaseConnection();
if (statusCode == 301 || statusCode == 302 ) {
redirectToURL(post.getResponseHeader( " Location " ).getValue());
}
return responseString;
}
finally {
if (post != null )
post.releaseConnection();
}
}
public String doGet(GetMethod get) throws IOException
{
if (get == null )
return null ;
if (cookies != null ) {
// printCookies();
client.getState().addCookies(cookies);
get.addRequestHeader( " Cookie " ,cookies.toString());
}
try {
setCommonMethodRequestHeaders(get);
int statusCode = client.executeMethod(get);
cookies = client.getState().getCookies(); // 重新保存Cookies
printCookies();
System.out.println(statusCode);
if (statusCode == 301 || statusCode == 302 ) {
redirectToURL(get.getResponseHeader( " Location " ).getValue());
}
String responseString = get.getResponseBodyAsString();
// System.out.println(responseString);
return responseString;
}
finally {
if (get != null )
get.releaseConnection();
}
}
public String getRedirectURL(String content)
{
if (content != null && content.indexOf( " window.location=\ "" )!=-1){
int begin = content.indexOf( " window.location=\ "" );
int end = content.indexOf( " \ "" , begin+17);
return content.substring(begin + 17 ,end);
}
return null ;
}
}
Java代碼
/**
 * Login and posting logic for the test site Kaixin (www.kaixin.com), which needs no
 * captcha.
 */
public class KaixinSitePost implements ISitePost
{
    private static final String LOGON_SITE = "www.kaixin.com";
    private static final int LOGON_PORT = 80;
    private static final String CHARSET = "UTF-8";

    /** Browser context every request goes through; null until one is supplied. */
    private BrowserContext context = null;

    /**
     * Creates an instance without a browser context. {@link #login} and {@link #post}
     * report an error and do nothing until a context is supplied through
     * {@link #KaixinSitePost(BrowserContext)}.
     */
    public KaixinSitePost() {
        super();
    }

    /**
     * Creates an instance that sends its requests through the given context.
     *
     * @param context browser context to use
     */
    public KaixinSitePost(BrowserContext context) {
        super();
        this.context = context;
    }

    /**
     * Logs a single user in.
     *
     * @param userinfo account to log in with; null yields null
     * @param ip       proxy to route through, or null to leave the proxy unchanged
     * @return whatever {@code SiteLogin.login} returns, or null when userinfo is null
     *         or no browser context is available
     */
    public String login(Siteusers userinfo, Proxyips ip)
    {
        if (userinfo == null) {
            return null;
        }
        if (context == null) {
            // Without this guard the calls below fail with a NullPointerException.
            System.out.println("KaixinSitePost: no BrowserContext set, cannot log in");
            return null;
        }
        SiteLogin login = new SiteLogin(context, "http://login.kaixin.com/Login.do");
        if (ip != null) {
            login.getContext().setProxyip(ip);
        }
        Map<String,String> params = new HashMap<String,String>();
        params.put("ss", "10106");
        params.put("loginregFrom", "index");
        params.put("origURL", "http://www.kaixin.com/SysHome.do");
        params.put("email", userinfo.getUsername());
        params.put("password", userinfo.getUserpwd());
        login.addRequestParameters(params);
        return login.login(userinfo);
    }

    /** Not implemented; always returns null. */
    public List<Siteboards> parseBoard(Siteboards data) {
        return null;
    }

    /**
     * Submits the post once for every board in the list. An IOException on one board
     * is printed and the remaining boards are still attempted.
     *
     * @param postinfo   title and content to submit
     * @param siteboards boards to post to; each gets its post URL overwritten
     * @return always null
     */
    public String post(Postinfos postinfo, List<Siteboards> siteboards)
    {
        if (postinfo == null || siteboards == null) {
            return null;
        }
        if (context == null) {
            // Without this guard the calls below fail with a NullPointerException.
            System.out.println("KaixinSitePost: no BrowserContext set, cannot post");
            return null;
        }
        SitePost sport = new SitePost(context);
        context.getClient().getHostConfiguration().setHost("blog.kaixin.com");
        Map<String,String> params = new HashMap<String,String>();
        params.put("categoryId", "0");
        params.put("blogControl", "1");
        params.put("title", postinfo.getTitle());
        params.put("body", postinfo.getContent());
        sport.addRequestParameters(params);
        for (Siteboards sb : siteboards) {
            sb.setPostUrl("http://blog.kaixin.com/NewEntry.do");
            try {
                sport.post(postinfo, sb);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return null;
    }

    /** Not implemented; always returns null. */
    public String reply(Postinfos postinfo, List<Articleinfos> arts)
    {
        return null;
    }

    /**
     * Demo entry point: logs in through a proxy and submits one post to two boards.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        try
        {
            Siteusers userinfo = new Siteusers();
            userinfo.setUsername("xxxx");
            userinfo.setUserpwd("xxxx");
            Proxyips ips = new Proxyips();
            ips.setIp("218.56.64.210");
            ips.setPort(8080);
            // NOTE(review): no BrowserContext is created here, so login and post only
            // report the missing context. Build one with new BrowserContext(site, ips)
            // and pass it to the constructor; how a Site is constructed is not visible
            // in this file.
            KaixinSitePost sp = new KaixinSitePost();
            sp.login(userinfo, ips);
            Postinfos post = new Postinfos();
            post.setContent("<p>lllllllllllllllllllllll</p>");
            post.setTitle("中文測試");
            List<Siteboards> siteboards = new ArrayList<Siteboards>();
            siteboards.add(new Siteboards());
            siteboards.add(new Siteboards());
            sp.post(post, siteboards);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Java代碼
/**
 * Logs in to a site by sending one HTTP request through a BrowserContext.
 */
public class SiteLogin extends AbstractMethodAdapter
{
    /** Request that performs the login; a PostMethod or GetMethod depending on ispost. */
    private final HttpMethodBase method;
    /** True when the login is submitted with POST. */
    private final boolean ispost;
    /** Browser context the request is executed in. */
    protected BrowserContext context;

    /** @return the browser context this login runs in */
    public BrowserContext getContext() {
        return context;
    }

    /**
     * @param context browser context to execute the request in
     * @param url     login URL
     * @param ispost  true to submit with POST, false to submit with GET
     */
    public SiteLogin(BrowserContext context, String url, boolean ispost) {
        super();
        this.context = context;
        this.ispost = ispost;
        this.method = ispost ? new PostMethod(url) : new GetMethod(url);
    }

    /**
     * Creates a login that is submitted with POST.
     *
     * @param context browser context to execute the request in
     * @param url     login URL
     */
    public SiteLogin(BrowserContext context, String url) {
        this(context, url, true);
    }

    /**
     * Sends the login request and stores the resulting cookies in the context.
     *
     * @param user user being logged in (not read by this method)
     * @return the response body on status 200; the result of following the Location
     *         header on 301/302; null on any other status or when an exception occurs
     */
    public String login(Siteusers user)
    {
        if (this.ispost && this.hasRequestParameters()) {
            ((PostMethod) method).setRequestBody(this.getRequestParams());
        }
        if (this.hasExtraRequestHeaders()) {
            this.addExtraRequestHeaders(method, this.getExtraRequestHeaders());
        }
        context.setCommonMethodRequestHeaders(method);
        try
        {
            if (context.getCookies() != null) {
                // The client state sends the cookies; a manual "Cookie" header built from
                // Cookie[].toString() would only carry the array's identity string.
                context.getClient().getState().addCookies(context.getCookies());
            }
            int statusCode = context.getClient().executeMethod(method);
            context.setCookies(context.getClient().getState().getCookies());
            String responseString = method.getResponseBodyAsString();
            // Release before a possible redirect issues another request on the same client.
            method.releaseConnection();
            if (statusCode == HttpStatus.SC_OK) {
                System.out.println("登錄成功");
                return responseString;
            }
            if (statusCode == 302 || statusCode == 301) {
                System.out.println("登錄成功,頁面重定向");
                // A redirect without a Location header is passed on as null, which
                // redirectToURL reports instead of this method hitting a null dereference.
                String url = method.getResponseHeader("Location") == null
                        ? null
                        : method.getResponseHeader("Location").getValue();
                return context.redirectToURL(url);
            }
            System.out.println("登錄失敗,狀態碼:" + statusCode);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            method.releaseConnection();
        }
        return null;
    }
}
Java代碼
/**
 * Submits a new post to a site board.
 */
public class SitePost extends CommonSitePost
{
    /**
     * @param context browser context the post request is executed in
     */
    public SitePost(BrowserContext context) {
        super();
        this.context = context;
    }

    /**
     * Posts to the board's submit URL using the request parameters and extra headers
     * registered on this object.
     *
     * @param postinfo  post being submitted; only checked for null here
     * @param siteboard board whose post URL receives the request
     * @return the response body returned by {@code BrowserContext.doPost}, or null
     *         when an argument is null or the board has no post URL
     * @throws IOException if executing the request fails
     */
    public String post(Postinfos postinfo, Siteboards siteboard) throws IOException
    {
        if (postinfo == null || siteboard == null) {
            System.out.println("帖子或者版面信息輸入都不能為空");
            return null;
        }
        if (!StringUtils.isNotEmpty(siteboard.getPostUrl())) {
            System.out.println("版面的新帖提交地址不能為空!");
            return null;
        }
        PostMethod post = new PostMethod(siteboard.getPostUrl());
        if (this.hasRequestParameters()) {
            post.setRequestBody(this.getRequestParams());
        }
        if (this.hasExtraRequestHeaders()) {
            this.addExtraRequestHeaders(post, this.getExtraRequestHeaders());
        }
        // doPost applies the common request headers itself, so they are not set here.
        // Hand the server's answer back instead of discarding it.
        return this.context.doPost(post);
    }
}