流程其實很簡單,就是模擬瀏覽器去訪問百度盤的地址,然後從返回的網頁中提取實際的下載地址,核心函數就是這個:
[csharp]
// Example share-page address: http://pan.baidu.com/share/link?shareid=112694&uk=455690558
/// <summary>
/// Downloads a Baidu Pan share page and extracts the direct file address
/// from the href of its download-button anchor.
/// </summary>
/// <param name="strBaiduPan">Address of the Baidu Pan share page; may be null.</param>
/// <returns>
/// The href of the last download-button anchor found in the page, with
/// HTML-escaped ampersands decoded. Returns null when the argument is null,
/// when the page could not be fetched, or when no download button is found.
/// </returns>
private string ConvertBaiduPan_Thunder(string strBaiduPan)
{
    if (strBaiduPan == null)
    {
        return null;
    }

    // Fetch the share page.
    string strBaiduPanWebPage = MyHttp.GetHtml(strBaiduPan);
    if (string.IsNullOrEmpty(strBaiduPanWebPage))
    {
        // Null is treated like an empty page so Regex.Match is never handed null.
        MessageBox.Show("訪問地址失敗!"+ strBaiduPan);
        return null;
    }

    // The download button looks like:
    //   <a class="dbtn cancel b-fr" href="http://www.baidupcs.com/file/...?fid=...&amp;time=..." id="downFileButtom"><b>...</b></a>
    // Group 1 captures exactly the href value, so no quote/attribute stripping is
    // needed afterwards and the result carries no trailing whitespace.
    Regex r = new Regex("<a class=\"dbtn cancel b-fr\" href=\"(http://[^\"]*)\"\\s+id=\"downFileButtom\"><b>");

    string strThunder = null;
    for (Match m = r.Match(strBaiduPanWebPage); m.Success; m = m.NextMatch())
    {
        // Attribute values in the page source are entity-escaped; turn "&amp;" back
        // into "&". When several buttons match, the last one wins.
        strThunder = m.Groups[1].Value.Replace("&amp;", "&");
    }

    return strThunder;
}
// Example share-page address: http://pan.baidu.com/share/link?shareid=112694&uk=455690558
/// <summary>
/// Downloads a Baidu Pan share page and extracts the direct file address
/// from the href of its download-button anchor.
/// </summary>
/// <param name="strBaiduPan">Address of the Baidu Pan share page; may be null.</param>
/// <returns>
/// The href of the last download-button anchor found in the page, with
/// HTML-escaped ampersands decoded. Returns null when the argument is null,
/// when the page could not be fetched, or when no download button is found.
/// </returns>
private string ConvertBaiduPan_Thunder(string strBaiduPan)
{
    if (strBaiduPan == null)
    {
        return null;
    }

    // Fetch the share page.
    string strBaiduPanWebPage = MyHttp.GetHtml(strBaiduPan);
    if (string.IsNullOrEmpty(strBaiduPanWebPage))
    {
        // Null is treated like an empty page so Regex.Match is never handed null.
        MessageBox.Show("訪問地址失敗!"+ strBaiduPan);
        return null;
    }

    // The download button looks like:
    //   <a class="dbtn cancel b-fr" href="http://www.baidupcs.com/file/...?fid=...&amp;time=..." id="downFileButtom"><b>...</b></a>
    // Group 1 captures exactly the href value, so no quote/attribute stripping is
    // needed afterwards and the result carries no trailing whitespace.
    Regex r = new Regex("<a class=\"dbtn cancel b-fr\" href=\"(http://[^\"]*)\"\\s+id=\"downFileButtom\"><b>");

    string strThunder = null;
    for (Match m = r.Match(strBaiduPanWebPage); m.Success; m = m.NextMatch())
    {
        // Attribute values in the page source are entity-escaped; turn "&amp;" back
        // into "&". When several buttons match, the last one wins.
        strThunder = m.Groups[1].Value.Replace("&amp;", "&");
    }

    return strThunder;
}
以上這個函數就是取出百度網盤中文件地址的函數了。很簡單的網頁抓取和文字匹配,是吧?如果有什麼不明白的,可以自己看源碼,或者給我留言。作為小白,樂意和眾多菜鳥交流。