using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;
using Tool;

namespace Search
{
    /// <summary>
    /// Main form of a small crawler: button1 downloads the images referenced by a
    /// single page, button2 starts a link crawl on a worker thread.
    /// </summary>
    public partial class Form1 : Form
    {
        // Matches an <img ... src=...> tag and captures the src value in the "imgUrl" group.
        // NOTE(review): the "<img\b[^<>" prefix and the "imgUrl" group name are reconstructed
        // from the Groups["imgUrl"] lookup in button1_Click — confirm against the intended pattern.
        private static readonly Regex ImgTagRegex = new Regex(
            @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
            RegexOptions.IgnoreCase);

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// FIFO queue of URLs that are still to be visited, backed by a linked list.
        /// </summary>
        public class Queue
        {
            private readonly LinkedList<string> queue = new LinkedList<string>();

            /// <summary>Appends <paramref name="t"/> to the tail of the queue.</summary>
            public void enQueue(string t)
            {
                queue.AddLast(t);
            }

            /// <summary>
            /// Removes and returns the element at the head of the queue.
            /// </summary>
            /// <returns>The oldest element, or <c>null</c> when the queue is empty.</returns>
            public string deQueue()
            {
                LinkedListNode<string> head = queue.First;
                if (head == null)
                {
                    // Empty queue: callers (see search) already treat null as "nothing to do".
                    return null;
                }

                queue.RemoveFirst();
                return head.Value;
            }

            /// <summary>Returns <c>true</c> when the queue holds no elements.</summary>
            public bool isQueueEmpty()
            {
                return queue.Count == 0;
            }

            /// <summary>Returns <c>true</c> when the queue contains <paramref name="t"/>.</summary>
            public bool contians(string t)
            {
                return queue.Contains(t);
            }

            /// <summary>Returns the number of queued elements.</summary>
            public int getcount()
            {
                return queue.Count;
            }
        }

        /// <summary>
        /// Process-wide crawl state: the set of visited URLs and the queue of URLs
        /// waiting to be visited. The members are static and not synchronized.
        /// </summary>
        public class LinkQueue
        {
            // URLs that have already been visited.
            private static readonly ISet<string> visitedUrl = new HashSet<string>();

            // URLs waiting to be visited.
            private static readonly Queue unVisitedUrl = new Queue();

            /// <summary>Returns the queue of URLs waiting to be visited.</summary>
            public static Queue getUnVisitedUrl()
            {
                return unVisitedUrl;
            }

            /// <summary>Records <paramref name="url"/> as visited.</summary>
            public static void addVisitedUrl(String url)
            {
                visitedUrl.Add(url);
            }

            /// <summary>Removes <paramref name="url"/> from the visited set.</summary>
            public static void removeVisitedUrl(String url)
            {
                visitedUrl.Remove(url);
            }

            /// <summary>
            /// Dequeues the next URL to visit.
            /// </summary>
            /// <returns>The next URL, or <c>null</c> when no URL is pending.</returns>
            public static Object unVisitedUrlDeQueue()
            {
                return unVisitedUrl.deQueue();
            }

            /// <summary>
            /// Queues <paramref name="url"/> unless it is null/blank, already visited,
            /// or already pending, so that each URL is visited at most once.
            /// </summary>
            public static void addUnvisitedUrl(String url)
            {
                if (url == null || url.Trim().Equals(""))
                {
                    return;
                }

                if (visitedUrl.Contains(url) || unVisitedUrl.contians(url))
                {
                    return;
                }

                unVisitedUrl.enQueue(url);
            }

            /// <summary>Returns the number of URLs visited so far.</summary>
            public static int getVisitedUrlNum()
            {
                return visitedUrl.Count;
            }

            /// <summary>Returns <c>true</c> when no URL is waiting to be visited.</summary>
            public static bool unVisitedUrlsEmpty()
            {
                return unVisitedUrl.isQueueEmpty();
            }
        }

        string[] urlarr = new string[100];

        /// <summary>
        /// Fetches the page named in textBox1 (or a default URL when the box is empty),
        /// lists its links and image URLs in richTextBox1, and passes every image URL
        /// to zzHttp.downfile with the target directory d:\pic\.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            zzHttp http = new zzHttp();
            CookieContainer cookie = new CookieContainer();
            string url = textBox1.Text != "" ? textBox1.Text : "http://image.baidu.com/";
            string content = http.SendDataByGET(url, "", ref cookie);

            string baseUri = Utility.GetBaseUri(url);
            string[] links = Parser.ExtractLinks(baseUri, content);
            foreach (string link in links)
            {
                // AppendText avoids rebuilding the whole text for every link.
                richTextBox1.AppendText(link + "\n");
            }

            // Collect every image URL found in the page.
            MatchCollection matches = ImgTagRegex.Matches(content);
            Queue que = new Queue();
            foreach (Match match in matches)
            {
                que.enQueue(match.Groups["imgUrl"].Value);
            }

            // Drain the queue completely. Comparing a rising index against the
            // shrinking count would stop after roughly half of the entries.
            int k = 0;
            while (!que.isQueueEmpty())
            {
                string picurl = que.deQueue();
                richTextBox1.AppendText(picurl + "\n");

                // The file name is the last path segment of the image URL.
                string[] s = picurl.Split('/');
                string picname = s[s.Length - 1];
                zzHttp.downfile(picurl, picname, @"d:\pic\");
                k++;
            }

            label1.Text = k + "張";
        }

        /// <summary>
        /// Crawl loop: starting from a fixed seed URL, repeatedly dequeues a URL,
        /// downloads it, marks it visited and queues the links it contains, until
        /// the queue is empty or more than 1000 URLs have been visited.
        /// Runs on the worker thread started by button2_Click.
        /// </summary>
        void search()
        {
            int i = 0;
            LinkQueue.addUnvisitedUrl("http://blog.csdn.net/zhujunxxxxx/");

            while (!LinkQueue.unVisitedUrlsEmpty() && LinkQueue.getVisitedUrlNum() <= 1000)
            {
                // Take the URL at the head of the queue.
                String visitUrl = (String)LinkQueue.unVisitedUrlDeQueue();
                if (visitUrl == null)
                {
                    continue;
                }

                zzHttp downLoader = new zzHttp();
                CookieContainer cookie = new CookieContainer();

                // Download the page.
                string content = downLoader.SendDataByGET(visitUrl, "", ref cookie);

                // Mark the URL as visited.
                LinkQueue.addVisitedUrl(visitUrl);

                // Extract the links contained in the downloaded page.
                string baseUri = Utility.GetBaseUri(visitUrl);
                string[] links = Parser.ExtractLinks(baseUri, content);

                i++;
                Add2Message("已訪問數目:" + LinkQueue.getVisitedUrlNum() + ",count=" + LinkQueue.getUnVisitedUrl().getcount());

                // Queue the new, not yet visited URLs.
                foreach (string link in links)
                {
                    // Skip links whose text contains a stylesheet/script/image marker.
                    if (link.Contains("css") || link.Contains("js") || link.Contains("gif")
                        || link.Contains("jpg") || link.Contains("png") || link.Contains("jpeg"))
                    {
                        continue;
                    }

                    LinkQueue.addUnvisitedUrl(link);
                    AddMessage(link);
                }
            }
        }

        /// <summary>Starts the crawl on a separate thread so the UI stays responsive.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Thread worker = new Thread(search);
            // Background thread: the crawl must not keep the process alive after the form closes.
            worker.IsBackground = true;
            worker.Start();
        }

        private delegate void InfoDelegate(string message);

        /// <summary>
        /// Appends <paramref name="message"/> plus a line break to richTextBox1 and scrolls
        /// to the caret, marshalling the call through Invoke when InvokeRequired is true.
        /// </summary>
        public void AddMessage(string message)
        {
            if (richTextBox1.InvokeRequired)
            {
                // Not on the control's thread: re-enter this method through a delegate.
                InfoDelegate d = new InfoDelegate(AddMessage);
                richTextBox1.Invoke(d, new object[] { message });
            }
            else
            {
                richTextBox1.AppendText(message + Environment.NewLine);
                richTextBox1.ScrollToCaret();
            }
        }

        private delegate void Info2Delegate(string message);

        /// <summary>
        /// Sets the text of label2 to <paramref name="message"/>, marshalling the call
        /// through Invoke when InvokeRequired is true.
        /// </summary>
        public void Add2Message(string message)
        {
            if (label2.InvokeRequired)
            {
                // Not on the control's thread: re-enter this method through a delegate.
                Info2Delegate d = new Info2Delegate(Add2Message);
                label2.Invoke(d, new object[] { message });
            }
            else
            {
                label2.Text = message;
            }
        }
    }
}