效果:
描述:此功能是抓取外國的一個檢測PM2.5的網站。實時讀取網站的數據,然後保存到數據庫裡面。每隔一小時刷新一次。
地址為:http://beijing.usembassy-china.org.cn/070109air.html
篩選後的地址為:http://utils.usembassy.gov/feed2js/feed2js.php?src=http%3A%2F%2Fwww.stateair.net%2Fweb%2Frss%2F1%2F1.xml&desc=1&num=7&targ=y&utf=y&pc=y&words=40&
思路:先抓取到頁面的所有數據,保存到txt裡面,再一行一行的讀取txt,然後用split,substring截取到自己想要的數據,最後保存到數據庫,在進行插入數據庫的時候查看一下是否已經存在,如果不存在則插入。
代碼:
using System; using System.Collections.Generic; using System.Configuration; using System.Data; using System.Data.SqlClient; using System.IO; //using System.Linq; using System.Net; using System.Text; using System.Text.RegularExpressions; //using System.Threading.Tasks; /******************************** * 創建人:青蘋果 * 創建時間:2015-12-28 * 描述:獲取美利堅合眾國的 PM2.5 * ******************************/ namespace GetUSAData { class Program { //public static string GetURL = System.Configuration.ConfigurationSettings.AppSettings["GetURL"];//獲取數據的地址 public static string GetURL = "http://utils.usembassy.gov/feed2js/feed2js.php?src=http%3A%2F%2Fwww.stateair.net%2Fweb%2Frss%2F1%2F1.xml&desc=1&num=7&targ=y&utf=y&pc=y&words=40&"; public static string txtURL = System.Configuration.ConfigurationSettings.AppSettings["txtURL"];//保存為txt文件的路徑 public static string conn = ConfigurationManager.ConnectionStrings["ConnectionString"].ToString(); static void Main(string[] args) { LoadGO(); } public static void LoadGO() { GetUSA(); List<string[]> getlist = Read(txtURL); //刪除txt if (File.Exists(txtURL)) { //如果存在則刪除 File.Delete(txtURL); } if (getlist.Count > 0) { for (int i = getlist.Count-1; i >-1; i--) { DateTime dtime = DateTime.Parse(getlist[i][0].ToString()); string getTime = dtime.ToString("yyyy-MM-dd HH:mm"); string controlTime = dtime.ToString("yyyy-MM-dd"); float LatestHourdata1 = float.Parse(getlist[i][2]); int LatestHourdata2 = Convert.ToInt32(getlist[i][3]); float Avgdata1 = 0; int Avgdata2 = 0; string Avgdata3 = getlist[i][4].ToString(); List<SqlParameter> listWhere = new List<SqlParameter>(); listWhere.Add(new SqlParameter("@strDatetime", controlTime)); string sqlSelect = @"SELECT count(*) as allcount,sum(LatestHourdata1) as LatestHourdata1_avg, sum(LatestHourdata2) as LatestHourdata2_avg FROM T_twitter where ([LatestHourdata1] is not null or [LatestHourdata2] is not null or [Avgdata1] is not null or [AvgData2] is not null) and CONVERT(varchar(100), [datetime], 23)=@strDatetime"; DataTable sumDT = ControlDB(sqlSelect, listWhere, "select"); //查詢總和用於計算日均值 if (sumDT.Rows.Count > 0) { foreach (DataRow itemDR in sumDT.Rows) { int allcount = Convert.ToInt32(itemDR["allcount"].ToString()); //數據庫中當前日期數量總和 if (allcount > 0) { if (itemDR["LatestHourdata1_avg"] != null) { Avgdata1 = float.Parse(itemDR["LatestHourdata1_avg"].ToString()); //數據庫中LatestHourdata1_avg總和 Avgdata1 = (Avgdata1 + LatestHourdata1) / (allcount + 1);//(數據庫的總和+最新的一條)/(數據庫的總和數量+1)=日平均值 } if (itemDR["LatestHourdata2_avg"] != null) { Avgdata2 = Convert.ToInt32(itemDR["LatestHourdata2_avg"].ToString()); //數據庫中LatestHourdata2_avg總和 Avgdata2 = (Avgdata2 + LatestHourdata2) / (allcount + 1);//(數據庫的總和+最新的一條)/(數據庫的總和數量+1)=日平均值 } //根據網站規則判斷PM2.5的平均嚴重性 if (Avgdata2 >= 0 && Avgdata2 <= 50) { Avgdata3 = " Good (at 24-hour exposure at this level)"; } else if (Avgdata2 >= 51 && Avgdata2 <= 100) { Avgdata3 = " Moderate (at 24-hour exposure at this level)"; } else if (Avgdata2 >= 101 && Avgdata2 <= 150) { Avgdata3 = " Unhealthy for Sensitive Groups (at 24-hour exposure at this level)"; } else if (Avgdata2 >= 151 && Avgdata2 <= 200) { Avgdata3 = " Unhealthy (at 24-hour exposure at this level)"; } else if (Avgdata2 >= 201 && Avgdata2 <= 300) { Avgdata3 = " Very Unhealthy (at 24-hour exposure at this level)"; } else { Avgdata3 = " Hazardous (at 24-hour exposure at this level)"; } } else { Avgdata1 = LatestHourdata1; Avgdata2 = LatestHourdata2; } } } List<SqlParameter> pars = new List<SqlParameter>(); pars.Add(new SqlParameter("@whereDatetime", getTime)); pars.Add(new SqlParameter("@datetime", getTime)); pars.Add(new SqlParameter("@LatestHourdata1", LatestHourdata1)); pars.Add(new SqlParameter("@LatestHourdata2", LatestHourdata2)); pars.Add(new SqlParameter("@LatestHourdata3", getlist[i][4].ToString())); pars.Add(new SqlParameter("@Avgdata1", Avgdata1)); pars.Add(new SqlParameter("@Avgdata2", Avgdata2)); pars.Add(new SqlParameter("@Avgdata3", Avgdata3)); string sql = @"if not exists(select * from dbo.T_twitter where datetime=@whereDatetime) begin insert T_twitter (datetime,LatestHourdata1,LatestHourdata2,LatestHourdata3,Avgdata1,AvgData2,AvgData3) VALUES(@datetime,@LatestHourdata1,@LatestHourdata2,@LatestHourdata3,@Avgdata1,@Avgdata2,@Avgdata3) end"; ControlDB(sql, pars, "");//插入數據 } } } /// <summary> /// 獲取頁面數據保存至txt /// </summary> public static void GetUSA() { WebRequest request = WebRequest.Create(GetURL); WebResponse response = request.GetResponse(); StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312")); //reader.ReadToEnd() 表示取得網頁的源碼 FileStream fs = new FileStream(txtURL, FileMode.Create); byte[] data = System.Text.Encoding.Default.GetBytes(reader.ReadToEnd()); //開始寫入 fs.Write(data, 0, data.Length); //清空緩沖區、關閉流 fs.Flush(); fs.Close(); } /// <summary> /// 根據路徑讀取txt文件 /// </summary> /// <param name="path">txt路徑</param> /// <returns></returns> public static List<string[]> Read(string path) { List<string[]> list = new List<string[]>(); StreamReader sr = new StreamReader(path, Encoding.Default); String line; while ((line = sr.ReadLine()) != null) { int i = line.ToString().IndexOf("title"); if (i > 0) { string titleStr = line.ToString().Substring(i + 7); //截取到title後面的值 string[] titlelist = titleStr.Split('"'); //以" 截取 string titledata = titlelist[0]; string[] datalist = titledata.Split('&'); //以& 截取 string data = datalist[0]; string[] datastrlist = data.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);//以; 截取 list.Add(datastrlist); } } sr.Close(); return list; } /// <summary> /// 增查表 /// </summary> /// <returns></returns> public static DataTable ControlDB(string sql, List<SqlParameter> par, string type) { DataAccess controData = new DataAccess(); DataTable resultDT = new DataTable(); if (type == "select") { resultDT = controData.GetDataTable(sql, par.ToArray()); } else { int result = controData.ExecuteSql(sql, par.ToArray()); } return resultDT; } } }
Demo下載:
http://files.cnblogs.com/files/xinchun/GetUSAData.zip