// Given a URL, for example http://hovertree.net,
// how can the title (the <title> element) of that page be extracted?
// The code below implements this.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
namespace test_for_title
{
    /// <summary>
    /// Console sample that downloads a web page and prints the text of its
    /// title element.
    /// </summary>
    internal class Program
    {
        // Number of leading bytes scanned for a <meta> charset declaration.
        private const int MetaProbeBytes = 4096;

        // Matches the first title element; attributes on the tag are tolerated
        // and the content may span several lines ((?s)), case-insensitively ((?i)).
        private static readonly Regex TitleRegex = new Regex(
            @"(?si)<title(?:\s+(?:""[^""]*""|'[^']*'|[^""'>])*)?>(?<title>.*?)</title>");

        // Matches "charset=NAME" as it appears in a Content-Type header value.
        private static readonly Regex HeaderCharsetRegex = new Regex(
            @"(?i)\bcharset\s*=\s*[""']?\s*(?<charset>[\w.:-]+)");

        // Matches "charset=NAME" inside a <meta ...> tag; covers both
        // <meta charset="..."> and <meta http-equiv=... content="...; charset=...">.
        private static readonly Regex MetaCharsetRegex = new Regex(
            @"(?i)<meta\b[^>]*?\bcharset\s*=\s*[""']?\s*(?<charset>[\w.:-]+)");

        /// <summary>
        /// Downloads <paramref name="url"/> and decodes the response body to text.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="encoding">
        /// Encoding used to decode the body. When null, the encoding is taken from
        /// the Content-Type response header, then from a meta charset declaration
        /// in the page, and finally defaults to UTF-8.
        /// </param>
        /// <returns>The decoded page text.</returns>
        private static string GetHtml(string url, Encoding encoding)
        {
            byte[] buf;
            string contentType = null;

            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                buf = client.DownloadData(url);
                if (client.ResponseHeaders != null)
                {
                    contentType = client.ResponseHeaders["Content-Type"];
                }
            }

            return DecodeHtml(buf, encoding, contentType);
        }

        /// <summary>
        /// Decodes raw page bytes to text.
        /// </summary>
        /// <param name="buf">Raw bytes of the page.</param>
        /// <param name="encoding">
        /// Encoding to use; when non-null it is used as-is and no detection happens.
        /// </param>
        /// <param name="contentType">
        /// Value of the Content-Type response header, or null when unavailable.
        /// </param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="buf"/> is null.</exception>
        internal static string DecodeHtml(byte[] buf, Encoding encoding, string contentType)
        {
            if (buf == null)
            {
                throw new ArgumentNullException("buf");
            }

            if (encoding != null)
            {
                return encoding.GetString(buf);
            }

            // 1) A charset named by the HTTP header takes precedence.
            Encoding detected = FindEncoding(HeaderCharsetRegex, contentType);

            // 2) Otherwise look for a <meta> declaration near the top of the page.
            //    The declaration itself is plain ASCII, so an ASCII decode of the
            //    first bytes is enough to read it regardless of the real encoding.
            if (detected == null)
            {
                int probeLength = Math.Min(buf.Length, MetaProbeBytes);
                string head = Encoding.ASCII.GetString(buf, 0, probeLength);
                detected = FindEncoding(MetaCharsetRegex, head);

                // A UTF-16/UTF-32 label that was readable as ASCII cannot be true;
                // ignore it and fall through to the UTF-8 default.
                if (detected != null && IsWideUnicode(detected))
                {
                    detected = null;
                }
            }

            // 3) Default to UTF-8.
            return (detected ?? Encoding.UTF8).GetString(buf);
        }

        /// <summary>
        /// Extracts the text of the first title element from an HTML document.
        /// </summary>
        /// <param name="html">HTML text; may be null or empty.</param>
        /// <returns>
        /// The trimmed title text, or an empty string when <paramref name="html"/>
        /// is null/empty or contains no title element.
        /// </returns>
        internal static string GetTitle(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            // A failed match yields an empty group value, so no success check is needed.
            return TitleRegex.Match(html).Groups["title"].Value.Trim();
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and writes its title to the console.
        /// </summary>
        /// <param name="url">Address of the page.</param>
        private static void PrintTitle(string url)
        {
            string html = GetHtml(url, null);
            Console.WriteLine("頁面的title為:\n" + GetTitle(html));
        }

        /// <summary>
        /// Entry point: prints the title of a sample page and waits for input.
        /// </summary>
        private static void Main(string[] args)
        {
            PrintTitle("http://hovertree.net");
            Console.Read();
        }

        // Applies the pattern to the text and resolves the captured charset name;
        // returns null when there is no text, no match, or the name is unsupported.
        private static Encoding FindEncoding(Regex pattern, string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            Match match = pattern.Match(text);
            if (!match.Success)
            {
                return null;
            }

            try
            {
                return Encoding.GetEncoding(match.Groups["charset"].Value);
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name on this runtime.
                return null;
            }
            catch (NotSupportedException)
            {
                return null;
            }
        }

        // True for the UTF-16 (1200/1201) and UTF-32 (12000/12001) code pages.
        private static bool IsWideUnicode(Encoding candidate)
        {
            int codePage = candidate.CodePage;
            return codePage == 1200 || codePage == 1201
                || codePage == 12000 || codePage == 12001;
        }
    }
}