#include < curl / curl.h > #include < iostream > #include < stdio.h > #include < string.h > #include < pcre.h > #define OVECCOUNT 30 /* should be a multiple of 3 */ #define EBUFLEN 128 #define BUFLEN 10240 using namespacestd; size_t onWriteData(void * buffer, size_t size, size_t nmemb, void * str) { if (!str || !buffer) { return - 1; } string * result = (string * ) str; result - >append((char * ) buffer, size * nmemb); return nmemb; } //獲取頁面 int getWeb(string url, string & result) { long code = 0; string htmlpage; CURL * curl = curl_easy_init(); curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); //設置url curl_easy_setopt(curl, CURLOPT_POST, 0); //設置請求方法 curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5."); //偽裝客戶端 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &htmlpage); //設置接受返回結果字符串 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, onWriteData); //設置處理方法 curl_easy_perform(curl); //請求 curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &code); if (code == 200) { cout << "request success" << endl; result = htmlpage; //cout<<htmlpage<<endl; } curl_easy_cleanup(curl); return code; } int main(int argc, char * *argv) { pcre * re; constchar * error; int erroffset; int ovector[OVECCOUNT]; int rc, i; string url = "http://www.dianping.com/search/category/212/10/g103"; string html; getWeb(url, html); //char src[] = " "; //char pattern[] = "(<a>.+?</a>)"; constchar * src = html.c_str(); char pattern[] = "(<li class=\"\"[\\s\\S]*?</li>)"; printf("String : %s\n", src); printf("Pattern: \"%s\"\n", pattern); re = pcre_compile(pattern, 0, &error, &erroffset, NULL); if (re == NULL) { printf("PCRE compilation failed at offset %d: %s\n", erroffset, error); return1; } char * p = (char * ) src; while ((rc = pcre_exec(re, NULL, p, strlen(p), 0, 0, ovector, OVECCOUNT)) != PCRE_ERROR_NOMATCH) { printf("\nOK, %d matched ...\n\n", rc); for (i = 0; i < rc - 1; i++) { char * substring_start = p + ovector[2 * i]; int substring_length = ovector[2 * i + 1] - ovector[2 * i]; char matched[10240]; memset(matched, 0, 10240); strncpy(matched, substring_start, substring_length); printf("match:%s\n", matched); } p += ovector[1]; if (!p) { break; } } pcre_free(re); return0; }