PHP小偷程序是采集的前身，有了小偷原理才会写采集。
index.PHP
<?php
// Index page of the scraper demo: fetches a remote listing page, extracts every
// "·<a href=... target=_blank>title</a>" entry and re-emits each one as a link
// to the local viewer script, passing the article URL in the `url` parameter.
$con = file_get_contents("http://it.sohu.com/s2010/5651/s274087241/index.sHtml");
if ($con === false) {
    // file_get_contents() returns false on failure; stop rather than run the regex on a non-string.
    exit("Failed to fetch the listing page.");
}
// U = ungreedy, so each (.*) stops at the earliest possible match.
// Group 1 is the href value, group 2 is the link text.
// NOTE(review): there is no `u` modifier, so the "·" literal is matched byte-wise;
// this file must be saved in the same encoding as the fetched page — confirm.
$preg="/·<a href=(.*) target=_blank>(.*)<\/a>/U";
preg_match_all($preg,$con,$arr);
foreach($arr[1] as $id=>$v){
    // Drop surrounding quotes/whitespace in case the remote markup quotes its href.
    $target = trim($v, " \t\"'");
    // urlencode() keeps '&', '#', '?' etc. inside the article URL from truncating
    // or splitting the `url` query parameter; the result is safe inside a quoted attribute.
    // The link text is relayed exactly as scraped (remote markup, not escaped here).
    // NOTE(review): the target name "vIEw.PHP" must match the viewer file's real
    // name on case-sensitive filesystems — confirm.
    echo "<a href=\"vIEw.PHP?url=".urlencode($target)."\">".$arr[2][$id]."</a><br>";
}
?>
vIEw.PHP
<?php
// Viewer page of the scraper demo: fetches the article whose address is given in
// the `url` query parameter, then prints its <h1> title and the inner HTML of the
// "contentText" div. The scraped markup is relayed as-is (not escaped).

// Quote the key: a bare `url` is an undefined constant, which is an Error on PHP 8.
$url = isset($_GET['url']) ? $_GET['url'] : '';
if (!is_string($url)) {
    // `?url[]=...` would arrive as an array.
    $url = '';
}

// The parameter is untrusted input. Without validation, file_get_contents() would
// happily read local files or php:// streams, or fetch arbitrary hosts.
// Only http(s) URLs on sohu.com (the site the patterns below are written for) are accepted.
// NOTE(review): this checks the initial request only; HTTP redirects are still followed.
$parts  = parse_url($url);
$scheme = isset($parts['scheme']) ? strtolower($parts['scheme']) : '';
$host   = isset($parts['host']) ? strtolower($parts['host']) : '';
$hostAllowed = ($host === 'sohu.com') || (substr($host, -9) === '.sohu.com');
if (($scheme !== 'http' && $scheme !== 'https') || !$hostAllowed) {
    http_response_code(400);
    exit('Invalid url parameter.');
}

$con = file_get_contents($url);
if ($con === false) {
    // file_get_contents() returns false on failure.
    http_response_code(502);
    exit('Failed to fetch the article.');
}

// Title: first <h1>...</h1> that sits on a single line (no `s` modifier, so '.' does not cross newlines).
$preg="/<h1>(.*)<\/h1>/";
preg_match($preg,$con,$arr);
// Fall back to an empty title when the pattern does not match.
echo "<h1>".(isset($arr[1]) ? $arr[1] : '')."</h1>";
echo "<hr>";

// Body: contents of the contentText div.
// NOTE(review): (.*) is greedy and `s` lets '.' span newlines, so the capture runs
// to the LAST </div> in the document, not necessarily this div's own closing tag.
$preg2="/<div class=\"text clear\" id=\"contentText\" collection=\"Y\">(.*)<\/div>/s";
preg_match($preg2,$con,$arr2);
echo isset($arr2[1]) ? $arr2[1] : '';
?>