正則表達式匹配html 過濾非法字符
匹配一個html標簽,匹配table如下:
<table[^>]*>[\s\S]*<\/table>
或
<table[^>]*>[\s\S]*?<\/table>
以上兩個表達式,一個加了"?"和一個卻不加"?",那麼這有什麼區別呢?
我們知道"?"在正則表達式裡是一個量詞:匹配前面的子表達式零次或一次;當它緊跟在其他量詞(如 *、+)之後時,則表示非貪婪(最小)匹配。
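在這裡,通過測試,我們得出這樣的結論:在不加"?"的情況下(貪婪匹配),在匹配下面一段內容的時候:
<table>這是第一個table</table><table>這是第二個table</table>
表達式會從第一個<table>一直匹配到最後一個</table>,把兩個table當成一個結果;加了"?"(非貪婪匹配)之後,則匹配到第一個</table>就停止,兩個table會被分別匹配出來。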
if (!function_exists('filterUnsafeHtml')) {
    /**
     * Strip document-level markup and defuse common script vectors in an HTML fragment.
     *
     * The rules run in a fixed order:
     *  1. collapse whitespace runs to one space and drop spaces right after "<";
     *  2. remove HTML comments, then any remaining "<!...>" declaration (doctype);
     *  3. remove the html/head/meta/body/link/form tags (their content is kept);
     *  4. upper-case "cookie";
     *  5. remove applet/style/title/object/noframes/frame/iframe/frameset/script
     *     elements together with their content, then any stray tag of those names;
     *  6. break the words "javascript"/"vbscript", rename on*= attributes to
     *     data-on*=, and escape the ampersand of numeric character references.
     *
     * NOTE(review): pattern-based filtering is not a complete XSS defence. Escape
     * per output context (e.g. htmlspecialchars) or use a dedicated HTML sanitizer
     * where untrusted markup must be rendered.
     *
     * @param mixed $html Markup to filter; cast to string (null becomes '').
     *
     * @return string The filtered markup.
     *
     * @throws \RuntimeException When PCRE fails on a rule (e.g. backtrack limit),
     *                           so partially filtered markup is never returned.
     */
    function filterUnsafeHtml($html)
    {
        // Tags whose markup is dropped while their content is kept.
        $unwrappedTags = ['html', 'head', 'meta', 'body', 'link', 'form'];
        // Elements dropped together with everything between open and close tag.
        $removedElements = ['applet', 'style', 'title', 'object', 'noframes', 'i?frame(?:set)?', 'script'];

        // "~" is the delimiter so that "/" in closing tags needs no escaping.
        $rules = [
            ['~\s+~', ' '],
            ['~<[ ]+~', '<'],
            // Comments go first: the declaration rule below stops at the first ">".
            ['~<!--.*?-->~s', ''],
            ['~<![^>]*>~', ''],
        ];

        foreach ($unwrappedTags as $tag) {
            // \b keeps e.g. <header> from being caught by the "head" rule.
            $rules[] = ['~</?' . $tag . '\b[^>]*>~i', ''];
        }

        // Script property names are case-sensitive, so "COOKIE" no longer resolves.
        $rules[] = ['~cookie~i', 'COOKIE'];

        foreach ($removedElements as $tag) {
            $rules[] = ['~<' . $tag . '\b[^>]*>.*?</' . $tag . '\b[^>]*>~si', ''];
            // Unpaired opening or closing tags left over after the paired removal.
            $rules[] = ['~</?' . $tag . '\b[^>]*>~i', ''];
        }

        // "java_script:" is not a valid URI scheme, so such links become inert.
        $rules[] = ['~(java|vb)(script)~i', '${1}_${2}'];
        // data-* attributes are never treated as event handlers.
        $rules[] = ['~\bon([a-z]+)\s*=~i', 'data-on${1}='];
        // Stops numeric character references from being decoded back into markup.
        $rules[] = ['~&#~', '&amp;#'];

        $filtered = (string) $html;
        foreach ($rules as $rule) {
            $filtered = preg_replace($rule[0], $rule[1], $filtered);
            if ($filtered === null) {
                throw new \RuntimeException(
                    sprintf('HTML filter rule %s failed (PCRE error code %d)', $rule[0], preg_last_error())
                );
            }
        }

        return $filtered;
    }
}

// Filter $str in place; an unset $str is treated as an empty string.
$str = filterUnsafeHtml(isset($str) ? $str : '');
?>