目前還是有BUG的,最新的測試頁面在: http://www.reallydo.com/getimg.asp
正則分析頁面在: http://jorkin.reallydo.com/article.asp?id=380
發現BUG請在後面留言,謝謝.
1.31修正
src=後面有空格不能正確匹配.已修正.
src=''為空時出錯.已修正.
發現BUG: 圖片路徑有多個空格時只能保留一個.未修正.
2.18修正
圖片路徑有多個空格時只能保留一個的BUG.已修正.
複製代碼,代碼如下:
<%
'Purpose: collect the src address of every <img> tag found in an HTML string.
'Source: http://jorkin.reallydo.com/article.asp?id=448
'Requires the ReplaceAll function: http://jorkin.reallydo.com/article.asp?id=406
'
'Parameters:
'  sString - HTML text to scan; may be Null.
'Returns:
'  A zero-based array holding one src value per matched <img> tag, in the
'  order the tags appear (an empty array, UBound = -1, when none match),
'  or the empty string "" when sString is Null.
Function getIMG(sString)
    Dim sReallyDo, regEx, iReallyDo
    Dim oMatches, cMatch

    '// Start with an empty array (UBound = -1)
    iReallyDo = -1
    ReDim aReallyDo(iReallyDo)

    '// Null input: keep the historical "" return so existing callers still work
    If IsNull(sString) Then
        getIMG = ""
        Exit Function
    End If

    '// Normalise the HTML so that every <img ...> tag sits on its own line;
    '// the regular expressions below rely on "." not crossing a line feed.
    sReallyDo = sString
    '// Best effort: a failing normalisation step is skipped, not fatal
    On Error Resume Next
    sReallyDo = Replace(sReallyDo, vbCr, " ")
    sReallyDo = Replace(sReallyDo, vbLf, " ")
    sReallyDo = Replace(sReallyDo, vbTab, " ")
    sReallyDo = Replace(sReallyDo, "<img ", vbCrLf & "<img ", 1, -1, 1)
    '// Detach a trailing "/>" so it cannot be glued to an unquoted src value
    sReallyDo = Replace(sReallyDo, "/>", " />", 1, -1, 1)
    '// NOTE(review): ReplaceAll is defined elsewhere; presumably it repeats the
    '// replacement until no occurrence is left - confirm against its source.
    sReallyDo = ReplaceAll(sReallyDo, "= ", "=", True)
    sReallyDo = ReplaceAll(sReallyDo, "> ", ">", True)
    sReallyDo = Replace(sReallyDo, "><", ">" & vbCrLf & "<")
    sReallyDo = Trim(sReallyDo)
    On Error GoTo 0

    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True

    '// Strip quoted event-handler attributes (onclick, onload, ...).
    '// The attribute name must really start with "on": the previous pattern
    '// used the character class [on], which matched any whitespace followed by
    '// an "o" or an "n" (even inside alt/title text) and could swallow the
    '// src attribute that followed it.
    regEx.Pattern = "\son[a-z]+\s*=\s*([""']).*?\1"
    sReallyDo = regEx.Replace(sReallyDo, "")

    '// Rewrite tags whose src value is unquoted as <img src="..." />
    regEx.Pattern = "<img.*?\ssrc=([^\""\'\s][^\""\'\s>]*).*?>"
    sReallyDo = regEx.Replace(sReallyDo, "<img src=""$1"" />")

    '// Match every <img> tag with a quoted src; sub-match 0 is the quote
    '// character and sub-match 1 is the address itself
    regEx.Pattern = "<img.*?\ssrc=([\""\'])([^\""\']+?)\1.*?>"
    Set oMatches = regEx.Execute(sReallyDo)

    '// Append each address to the result array
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        '// Read the captured group directly instead of re-running the regex
        aReallyDo(iReallyDo) = cMatch.SubMatches(1)
    Next

    getIMG = aReallyDo

    '// Release the COM objects
    Set oMatches = Nothing
    Set regEx = Nothing
End Function
%>