目前還是有BUG的,最新的測試頁面在: http://www.reallydo.com/getimg.asp
正則分析頁面在: http://jorkin.reallydo.com/article.asp?id=380
發現BUG請在後面留言,謝謝.
1.31修正
src=後面有空格不能正確匹配.已修正.
src=''為空時出錯.已修正.
發現BUG: 圖片路徑有多個空格時只能保留一個.未修正.
2.18修正
圖片路徑有多個空格時只能保留一個的BUG.已修正.
<%
'Purpose: collect the src address of every <img> tag in an HTML string and return them as an array.
'Source: http://jorkin.reallydo.com/article.asp?id=448
'Requires the external ReplaceAll function: http://jorkin.reallydo.com/article.asp?id=406
'Parameter: sString - the HTML markup to scan (may be Null).
'Returns: a zero-based array of src values in document order;
'         an empty array (UBound = -1) when sString is Null or contains no usable <img> tag.
Function getIMG(sString)
    Dim sReallyDo, regEx, iReallyDo
    Dim oMatches, cMatch
    '// Start from an empty array (UBound = -1); ReDim also declares aReallyDo
    iReallyDo = -1
    ReDim aReallyDo(iReallyDo)
    If IsNull(sString) Then
        getIMG = aReallyDo
        Exit Function
    End If
    '// Normalise the HTML so that every <img tag sits on its own line.
    '// The "." in the patterns below does not cross line breaks, so this keeps
    '// each match confined to a single tag.
    sReallyDo = sString
    '// Best-effort: runtime errors from here on are skipped, as in the original version
    On Error Resume Next
    sReallyDo = Replace(sReallyDo, vbCr, " ")
    sReallyDo = Replace(sReallyDo, vbLf, " ")
    sReallyDo = Replace(sReallyDo, vbTab, " ")
    sReallyDo = Replace(sReallyDo, "<img ", vbCrLf & "<img ", 1, -1, 1)
    sReallyDo = Replace(sReallyDo, "/>", " />", 1, -1, 1)
    '// ReplaceAll is defined outside this file; presumably it repeats the replacement
    '// until no occurrence is left (removing every space after "=" and ">") - TODO confirm
    sReallyDo = ReplaceAll(sReallyDo, "= ", "=", True)
    sReallyDo = ReplaceAll(sReallyDo, "> ", ">", True)
    sReallyDo = Replace(sReallyDo, "><", ">" & vbCrLf & "<")
    sReallyDo = Trim(sReallyDo)
    Set regEx = New RegExp
    regEx.IgnoreCase = True
    regEx.Global = True
    '// Strip quoted event handlers (onclick, onload, ...) so that a "src=" inside
    '// script text is not mistaken for the image address.
    '// The attribute name must really start with "on"; a one-character class [on]
    '// would also swallow attributes such as name= together with a following src=.
    regEx.Pattern = "\son\w+\s*=\s*([""'])(.*?)\1"
    sReallyDo = regEx.Replace(sReallyDo, "")
    '// Put quotes around src values that were written without them
    regEx.Pattern = "<img.*?\ssrc=([^""'\s][^""'\s>]*).*?>"
    sReallyDo = regEx.Replace(sReallyDo, "<img src=""$1"" />")
    '// Match every quoted src value: submatch 0 is the quote character, submatch 1 the address
    regEx.Pattern = "<img.*?\ssrc=([""'])([^""']+?)\1.*?>"
    Set oMatches = regEx.Execute(sReallyDo)
    '// Append each address to the result array
    For Each cMatch In oMatches
        iReallyDo = iReallyDo + 1
        ReDim Preserve aReallyDo(iReallyDo)
        aReallyDo(iReallyDo) = cMatch.SubMatches(1)
    Next
    getIMG = aReallyDo
End Function
%>
<%
'Usage: the result is an ordinary array, so work with it like any other array.
sContent = "HTML代碼字段" '//sContent holds the HTML markup to scan (for example a text field read from a recordset)
Dim aImages : aImages = GetImg(sContent) '//array that receives every image address found
'List every image address.
'The addresses come from arbitrary markup, so they are HTML-encoded before being written to the page.
For i = 0 To UBound(aImages)
    Response.Write("<b>第" & i + 1 & "張圖片地址:</b> " & Server.HTMLEncode(aImages(i)) & "<br />")
Next
'UBound is -1 for an empty result, so only show first/last when at least one image was found
If UBound(aImages) > -1 Then
    'Show the first image address:
    Response.Write("<p><b>第一張圖片地址:</b> " & Server.HTMLEncode(aImages(0)) & "</p>")
    'Show the last image address:
    Response.Write("<p><b>最後一張圖片地址:</b> " & Server.HTMLEncode(aImages(UBound(aImages))) & "</p>")
End If
%>