asp 正则表达式抓取自定义标签里面的内容
asp提取自定义标签里面的内容麻烦高手帮我写个正则表达式能抓取<!--<list:n>--><!--</list:n>-->这个标签对里面的内容要因为是读取整个html页...
asp提取自定义标签里面的内容 麻烦高手帮我写个正则表达式能抓取<!--<list:n>--><!--</list:n>-->这个标签对里面的内容要
因为是读取整个html页面的内容,所以要支持空格。以下是要提取的字符串(有空格和换行的),以下是例子 展开
因为是读取整个html页面的内容,所以要支持空格。以下是要提取的字符串(有空格和换行的),以下是例子 展开
3个回答
展开全部
' Page setup: raise the script timeout, send cache-related headers, and reset
' the output buffer before any of the helper functions below are used.
' NOTE(review): no opening ASP delimiter is visible above this line - confirm
' that the enclosing page starts the script block before these statements.
Server.Scripttimeout=9999999
' Expiry settings for the response.
Response.Expires = 0
Response.expiresabsolute = Now() - 1
' Cache-related headers added by hand, followed by the CacheControl property.
Response.addHeader "pragma", "no-cache"
Response.addHeader "cache-control", "private"
Response.CacheControl = "no-cache"
' Turn on buffering and clear the buffer.
Response.Buffer = True
Response.Clear
' The timeout is assigned a second time here; this later value is the one that remains set.
Server.ScriptTimeOut=999999999
'***************************************************************
'* Function definitions
'***************************************************************
' Geturl: fetch a URL with a synchronous GET and return the response as text.
'   url     - address to request.
'   Returns - the response body decoded as gb2312 through BytesToBstr, or an
'             empty string when the request fails or does not complete.
' NOTE(review): MSXML also ships a ServerXMLHTTP class intended for server-side
' use; consider switching the ProgID after testing on the target server.
Function Geturl(url)
    Dim Http
    Geturl = ""
    ' The original cleared Err at the end without ever enabling error trapping,
    ' so an unreachable host aborted the whole page. Trap errors locally instead.
    On Error Resume Next
    Set Http = Server.CreateObject("MSXML2.XMLHTTP")
    'Set Http = Server.CreateObject("Microsoft.Xmlhttp")
    If Err.Number = 0 Then
        Http.open "GET", url, False
        Http.send
    End If
    If Err.Number = 0 Then
        ' readyState 4 means the request has completed.
        If Http.readyState = 4 Then
            Geturl = BytesToBstr(Http.responseBody, "gb2312")
            ' A decoding failure must not leave a half-assigned result behind.
            If Err.Number <> 0 Then Geturl = ""
        End If
    End If
    Set Http = Nothing
    Err.Clear
End Function
' BytesToBstr: decode a binary response body into a string.
'   body    - binary data to decode (Geturl passes Http.responseBody).
'   Cset    - character set name used for decoding, e.g. "gb2312".
'   Returns - the decoded text.
Function BytesToBstr(body,Cset)
    ' progIdSuffix was an undeclared variable named "s" in the original, which
    ' made it a page-level global that could overwrite a caller's variable.
    ' NOTE(review): the ProgID is presumably split in two on purpose; the
    ' concatenation is kept as it was.
    Dim objstream, progIdSuffix, decoded, failNumber, failSource, failText
    progIdSuffix = "stream"
    Set objstream = Server.CreateObject("adodb." & progIdSuffix)
    objstream.Type = 1          ' 1 = binary: accept the raw bytes
    objstream.Mode = 3          ' 3 = read/write
    objstream.Open

    ' Trap errors only around the decoding steps so the stream is always closed.
    On Error Resume Next
    objstream.Write body
    objstream.Position = 0      ' rewind before switching to text mode
    objstream.Type = 2          ' 2 = text: read back through the charset
    objstream.Charset = Cset
    decoded = objstream.ReadText
    failNumber = Err.Number
    failSource = Err.Source
    failText = Err.Description
    Err.Clear
    objstream.Close
    Set objstream = Nothing
    On Error GoTo 0

    ' Re-raise any decoding error so callers see the same failure as before.
    If failNumber <> 0 Then Err.Raise failNumber, failSource, failText
    BytesToBstr = decoded
End Function
' Replacehtml: strip tag-like spans from a string.
'   Textstr - text to clean.
'   Returns - Textstr with every "<...>" span (at least one character between
'             the angle brackets) removed.
Public Function Replacehtml(Textstr)
    Dim tagFinder
    Set tagFinder = New RegExp
    With tagFinder
        .Pattern = "<(.[^>]*)>"
        .Global = True          ' remove every occurrence, not only the first
        .IgnoreCase = True
    End With
    Replacehtml = tagFinder.Replace(Textstr, "")
    Set tagFinder = Nothing
End Function
' GetArray: collect every span of ConStr that lies between two delimiters.
'   ConStr   - text to search.
'   StartStr - opening delimiter, used as a regular-expression fragment.
'   OverStr  - closing delimiter, used as a regular-expression fragment.
'   IncluL   - when False, the opening delimiter is removed from the result.
'   IncluR   - when False, the closing delimiter is removed from the result.
'   Returns  - the matches joined with "$Array$", with single and double quotes
'              removed, or "$False$" when an argument is empty/Null or nothing
'              matches.
' Note: "." in a VBScript RegExp does not match a line break, so one match
' never spans more than one line.
Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
    On Error Resume Next
    Dim joined, rx, hits, hit, previousHit

    If IsNull(ConStr) Or IsNull(StartStr) Or IsNull(OverStr) Or ConStr = "$False$" Or ConStr = "" Or StartStr = "" Or OverStr = "" Then
        GetArray = "$False$"
        Exit Function
    End If

    joined = ""
    Set rx = New RegExp
    rx.Global = True
    rx.IgnoreCase = True
    rx.Pattern = "(" & StartStr & ").+?(" & OverStr & ")"

    Set hits = rx.Execute(ConStr)
    For Each hit In hits
        ' Skip a match that is identical to the one immediately before it.
        If previousHit <> hit.Value Then
            joined = joined & "$Array$" & hit.Value
            previousHit = hit.Value
        End If
    Next
    Set hits = Nothing

    If joined = "" Then
        GetArray = "$False$"
        Exit Function
    End If

    ' Drop the leading "$Array$" separator (7 characters).
    joined = Mid(joined, 8)

    If IncluL = False Then
        rx.Pattern = StartStr
        joined = rx.Replace(joined, "")
    End If
    If IncluR = False Then
        rx.Pattern = OverStr
        joined = rx.Replace(joined, "")
    End If
    Set rx = Nothing

    ' Remove double and single quotes; spaces and parentheses are left alone.
    joined = Replace(Replace(joined, """", ""), "'", "")

    If joined = "" Then
        GetArray = "$False$"
    Else
        GetArray = joined
    End If
End Function
' ReplaceTrim: delete backspace, tab, line-feed and carriage-return characters.
'   strContent - text to clean (passed by value).
'   Returns    - strContent without Chr(8), Chr(9), Chr(10) and Chr(13);
'                spaces are kept.
Function ReplaceTrim(ByVal strContent)
    On Error Resume Next
    Dim stripper, controlChars
    controlChars = Array(Chr(8), Chr(9), Chr(10), Chr(13))
    Set stripper = New RegExp
    With stripper
        .Global = True
        .IgnoreCase = True
        ' Builds the alternation "(<bs>|<tab>|<lf>|<cr>)".
        .Pattern = "(" & Join(controlChars, "|") & ")"
    End With
    strContent = stripper.Replace(strContent, vbNullString)
    Set stripper = Nothing
    ReplaceTrim = strContent
End Function
' body: return the text found between two delimiters, for every occurrence.
'   wstr    - text to search.
'   start   - opening delimiter, used as a regular-expression fragment.
'   over    - closing delimiter, used as a regular-expression fragment.
'   Returns - the inner text of all matches concatenated, or the message
'             "找不到数据" when nothing matches.
' The delimiters are cut off by their literal lengths, so start/over are
' expected to match text exactly as long as they are written.
' Note: "." in a VBScript RegExp does not match a line break, so one match
' never spans more than one line.
Function body(wstr,start,over)
    ' The original left these three names undeclared (page-level globals).
    Dim rx, hits, hit, collected, innerLen
    Set rx = New RegExp
    rx.IgnoreCase = True            ' ignore case
    rx.Global = True                ' search the whole text
    rx.Pattern = "" & start & ".+?" & over & ""
    Set hits = rx.Execute(wstr)
    Set rx = Nothing

    collected = ""
    For Each hit In hits
        ' Strip the delimiters from each match separately. The original
        ' stripped once from the concatenation of all matches, which left
        ' the inner delimiters in the result when there were several matches.
        innerLen = Len(hit.Value) - Len(start) - Len(over)
        If innerLen < 0 Then innerLen = 0
        collected = collected & Mid(hit.Value, Len(start) + 1, innerLen)
    Next

    If collected = "" Then
        body = "找不到数据"
    Else
        body = collected
    End If
End Function
' GetBody: return the first span of ConStr between two literal delimiters.
'   ConStr   - text to search.
'   StartStr - opening delimiter (plain text, matched case-insensitively).
'   OverStr  - closing delimiter (plain text, matched case-insensitively).
'   IncluL   - when False, the opening delimiter is left out of the result.
'   IncluR   - when True, the closing delimiter is included in the result.
'   Returns  - the extracted text with its original casing, or "$False$" when
'              an argument is empty/Null, a delimiter is missing, or the
'              closing delimiter is not found after the start position.
' The search is plain-text, so spaces and line breaks between the delimiters
' are returned unchanged.
Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
    Dim haystack, startKey, overKey, startPos, overPos

    GetBody = "$False$"
    If IsNull(ConStr) Or IsNull(StartStr) Or IsNull(OverStr) Then Exit Function
    If ConStr = "$False$" Or ConStr = "" Or StartStr = "" Or OverStr = "" Then Exit Function

    ' Work on lowercase copies. The original lowercased StartStr/OverStr in
    ' place, which changed the caller's variables (arguments are ByRef).
    haystack = LCase(ConStr)
    startKey = LCase(StartStr)
    overKey = LCase(OverStr)

    ' Character-based InStr/Len/Mid replace the byte-based InStrB/LenB/MidB,
    ' which could match at an odd byte offset inside a two-byte character.
    startPos = InStr(1, haystack, startKey, vbBinaryCompare)
    If startPos <= 0 Then Exit Function
    If IncluL = False Then startPos = startPos + Len(startKey)

    overPos = InStr(startPos, haystack, overKey, vbBinaryCompare)
    ' Also rejects an empty span when the opening delimiter is excluded.
    If overPos <= 0 Or overPos <= startPos Then Exit Function
    If IncluR = True Then overPos = overPos + Len(overKey)

    ' Cut from the original text so the result keeps its casing.
    GetBody = Mid(ConStr, startPos, overPos - startPos)
End Function
%>
<%
' Driver: download the page and extract the text between the list markers.
' The string assigned to cxurll is a placeholder; replace it with the address
' of the page to read.
cxurll="网页地址些这里"
' A stray "End If" with no matching "If" stood here and stopped the page from
' compiling; it has been removed.
v_str=geturl(cxurll)
' 0,0: leave both markers out of the returned text.
menu_str = getbody(v_str,"<!--<list:n>-->","<!--</list:n>-->",0,0)
我自己的,你填写吧
' Page setup: send cache-related headers, reset the output buffer and raise
' the script timeout before any of the helper functions below are used.
' Expiry settings for the response.
Response.Expires = 0
Response.expiresabsolute = Now() - 1
' Cache-related headers added by hand, followed by the CacheControl property.
Response.addHeader "pragma", "no-cache"
Response.addHeader "cache-control", "private"
Response.CacheControl = "no-cache"
' Turn on buffering and clear the buffer.
Response.Buffer = True
Response.Clear
' Raise the script timeout.
Server.ScriptTimeOut=999999999
'***************************************************************
'* Function definitions
'***************************************************************
' Geturl: fetch a URL with a synchronous GET and return the response as text.
'   url     - address to request.
'   Returns - the response body decoded as gb2312 through BytesToBstr, or an
'             empty string when the request fails or does not complete.
' NOTE(review): MSXML also ships a ServerXMLHTTP class intended for server-side
' use; consider switching the ProgID after testing on the target server.
Function Geturl(url)
    Dim Http
    Geturl = ""
    ' The original cleared Err at the end without ever enabling error trapping,
    ' so an unreachable host aborted the whole page. Trap errors locally instead.
    On Error Resume Next
    Set Http = Server.CreateObject("MSXML2.XMLHTTP")
    'Set Http = Server.CreateObject("Microsoft.Xmlhttp")
    If Err.Number = 0 Then
        Http.open "GET", url, False
        Http.send
    End If
    If Err.Number = 0 Then
        ' readyState 4 means the request has completed.
        If Http.readyState = 4 Then
            Geturl = BytesToBstr(Http.responseBody, "gb2312")
            ' A decoding failure must not leave a half-assigned result behind.
            If Err.Number <> 0 Then Geturl = ""
        End If
    End If
    Set Http = Nothing
    Err.Clear
End Function
' BytesToBstr: decode a binary response body into a string.
'   body    - binary data to decode (Geturl passes Http.responseBody).
'   Cset    - character set name used for decoding, e.g. "gb2312".
'   Returns - the decoded text.
Function BytesToBstr(body,Cset)
    ' progIdSuffix was an undeclared variable named "s" in the original, which
    ' made it a page-level global that could overwrite a caller's variable.
    ' NOTE(review): the ProgID is presumably split in two on purpose; the
    ' concatenation is kept as it was.
    Dim objstream, progIdSuffix, decoded, failNumber, failSource, failText
    progIdSuffix = "stream"
    Set objstream = Server.CreateObject("adodb." & progIdSuffix)
    objstream.Type = 1          ' 1 = binary: accept the raw bytes
    objstream.Mode = 3          ' 3 = read/write
    objstream.Open

    ' Trap errors only around the decoding steps so the stream is always closed.
    On Error Resume Next
    objstream.Write body
    objstream.Position = 0      ' rewind before switching to text mode
    objstream.Type = 2          ' 2 = text: read back through the charset
    objstream.Charset = Cset
    decoded = objstream.ReadText
    failNumber = Err.Number
    failSource = Err.Source
    failText = Err.Description
    Err.Clear
    objstream.Close
    Set objstream = Nothing
    On Error GoTo 0

    ' Re-raise any decoding error so callers see the same failure as before.
    If failNumber <> 0 Then Err.Raise failNumber, failSource, failText
    BytesToBstr = decoded
End Function
' Replacehtml: strip tag-like spans from a string.
'   Textstr - text to clean.
'   Returns - Textstr with every "<...>" span (at least one character between
'             the angle brackets) removed.
Public Function Replacehtml(Textstr)
    Dim tagFinder
    Set tagFinder = New RegExp
    With tagFinder
        .Pattern = "<(.[^>]*)>"
        .Global = True          ' remove every occurrence, not only the first
        .IgnoreCase = True
    End With
    Replacehtml = tagFinder.Replace(Textstr, "")
    Set tagFinder = Nothing
End Function
' GetArray: collect every span of ConStr that lies between two delimiters.
'   ConStr   - text to search.
'   StartStr - opening delimiter, used as a regular-expression fragment.
'   OverStr  - closing delimiter, used as a regular-expression fragment.
'   IncluL   - when False, the opening delimiter is removed from the result.
'   IncluR   - when False, the closing delimiter is removed from the result.
'   Returns  - the matches joined with "$Array$", with single and double quotes
'              removed, or "$False$" when an argument is empty/Null or nothing
'              matches.
' Note: "." in a VBScript RegExp does not match a line break, so one match
' never spans more than one line.
Function GetArray(Byval ConStr,StartStr,OverStr,IncluL,IncluR)
    On Error Resume Next
    Dim joined, rx, hits, hit, previousHit

    If IsNull(ConStr) Or IsNull(StartStr) Or IsNull(OverStr) Or ConStr = "$False$" Or ConStr = "" Or StartStr = "" Or OverStr = "" Then
        GetArray = "$False$"
        Exit Function
    End If

    joined = ""
    Set rx = New RegExp
    rx.Global = True
    rx.IgnoreCase = True
    rx.Pattern = "(" & StartStr & ").+?(" & OverStr & ")"

    Set hits = rx.Execute(ConStr)
    For Each hit In hits
        ' Skip a match that is identical to the one immediately before it.
        If previousHit <> hit.Value Then
            joined = joined & "$Array$" & hit.Value
            previousHit = hit.Value
        End If
    Next
    Set hits = Nothing

    If joined = "" Then
        GetArray = "$False$"
        Exit Function
    End If

    ' Drop the leading "$Array$" separator (7 characters).
    joined = Mid(joined, 8)

    If IncluL = False Then
        rx.Pattern = StartStr
        joined = rx.Replace(joined, "")
    End If
    If IncluR = False Then
        rx.Pattern = OverStr
        joined = rx.Replace(joined, "")
    End If
    Set rx = Nothing

    ' Remove double and single quotes; spaces and parentheses are left alone.
    joined = Replace(Replace(joined, """", ""), "'", "")

    If joined = "" Then
        GetArray = "$False$"
    Else
        GetArray = joined
    End If
End Function
' ReplaceTrim: delete backspace, tab, line-feed and carriage-return characters.
'   strContent - text to clean (passed by value).
'   Returns    - strContent without Chr(8), Chr(9), Chr(10) and Chr(13);
'                spaces are kept.
Function ReplaceTrim(ByVal strContent)
    On Error Resume Next
    Dim stripper, controlChars
    controlChars = Array(Chr(8), Chr(9), Chr(10), Chr(13))
    Set stripper = New RegExp
    With stripper
        .Global = True
        .IgnoreCase = True
        ' Builds the alternation "(<bs>|<tab>|<lf>|<cr>)".
        .Pattern = "(" & Join(controlChars, "|") & ")"
    End With
    strContent = stripper.Replace(strContent, vbNullString)
    Set stripper = Nothing
    ReplaceTrim = strContent
End Function
' body: return the text found between two delimiters, for every occurrence.
'   wstr    - text to search.
'   start   - opening delimiter, used as a regular-expression fragment.
'   over    - closing delimiter, used as a regular-expression fragment.
'   Returns - the inner text of all matches concatenated, or the message
'             "找不到数据" when nothing matches.
' The delimiters are cut off by their literal lengths, so start/over are
' expected to match text exactly as long as they are written.
' Note: "." in a VBScript RegExp does not match a line break, so one match
' never spans more than one line.
Function body(wstr,start,over)
    ' The original left these three names undeclared (page-level globals).
    Dim rx, hits, hit, collected, innerLen
    Set rx = New RegExp
    rx.IgnoreCase = True            ' ignore case
    rx.Global = True                ' search the whole text
    rx.Pattern = "" & start & ".+?" & over & ""
    Set hits = rx.Execute(wstr)
    Set rx = Nothing

    collected = ""
    For Each hit In hits
        ' Strip the delimiters from each match separately. The original
        ' stripped once from the concatenation of all matches, which left
        ' the inner delimiters in the result when there were several matches.
        innerLen = Len(hit.Value) - Len(start) - Len(over)
        If innerLen < 0 Then innerLen = 0
        collected = collected & Mid(hit.Value, Len(start) + 1, innerLen)
    Next

    If collected = "" Then
        body = "找不到数据"
    Else
        body = collected
    End If
End Function
' GetBody: return the first span of ConStr between two literal delimiters.
'   ConStr   - text to search.
'   StartStr - opening delimiter (plain text, matched case-insensitively).
'   OverStr  - closing delimiter (plain text, matched case-insensitively).
'   IncluL   - when False, the opening delimiter is left out of the result.
'   IncluR   - when True, the closing delimiter is included in the result.
'   Returns  - the extracted text with its original casing, or "$False$" when
'              an argument is empty/Null, a delimiter is missing, or the
'              closing delimiter is not found after the start position.
' The search is plain-text, so spaces and line breaks between the delimiters
' are returned unchanged.
Function GetBody(ConStr,StartStr,OverStr,IncluL,IncluR)
    Dim haystack, startKey, overKey, startPos, overPos

    GetBody = "$False$"
    If IsNull(ConStr) Or IsNull(StartStr) Or IsNull(OverStr) Then Exit Function
    If ConStr = "$False$" Or ConStr = "" Or StartStr = "" Or OverStr = "" Then Exit Function

    ' Work on lowercase copies. The original lowercased StartStr/OverStr in
    ' place, which changed the caller's variables (arguments are ByRef).
    haystack = LCase(ConStr)
    startKey = LCase(StartStr)
    overKey = LCase(OverStr)

    ' Character-based InStr/Len/Mid replace the byte-based InStrB/LenB/MidB,
    ' which could match at an odd byte offset inside a two-byte character.
    startPos = InStr(1, haystack, startKey, vbBinaryCompare)
    If startPos <= 0 Then Exit Function
    If IncluL = False Then startPos = startPos + Len(startKey)

    overPos = InStr(startPos, haystack, overKey, vbBinaryCompare)
    ' Also rejects an empty span when the opening delimiter is excluded.
    If overPos <= 0 Or overPos <= startPos Then Exit Function
    If IncluR = True Then overPos = overPos + Len(overKey)

    ' Cut from the original text so the result keeps its casing.
    GetBody = Mid(ConStr, startPos, overPos - startPos)
End Function
%>
<%
' Driver: download the page and extract the text between the list markers.
' The string assigned to cxurll is a placeholder; replace it with the address
' of the page to read.
cxurll="网页地址些这里"
' A stray "End If" with no matching "If" stood here and stopped the page from
' compiling; it has been removed.
v_str=geturl(cxurll)
' 0,0: leave both markers out of the returned text.
menu_str = getbody(v_str,"<!--<list:n>-->","<!--</list:n>-->",0,0)
我自己的,你填写吧
更多追问追答
追问
v_str="-->-->"
你试下给v_str赋上面的值,看看能不能提取出来。为什么我这里不好用?我只要能匹配规则的正则表达式就行,至于html页面是怎么读出来的你不用管。
追答
你自己可以测试输出结果啊,看下需要改什么啊,比如空格,双引号的啊。
我用我的都做了N多小偷站点了
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询