一个非常简单、易懂的爬虫实例。
不要想着去爬那种正规网站，乱七八糟的加密 JS 和 AJAX 传来传去会把你搞崩溃的。
还是这种把图片挂到图床上的网站好爬。
- Set http = CreateObject("Msxml2.XMLHTTP")
- ' Shared COM helpers used by the rest of the script, followed by the initial load of the comic index page.
- Set fso = CreateObject("scripting.filesystemobject")
- Set ado = CreateObject("Adodb.Stream")
- Set wshell = CreateObject("wscript.shell")
- ' In-memory HTML document that receives the downloaded index page so it can be queried through the DOM.
- Set document = CreateObject("htmlfile")
- document.designMode = "on"
- ' Fetch the index page as text first, then feed it to the document.
- indexHtml = gethtml("https://www.mhua5.com/comic-wangyouzhijinzhanfashi.html", "text")
- document.Write indexHtml
- ' Performs a synchronous GET request through the shared XMLHTTP object `http`.
- '   url - address to request.
- '   t_b - "text" returns the response as text (responseText),
- '         "binary" returns the raw response body (responseBody).
- ' Any other t_b value leaves the function result unassigned.
- Function gethtml(ByVal url, ByVal t_b)
- http.Open "GET", url, False
- http.Send
- If t_b = "binary" Then
- gethtml = http.responseBody
- Exit Function
- End If
- If t_b = "text" Then gethtml = http.responseText
- End Function
-
- ' Saves a binary payload to disk through the shared ADODB.Stream object `ado`.
- '   target   - path of the file to write.
- '   htmlbody - binary data to store (the script passes the value returned by gethtml(..., "binary")).
- ' An existing file at `target` is overwritten, so the script can be run again over the same folders.
- Sub writeb(target, htmlbody)
- ado.Type = 1 ' 1 = adTypeBinary
- ado.Open
- ado.Write htmlbody
- ' 2 = adSaveCreateOverWrite. The default option (adSaveCreateNotExist) raises an error when the file already exists.
- ado.SaveToFile target, 2
- ado.Close
- End Sub
-
- ' Main crawl: make a folder named after the page title, then one sub-folder per chapter link,
- ' and save every image marked "lazy-read" on each chapter page as 1.jpeg, 2.jpeg, ...
- title = Replace(document.title, " ", "")
- ' CreateFolder raises an error when the folder already exists, so only create it when it is missing.
- If Not fso.FolderExists(title) Then fso.CreateFolder title
- Set j_chapter = document.getElementsByTagName("A")
- For Each j In j_chapter
- ' Only anchors carrying the "j-chapter-link" class are treated as chapter links.
- If j.className = "j-chapter-link" Then
- nowfolder = Replace(title & "\" & j.innerText, " ", "")
- If Not fso.FolderExists(nowfolder) Then fso.CreateFolder nowfolder
- ' Parse each chapter page in its own fresh htmlfile document.
- Set document1 = CreateObject("htmlfile")
- document1.designMode = "on"
- ' The "about:" prefix found in href is swapped for the site origin to get a full URL.
- document1.Write gethtml(Replace(j.href, "about:", "https://www.mhua5.com"), "text")
- z = 0 ' running image number within the chapter, used as the file name
- For Each i In document1.getElementsByTagName("IMG")
- If i.className = "lazy-read" Then
- z = z + 1
- ' The replace is needed because the page source has http/https written wrongly (perhaps to trip up scrapers?).
- writeb nowfolder & "\" & z & ".jpeg", gethtml(Replace(i.getAttribute("data-original"), "http://", "https://"), "binary")
- End If
- Next
- End If
- Next
复制代码
|