一个非常简单、易懂的爬虫实例
不要想着去爬那种正规网站，乱七八糟的加密 JS 和 AJAX 传来传去会把你搞崩溃的。
' Sites like this one, which simply host their images on an image host,
' are much easier to scrape.
'
' Comic downloader (VBScript / Windows Script Host).
' Fetches the comic index page, creates one folder named after the page
' title, then one sub-folder per chapter link, and saves every lazy-loaded
' page image of each chapter as 1.jpeg, 2.jpeg, ... inside it.

Const SITE_ROOT = "https://www.mhua5.com"
Const COMIC_URL = "https://www.mhua5.com/comic-wangyouzhijinzhanfashi.html"

Set http = CreateObject("Msxml2.XMLHTTP")
Set fso = CreateObject("Scripting.FileSystemObject")
Set document = CreateObject("htmlfile")
Set ado = CreateObject("Adodb.Stream")
Set wshell = CreateObject("WScript.Shell")

' NOTE(review): designMode is presumably switched on so that scripts in the
' fetched markup are not executed by the htmlfile parser - confirm.
document.designMode = "on"
document.write gethtml(COMIC_URL, "text")

' Performs a synchronous GET request on url.
'   url - address to fetch
'   t_b - "text" returns the response as a string (responseText),
'         "binary" returns the raw bytes (responseBody);
'         any other value leaves the result Empty.
Function gethtml(ByVal url, ByVal t_b)
    With http
        .open "GET", url, False
        .send
        If t_b = "text" Then
            gethtml = .responseText
        ElseIf t_b = "binary" Then
            gethtml = .responseBody
        End If
    End With
End Function

' Writes the binary payload htmlbody to the file path target.
Sub writeb(target, htmlbody)
    ado.Type = 1                 ' 1 = adTypeBinary
    ado.Open
    ado.Write htmlbody
    ado.SaveToFile target, 2     ' 2 = adSaveCreateOverWrite, so re-runs do not fail on existing files
    ado.Close
End Sub

' Creates the folder path unless it already exists (CreateFolder raises an
' error on an existing folder, which would abort a second run of the script).
Sub ensurefolder(path)
    If Not fso.FolderExists(path) Then
        fso.CreateFolder path
    End If
End Sub

title = Replace(document.title, " ", "")
ensurefolder title

Set j_chapter = document.getElementsByTagName("A")
For Each j In j_chapter
    If j.className = "j-chapter-link" Then
        nowfolder = Replace(title & "\" & j.innerText, " ", "")
        ensurefolder nowfolder

        ' Parse the chapter page in its own htmlfile document.
        Set document1 = CreateObject("htmlfile")
        document1.designMode = "on"
        ' The link's href comes back prefixed with "about:" here; swap that
        ' prefix for the real site root to get a fetchable URL.
        document1.write gethtml(Replace(j.href, "about:", SITE_ROOT), "text")

        z = 0
        For Each i In document1.getElementsByTagName("IMG")
            If i.className = "lazy-read" Then
                src = i.getAttribute("data-original")
                ' getAttribute returns Null when the attribute is missing;
                ' Replace on Null would raise a runtime error, so skip those.
                If Not IsNull(src) Then
                    z = z + 1
                    writeb nowfolder & "\" & z & ".jpeg", _
                           gethtml(Replace(src, "http://", "https://"), "binary")
                End If
            End If
        Next
    End If
Next