52shici 还有两类内容:works 和 posts。一开始我都弄错了……也不知道需要哪一个,就两个都放上来吧。
// 52shiciWorks.bat
//
// Windows Script Host (JScript) scraper: walks the "original.php" listings of
// www.52shici.com (types 0..19), follows every link found in each listing page
// and appends title / author / content of each work to 52shici_works.txt.
// Finished listing pages and finished works are recorded in
// cache_52shici_works.txt so that an interrupted run can be resumed.
//
// The script writes to WScript.StdOut, so it has to be run with cscript.exe.
//
// NOTE(review): in the published listing the code was preceded by `0<1 ... ;`,
// which looks like the remains of a batch/JScript hybrid header whose batch
// lines were lost when the post was extracted. Restore that header before
// saving this as a .bat file - confirm against the original post.
;
(function () {
    var xhr = createXhr(),
        fso = new ActiveXObject('Scripting.FileSystemObject'),
        htmldoc = new ActiveXObject('htmlfile'),
        domain = "http://www.52shici.com",
        outFile = "52shici_works.txt",
        cacheFile = "cache_52shici_works.txt",
        firstType = 0,
        lastType = 19,
        reCrLfS = /\r?\n */g,
        reTotalPages = /当前\d+\/(\d+)页/,
        oCache = {},
        tsOut,
        tsCache;

    // Work relative to the script's own folder so the output and cache files
    // end up next to it.
    new ActiveXObject('WScript.Shell').CurrentDirectory = fso.GetParentFolderName(WScript.ScriptFullName);

    // Load the cache: one finished URL per line (iomode 1 = read, create if missing).
    try {
        tsCache = fso.OpenTextFile(cacheFile, 1, true);
    } catch (e) {
        showError(e, 'can not read cache file ' + cacheFile);
        WScript.Quit(1);
    }
    while (!tsCache.AtEndOfStream) {
        oCache[tsCache.ReadLine()] = true;
    }
    tsCache.Close();

    // Re-open the cache for appending (iomode 8).
    try {
        tsCache = fso.OpenTextFile(cacheFile, 8, true);
    } catch (e) {
        showError(e, 'can not write cache file ' + cacheFile);
        WScript.Quit(2);
    }
    // The output file is appended to and opened as Unicode (format -1).
    try {
        tsOut = fso.OpenTextFile(outFile, 8, true, -1);
    } catch (e) {
        showError(e, 'can not write file ' + outFile);
        WScript.Quit(3);
    }

    for (var type = firstType; type <= lastType; ++type) {
        var listUrl = domain + '/original.php?type=' + type + '&page=';

        // The first page is requested together with the pager so that the
        // number of pages of this type can be read from it.
        if (!getHTMLindex(listUrl + 1, true)) continue;
        var totalPages = readTotalPages();
        if (totalPages < 1) {
            WScript.StdOut.WriteLine('can not find the page count for type ' + type + ', skipped');
            continue;
        }
        WSH.Echo('totalpages=' + totalPages);

        for (var page = 1; page <= totalPages; ++page) {
            var pageUrl = listUrl + page;
            if (isCached(pageUrl)) continue;
            // Page 1 is still loaded in htmldoc from the page-count request
            // above, so it does not have to be downloaded a second time.
            if (page !== 1 && !getHTMLindex(pageUrl)) continue;

            var links = collectLinks(listUrl);
            for (var k = 0; k < links.length; ++k) {
                getContent(links[k]);
            }
            markCached(pageUrl);
        }
    }
    tsOut.close();
    tsCache.close();
    WScript.Quit();

    /**
     * Creates the XMLHTTP automation object, trying the newest ProgID first.
     * Quits the script with exit code 1 when none of them can be created.
     */
    function createXhr() {
        var progIds = ['MSXML2.XMLHTTP.6.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP', 'Microsoft.XMLHTTP'];
        for (var k = 0; k < progIds.length; ++k) {
            try {
                return new ActiveXObject(progIds[k]);
            } catch (error) {
                // This ProgID is not registered; try the next one.
            }
        }
        showError('Can\'t build XMLHTTP automation object.');
        WScript.Quit(1);
    }

    /** Tells whether `key` was recorded as finished (in this run or an earlier one). */
    function isCached(key) {
        // Comparing with `true` keeps inherited Object members from counting as hits.
        return oCache[key] === true;
    }

    /** Records `key` as finished, both in memory and in the cache file. */
    function markCached(key) {
        oCache[key] = true;
        tsCache.WriteLine(key);
    }

    /**
     * Reads the total page count from the pager element that
     * getHTMLindex(url, true) appended as the last child of the body.
     * Returns 0 when the pager text does not have the expected form.
     */
    function readTotalPages() {
        var pager = htmldoc.body.lastChild,
            text = pager ? pager.innerText : null,
            found = text ? text.match(reTotalPages) : null;
        return found ? parseInt(found[1], 10) : 0;
    }

    /**
     * Returns the absolute URLs of all links in the currently loaded listing.
     * The list is copied out first because getContent() reuses htmldoc and
     * would invalidate the live anchor collection.
     */
    function collectLinks(baseUrl) {
        var anchors = htmldoc.body.getElementsByTagName('a'),
            links = [],
            href;
        for (var k = 0, n = anchors.length; k < n; ++k) {
            href = anchors[k].getAttribute('href');
            if (href) {
                links.push(parseURL(href, baseUrl));
            }
        }
        return links;
    }

    /**
     * Downloads one work page and appends "title / author / content" to the
     * output file. Nothing is written (and nothing is cached) when the page
     * does not contain both a 'works-h1' and a 'works-content' element.
     */
    function getContent(url) {
        if (isCached(url) || !getHTML(url)) return;
        var title = null,
            author = null,
            content = null;
        try {
            var nodes = htmldoc.body.children[0].children;
            // Stop at the first 'works-content' element, as the original loop did.
            for (var k = 0, n = nodes.length; k < n && content === null; ++k) {
                var item = nodes[k];
                switch (item.className) {
                    case 'works-h1':
                        title = 'TTT ' + item.children[0].innerText;
                        author = item.children[1].innerText.replace('文/', '作者:');
                        break;
                    case 'works-content':
                        content = item.innerText;
                        break;
                    default:
                        break;
                }
            }
            if (title === null || content === null) {
                WScript.StdOut.WriteLine('no work found at ' + url + ', skipped');
                return;
            }
            tsOut.WriteLine((title + '\r\n' + author + '\r\n' + content).replace(reCrLfS, '<br/>\r\n'));
            // Also remembered in memory so a work linked from several listings
            // is only downloaded and written once per run.
            markCached(url);
        } catch (e) {
            showError(e, url);
        }
    }

    /**
     * Prints an error to standard output. Error objects are printed with
     * name, source, hexadecimal error number and message; anything else is
     * printed as-is.
     */
    function showError(err, source) {
        if ('[object Error]' !== Object.prototype.toString.call(err)) {
            WScript.StdOut.WriteLine(err);
            return;
        }
        WScript.StdOut.WriteLine([
            err.name,
            'source: ' + (undefined === source ? '' : source),
            'number: ' + (err.number >>> 0).toString(16),
            'Information: ' + err.message
        ].join('\r\n'));
    }

    /**
     * Shared download step of getHTMLindex() and getHTML(): performs a
     * synchronous GET, passes the response text to `extract` and loads the
     * returned HTML fragment into htmldoc.
     *
     * @param url     address to download
     * @param extract function(responseText) returning the fragment to load,
     *                or null when the expected markup is missing
     * @returns true when htmldoc now holds the fragment, false otherwise
     */
    function fetchInto(url, extract) {
        var headers = [
                ['Accept', 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8'],
                ['Accept-Language', 'en-US, en; q=0.8, zh-Hans-CN; q=0.5, zh-Hans; q=0.3'],
                // NOTE(review): the trailing slash is not part of a host name;
                // kept unchanged because it is unclear whether anything relies on it.
                ['host', 'www.52shici.com/'],
                ['Connection', 'close'],
                ['Cache-Control', 'no-cache'],
                ['User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134']
            ],
            opened = false,
            fragment;
        WScript.StdOut.Write('fetching ' + url + '...');
        try {
            xhr.open('GET', url, false);
            for (var k = 0; k < headers.length; ++k) {
                xhr.setRequestHeader(headers[k][0], headers[k][1]);
            }
            xhr.send();
            if (200 !== xhr.status) {
                WScript.StdOut.WriteLine('failed. status:' + xhr.status);
                return false;
            }
            fragment = extract(xhr.responseText);
            if (fragment === null) {
                WScript.StdOut.WriteLine('failed');
                return false;
            }
            htmldoc.open();
            opened = true;
            htmldoc.write('<!DOCTYPE html><html><head></head><body>' + fragment + '</body></html>');
        } catch (e) {
            WScript.StdOut.WriteLine('failed: ' + e.message);
            return false;
        } finally {
            if (opened) {
                htmldoc.close();
            }
        }
        WScript.StdOut.WriteLine('success');
        return true;
    }

    /**
     * Loads the <ul id="listWorks"> list of a listing page into htmldoc.
     * When `boltotalPages` is truthy the <span class="mt"> pager is appended
     * after the list; the request then fails if the pager is missing.
     */
    function getHTMLindex(url, boltotalPages) {
        return fetchInto(url, function (text) {
            var list = text.match(/<ul[^>]*id="listWorks"[^>]*>[\s\S]*?<\/ul>/i),
                pager;
            if (!list) return null;
            if (!boltotalPages) return list[0];
            pager = text.match(/<span[^>]*class=['"]mt['"][^>]*>[^<]*<\/span>/i);
            return pager ? list[0] + pager[0] : null;
        });
    }

    /**
     * Loads the part of a work page that starts at <div class="main" and
     * ends before the "<!--main end -->" marker into htmldoc.
     */
    function getHTML(url) {
        return fetchInto(url, function (text) {
            var main = text.match(/<div class="main"[\s\S]+(?=<!--main end -->)/i);
            return main ? main[0] : null;
        });
    }

    /**
     * Turns a link's href into an absolute URL.
     *
     * Everything up to the first ':' is dropped before the path is inspected -
     * presumably because the htmlfile document reports hrefs resolved against
     * "about:" (TODO confirm):
     *   - "//..." after the scheme: href is already absolute and returned as-is;
     *   - "/...": site-absolute path, prefixed with `domain`;
     *   - anything else: relative path, appended to the directory of `url`.
     */
    function parseURL(href, url) {
        var path = href.replace(/^[^:]*:/, '');
        if (/^\/\/.*/.test(path)) {
            return href;
        }
        if (/^\/.*/.test(path)) {
            return domain + path;
        }
        return url.replace(/[^\/]+$/, '') + path;
    }
})();
// 52shiciPosts.bat
//
// Windows Script Host (JScript) scraper: requests posts.php?id=N of
// www.52shici.com for N = 1 .. 299999 and appends title / author / content of
// every post found to 52shici_posts.txt. Ids that were written successfully
// are recorded in cache_52shici_posts.txt so an interrupted run can be resumed.
//
// The script writes to WScript.StdOut, so it has to be run with cscript.exe.
//
// NOTE(review): in the published listing the code was preceded by `0<1 ... ;`,
// which looks like the remains of a batch/JScript hybrid header whose batch
// lines were lost when the post was extracted. Restore that header before
// saving this as a .bat file - confirm against the original post.
;
(function () {
    var xhr = createXhr(),
        fso = new ActiveXObject('Scripting.FileSystemObject'),
        htmldoc = new ActiveXObject('htmlfile'),
        domain = "http://www.52shici.com",
        outFile = "52shici_posts.txt",
        cacheFile = "cache_52shici_posts.txt",
        // Exclusive upper bound of the post ids that are tried.
        maxPostId = 300000,
        reCrLfS = /\r?\n */g,
        oCache = {},
        tsOut,
        tsCache;

    // Work relative to the script's own folder so the output and cache files
    // end up next to it.
    new ActiveXObject('WScript.Shell').CurrentDirectory = fso.GetParentFolderName(WScript.ScriptFullName);

    // Load the cache: one finished post id per line (iomode 1 = read, create if missing).
    try {
        tsCache = fso.OpenTextFile(cacheFile, 1, true);
    } catch (e) {
        showError(e, 'can not read cache file ' + cacheFile);
        WScript.Quit(1);
    }
    while (!tsCache.AtEndOfStream) {
        oCache[tsCache.ReadLine()] = true;
    }
    tsCache.Close();

    // Re-open the cache for appending (iomode 8).
    try {
        tsCache = fso.OpenTextFile(cacheFile, 8, true);
    } catch (e) {
        showError(e, 'can not write cache file ' + cacheFile);
        WScript.Quit(2);
    }
    // The output file is appended to and opened as Unicode (format -1).
    try {
        tsOut = fso.OpenTextFile(outFile, 8, true, -1);
    } catch (e) {
        showError(e, 'can not write file ' + outFile);
        WScript.Quit(3);
    }

    for (var id = 1; id < maxPostId; ++id) {
        var key = String(id);
        // Comparing with `true` keeps inherited Object members from counting as hits.
        if (oCache[key] === true) continue;
        if (!getHTML(domain + '/posts.php?id=' + id)) continue;

        var post = readPost();
        if (post === null) {
            WScript.StdOut.WriteLine('no complete post found for id ' + id + ', skipped');
            continue;
        }
        tsOut.WriteLine(post.replace(reCrLfS, '<br/>\r\n'));
        tsCache.WriteLine(key);
    }
    tsOut.close();
    tsCache.close();
    WScript.Quit();

    /**
     * Creates the XMLHTTP automation object, trying the newest ProgID first.
     * Quits the script with exit code 1 when none of them can be created.
     */
    function createXhr() {
        var progIds = ['MSXML2.XMLHTTP.6.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP', 'Microsoft.XMLHTTP'];
        for (var k = 0; k < progIds.length; ++k) {
            try {
                return new ActiveXObject(progIds[k]);
            } catch (error) {
                // This ProgID is not registered; try the next one.
            }
        }
        showError('Can\'t build XMLHTTP automation object.');
        WScript.Quit(1);
    }

    /**
     * Extracts the post currently loaded in htmldoc.
     *
     * The values are local to each call, so a post that lacks one of its
     * sections can never be written with the title, author or content of the
     * post processed before it.
     *
     * @returns "title\r\nauthor\r\ncontent", or null when the 'posts-h1',
     *          'posts-h2' or 'posts-content' element is missing or the
     *          document can not be walked
     */
    function readPost() {
        var title = null,
            author = null,
            content = null;
        try {
            // Second child of <html> is expected to be <body>; its first child
            // is the fragment written by getHTML().
            var items = htmldoc.documentElement.childNodes[1].childNodes[0].childNodes;
            for (var k = 0, n = items.length; k < n; ++k) {
                var item = items[k];
                switch (item.className) {
                    case 'posts-h1':
                        title = 'TTT ' + item.innerText;
                        break;
                    case 'posts-h2':
                        author = '作者:' + item.firstChild.innerText;
                        break;
                    case 'posts-content':
                        // The last child is dropped before the text is read -
                        // presumably a trailing non-content element (TODO confirm).
                        item.removeChild(item.lastChild);
                        content = item.innerText;
                        break;
                    default:
                        break;
                }
            }
        } catch (e) {
            showError(e, 'can not parse post');
            return null;
        }
        if (title === null || author === null || content === null) {
            return null;
        }
        return title + '\r\n' + author + '\r\n' + content;
    }

    /**
     * Prints an error to standard output. Error objects are printed with
     * name, source, hexadecimal error number and message; anything else is
     * printed as-is.
     */
    function showError(err, source) {
        if ('[object Error]' !== Object.prototype.toString.call(err)) {
            WScript.StdOut.WriteLine(err);
            return;
        }
        WScript.StdOut.WriteLine([
            err.name,
            'source: ' + (undefined === source ? '' : source),
            'number: ' + (err.number >>> 0).toString(16),
            'Information: ' + err.message
        ].join('\r\n'));
    }

    /**
     * Downloads a post page with a synchronous GET and loads the part that
     * starts at <div class="sidebar" and ends before <div class="posts-do"
     * into htmldoc (a closing </div> is appended to balance the cut).
     *
     * @returns true when htmldoc now holds the fragment, false when the
     *          request failed, the status was not 200 or the markup is missing
     */
    function getHTML(url) {
        var headers = [
                ['Accept', 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8'],
                ['Accept-Language', 'en-US, en; q=0.8, zh-Hans-CN; q=0.5, zh-Hans; q=0.3'],
                // NOTE(review): the trailing slash is not part of a host name;
                // kept unchanged because it is unclear whether anything relies on it.
                ['host', 'www.52shici.com/'],
                ['Connection', 'close'],
                ['Cache-Control', 'no-cache'],
                ['User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134']
            ],
            opened = false,
            aTxt;
        WScript.StdOut.Write('fetching ' + url + '...');
        try {
            xhr.open('GET', url, false);
            for (var k = 0; k < headers.length; ++k) {
                xhr.setRequestHeader(headers[k][0], headers[k][1]);
            }
            xhr.send();
            if (200 !== xhr.status) {
                WScript.StdOut.WriteLine('failed. status:' + xhr.status);
                return false;
            }
            aTxt = xhr.responseText.match(/<div class="sidebar"[\s\S]+(?=<div class="posts-do")/i);
            if (!aTxt) {
                WScript.StdOut.WriteLine('failed');
                return false;
            }
            htmldoc.open();
            opened = true;
            htmldoc.write('<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=GB2312" /></head><body>' + aTxt[0] + '</div></body></html>');
        } catch (e) {
            WScript.StdOut.WriteLine('failed: ' + e.message);
            return false;
        } finally {
            if (opened) {
                htmldoc.close();
            }
        }
        WScript.StdOut.WriteLine('success');
        return true;
    }
})();
|