| 0<1 |
| |
| |
| |
| |
| ; |
/**
 * Creates an XMLHTTP automation object.
 *
 * Walks the ProgIDs listed in aXMLHttpVers in order and returns the first
 * object that can be constructed. When every ProgID fails, the problem is
 * reported through showError and the script is ended with exit code 1.
 *
 * @returns {Object} the first XMLHTTP automation object that could be created
 */
function makeXHR() {
    var idx = 0;
    while (idx < aXMLHttpVers.length) {
        var progId = aXMLHttpVers[idx];
        idx += 1;
        try {
            return new ActiveXObject(progId);
        } catch (ignored) {
            // This ProgID could not be instantiated; fall through to the next one.
        }
    }
    showError('Can\'t build XMLHTTP automation object.');
    WScript.Quit(1);
}
/**
 * Builds an in-memory 'htmlfile' document that is used as an HTML parser.
 *
 * The document is opened, filled with a minimal page skeleton (including an
 * X-UA-Compatible "IE=edge" meta tag and an empty body) and closed again, so
 * callers can assign fragments to body.innerHTML and query the resulting DOM.
 *
 * @returns {Object} the initialised htmlfile document
 */
function makeHtmldoc() {
    var skeleton = '<!DOCTYPE html><html><head><meta charset="utf-8" /><meta http-equiv="X-UA-Compatible" content="IE=edge"><title>Page Title</title><meta name="viewport" content="width=device-width, initial-scale=1"></head><body></body></html>';
    var doc = new ActiveXObject('htmlfile');
    doc.open();
    doc.write(skeleton);
    doc.close();
    return doc;
}
/**
 * Creates an ADODB.Stream object with its Mode property set to 3.
 *
 * The stream is returned closed; byte2str opens and closes it per conversion.
 *
 * @returns {Object} a new ADODB.Stream automation object
 */
function makeADOStream() {
    var stream = new ActiveXObject('ADODB.Stream');
    // 3 is adModeReadWrite in ADO's ConnectModeEnum.
    stream.Mode = 3;
    return stream;
}
/**
 * Decodes a byte array into a string using an ADODB.Stream.
 *
 * The bytes are written to the stream in binary mode (Type 1), the stream is
 * rewound and switched to text mode (Type 2) with the requested charset, and
 * the whole content is read back as text.
 *
 * The stream is closed again even when writing or decoding throws. Callers in
 * this file reuse one stream object for many conversions, and a stream left
 * open by a failed call would make the next Open() fail as well.
 *
 * @param {Object} ado        a closed ADODB.Stream (see makeADOStream)
 * @param {*}      byteArr    the raw bytes, e.g. an XMLHTTP responseBody
 * @param {string} sEncoding  charset name used for decoding, e.g. 'utf-8'
 * @returns {string} the decoded text
 * @throws whatever the stream throws; the stream is closed before rethrowing
 */
function byte2str(ado, byteArr, sEncoding) {
    var opened = false;
    ado.Type = 1;
    try {
        ado.Open();
        opened = true;
        ado.Write(byteArr);
        ado.Position = 0;
        ado.Type = 2;
        ado.Charset = sEncoding;
        return ado.ReadText(-1);
    } finally {
        // Only close what was actually opened; Close() on a stream whose
        // Open() failed would raise a second error and mask the first.
        if (opened) {
            ado.Close();
        }
    }
}
/**
 * Creates one download worker.
 *
 * A worker owns its own XMLHTTP object, htmlfile parser document and
 * ADODB.Stream. Calling worker.job(method, uri, async) starts a request. When
 * the request completes (readyState 4) the response is handled as follows:
 *
 *   - status 200 and the poem-body comment markers are found: the fragment is
 *     parsed, title/author/content are written to tsOut, and the URL is written
 *     to tsCache and to standard output;
 *   - status 200 without the markers: nothing is written;
 *   - any other status: "<url> status=<code>" is written to standard output.
 *
 * After handling, the worker always pushes itself back onto the global aXhr
 * pool. This happens in a `finally` so that an exception while reading the
 * response or parsing the page cannot remove the worker from the pool for
 * good. Such exceptions are reported through showError together with the URL
 * instead of being discarded silently.
 *
 * @returns {{job: function(string, string, boolean): void}} the worker
 */
function xhrWorkers() {
    var url;
    var xhr_ = makeXHR();
    var htmldoc = makeHtmldoc();
    var oADO = makeADOStream();
    // Captures everything from the "poem body start" comment marker up to
    // (not including) the "poem body end" marker.
    var re = /<!-- 诗歌正文开始 -->[\s\S]+(?=<!-- 诗歌正文结束 -->)/i;

    // Parses one poem fragment and records it in the output and cache files.
    function savePoem(fragment) {
        htmldoc.body.innerHTML = fragment;
        var title = 'TTT ' + htmldoc.body.getElementsByTagName('h3')[0].innerText;
        var author = htmldoc.body.children[1].children[0].children[0].innerText;
        var content = htmldoc.body.getElementsByClassName('m-lg font14')[0].innerText;
        // Every line break (plus any following whitespace) becomes "<br/>" + CRLF.
        tsOut.WriteLine((title + '\r\n' + author + '\r\n' + content + '\r\n').replace(/\r?\n\s*/g, '<br/>\r\n'));
        // The URL is cached only after the poem has been written out.
        tsCache.WriteLine(url);
        WScript.StdOut.WriteLine(url);
    }

    xhr_.onReadyStateChange = function () {
        if (xhr_.readyState !== 4) {
            return;
        }
        try {
            if (xhr_.status === 200) {
                var m = byte2str(oADO, xhr_.responseBody, 'utf-8').match(re);
                if (m) {
                    savePoem(m[0]);
                }
            } else {
                WScript.StdOut.WriteLine(url + ' status=' + xhr_.status);
            }
        } catch (e) {
            // The URL is not cached in this case, so it is retried on the next run.
            showError(e, url);
        } finally {
            // Hand the worker back to the pool no matter how the request ended.
            aXhr.push(worker);
        }
    };

    var worker = {
        job: function (method, uri, async) {
            url = uri;
            xhr_.open(method, uri, async);
            xhr_.send();
        }
    };
    return worker;
}
/**
 * Applies a fixed set of browser-like request headers to an XMLHTTP object.
 *
 * The headers are set in the order listed below. The request must already be
 * opened by the caller (the only call site in this file calls this right
 * after xhr.open()).
 *
 * @param {Object} xhr_ the XMLHTTP object to configure
 */
function setRequestHeaders(xhr_) {
    var headers = [
        ['Accept', 'text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8'],
        ['Accept-Language', 'en-US, en; q=0.8, zh-Hans-CN; q=0.5, zh-Hans; q=0.3'],
        ['Accept-Encoding', 'gzip, deflate'],
        ['TE', 'gzip, deflate'],
        ['Cache-Control', 'no-cache'],
        ['User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134']
    ];
    for (var k = 0; k < headers.length; k++) {
        xhr_.setRequestHeader(headers[k][0], headers[k][1]);
    }
}
/**
 * Resolves an href taken from a parsed page into an absolute URL.
 *
 * A leading URL scheme (e.g. "http:", "about:") is stripped from the href and
 * the remainder decides how the result is built:
 *
 *   - "//host/..."  the href is already absolute and is returned unchanged.
 *                   If it carries no scheme, or the scheme is "about:", the
 *                   scheme of `url` is used instead.
 *   - "/path"       site-root relative: the global `domain` is prepended.
 *   - anything else relative to the directory of `url`.
 *
 * Only a syntactically valid scheme at the very start of the href is
 * stripped, so a colon inside the path or query string (e.g. "?t=12:30") no
 * longer truncates the href.
 *
 * NOTE(review): the "about:" handling presumably exists because the htmlfile
 * parser document reports hrefs resolved against about:blank; confirm.
 * Dot segments ("../") are not normalised.
 *
 * @param {string} href the href attribute value to resolve
 * @param {string} url  URL of the page the href was found on
 * @returns {string} the absolute URL
 */
function parseURL(href, url) {
    var reScheme = /^[a-z][a-z0-9+.\-]*:/i;
    var found = reScheme.exec(href);
    var scheme = found ? found[0] : '';
    var $path = href.substring(scheme.length);

    if (/^\/\//.test($path)) {
        if (scheme !== '' && scheme.toLowerCase() !== 'about:') {
            return href;
        }
        // Scheme-less or about: reference: borrow the scheme of the page
        // (falling back to the site's, then to plain http).
        var base = reScheme.exec(url) || reScheme.exec(domain);
        return (base ? base[0] : 'http:') + $path;
    }
    if (/^\//.test($path)) {
        return domain + $path;
    }
    // Drop the last path segment of the page URL and append the relative path.
    return url.replace(/[^\/]+$/, '') + $path;
}
/**
 * Writes an error report to standard output.
 *
 * When `err` is an Error object, a four-line report is written: the error
 * name, the supplied source text (empty when omitted), the error number as an
 * unsigned hexadecimal value, and the error message. Any other value is
 * written as-is.
 *
 * @param {*}      err      an Error object or a plain message
 * @param {string} [source] optional description of where the error happened
 */
function showError(err, source) {
    var text = err;
    if (Object.prototype.toString.call(err) === '[object Error]') {
        var report = [];
        report.push(err.name);
        report.push('source: ' + (source === undefined ? '' : source));
        // ">>> 0" reinterprets the (possibly negative) number as unsigned 32-bit.
        report.push('number: ' + (err.number >>> 0).toString(16));
        report.push('Information: ' + err.message);
        text = report.join('\r\n');
    }
    WScript.StdOut.WriteLine(text);
}
// ---------------------------------------------------------------------------
// Script-wide state and file setup.
// ---------------------------------------------------------------------------

// XMLHTTP ProgIDs tried by makeXHR, in this order.
var aXMLHttpVers = ['MSXML2.XMLHTTP.6.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP', 'Microsoft.XMLHTTP'];
var fso = new ActiveXObject('Scripting.FileSystemObject');
// Prefix used by parseURL for site-root-relative links.
var domain = "http://www.zgshige.com";
// File that receives the downloaded poems.
var outFile = "zgshige.txt";
// File listing the URLs that have already been processed, one per line.
var cacheFile = "cache_zgshige.txt";
var tsOut;
var tsCache;
// Lookup of already-processed URLs: oCache[url] === true.
var oCache = {};

// Work relative to the folder that contains this script.
new ActiveXObject('WScript.Shell').CurrentDirectory = fso.GetParentFolderName(WScript.ScriptFullName);

// Pass 1: load the cache (iomode 1 = ForReading; created empty if missing).
try {
    tsCache = fso.OpenTextFile(cacheFile, 1, true);
} catch (e) {
    WScript.Echo('can not read cache file ' + cacheFile);
    WScript.Quit(1);
}
for (; !tsCache.AtEndOfStream;) {
    oCache[tsCache.ReadLine()] = true;
}
tsCache.Close();

// Pass 2: reopen the cache for appending (iomode 8 = ForAppending).
try {
    tsCache = fso.OpenTextFile(cacheFile, 8, true);
} catch (e) {
    showError(e, 'can not write cache file ' + cacheFile);
    WScript.Quit(2);
}

// Output file: appended to, opened with format -1 (TristateTrue, i.e. Unicode).
try {
    tsOut = fso.OpenTextFile(outFile, 8, true, -1);
} catch (e) {
    showError(e, 'can not write file ' + outFile);
    WScript.Quit(3);
}
/**
 * Task source for the main loop.
 *
 * Creating this object immediately starts an asynchronous crawl of the index
 * pages, beginning at a fixed start page and following each page's
 * "next page" link. For every index page that is not yet in oCache, the poem
 * links on it that are not in oCache are queued, followed by the index page
 * URL itself.
 *
 *   atEnd     becomes true when the crawl stops: no next-page link was found,
 *             a page did not return HTTP 200, the main-body comment markers
 *             were missing, or an error occurred while handling a page.
 *   current   unused placeholder, kept for compatibility.
 *   moveNext  removes and returns the next queued URL, or undefined when the
 *             queue is currently empty (more may arrive while atEnd is false).
 *
 * Any exception raised while handling a response is reported through
 * showError and ends the crawl. Without that, atEnd would stay false and the
 * main loop would poll forever.
 */
var oTasks = {
    atEnd: false,
    current: null,
    moveNext: (function () {
        var aIndex = [];
        var nextPage = 'http://www.zgshige.com/zcms/catalog/15112/pc/index_201.shtml';
        var xhr = makeXHR();
        var htmldoc = makeHtmldoc();
        var oADO = makeADOStream();
        // Lazily captures from the "main body start" comment marker up to
        // (not including) the "main body end" marker.
        var re = /<!-- 主体开始 -->[\S\s]*?(?=<!-- 主体结束 -->)/i;
        var url = nextPage;

        // Marks the crawl as finished.
        function stopCrawl() {
            nextPage = null;
            oTasks.atEnd = true;
            return false;
        }

        // Returns the absolute URL behind the "next page" pager link, or null.
        function findNextPage() {
            var nodes = htmldoc.documentElement.getElementsByClassName('fc_ch1');
            for (var i = 0, l = nodes.length; i < l; ++i) {
                var label = nodes[i].firstChild;
                // Empty pager elements have no child node; skip them.
                if (label && label.nodeValue === '下一页') {
                    return parseURL(nodes[i].parentNode.getAttribute('href'), url);
                }
            }
            return null;
        }

        // Queues the not-yet-cached poem links of the current page, then the
        // page itself, so the page URL is handed out only after its poems.
        function queuePageTasks() {
            var links = htmldoc.body.getElementsByClassName('fc-green text-uppercase');
            for (var i = 0, l = links.length; i < l; ++i) {
                var item = parseURL(links[i].getAttribute('href'), url);
                if (!oCache[item]) {
                    aIndex.push(item);
                }
            }
            aIndex.push(url);
        }

        xhr.onReadyStateChange = function () {
            if (xhr.readyState !== 4) {
                return;
            }
            try {
                if (200 !== xhr.status) {
                    return stopCrawl();
                }
                var m = byte2str(oADO, xhr.responseBody, 'utf-8').match(re);
                if (!m) {
                    return stopCrawl();
                }
                htmldoc.body.innerHTML = m[0];
                nextPage = findNextPage();
                if (!oCache[url]) {
                    queuePageTasks();
                }
                url = nextPage;
                if (!url) {
                    return stopCrawl();
                }
                xhr.open('GET', url, true);
                xhr.send();
            } catch (e) {
                showError(e, url);
                return stopCrawl();
            }
        };

        xhr.open('GET', url, true);
        setRequestHeaders(xhr);
        xhr.send();

        return function () {
            return aIndex.shift();
        };
    })()
};
/**
 * Hands every URL currently queued in oTasks to the worker pool.
 *
 * Index-page URLs (matching reIndex) are not downloaded again; they are only
 * recorded in the cache file and echoed to standard output. For any other
 * URL the function waits until a worker is available in aXhr and starts an
 * asynchronous GET on it.
 */
function dispatchQueuedTasks() {
    var queued;
    while (queued = oTasks.moveNext()) {
        if (reIndex.test(queued)) {
            tsCache.WriteLine(queued);
            WScript.StdOut.WriteLine(queued);
            continue;
        }
        while (!aXhr.length) {
            WScript.Sleep(10);
        }
        aXhr.shift().job('GET', queued, true);
    }
}

/**
 * Waits until at least `expected` workers are back in the aXhr pool.
 *
 * @param {number} expected  number of workers that must be idle
 * @param {number} timeoutMs maximum time to wait, in milliseconds
 * @returns {boolean} true when all workers returned, false on timeout
 */
function waitForIdleWorkers(expected, timeoutMs) {
    var stepMs = 50;
    var waitedMs = 0;
    while (aXhr.length < expected && waitedMs < timeoutMs) {
        WScript.Sleep(stepMs);
        waitedMs += stepMs;
    }
    return aXhr.length >= expected;
}

var maxThreads = 20,
    aXhr = [],
    i, l, task,
    reIndex = /index.*html$/i,
    // Upper bound for waiting on in-flight requests before shutting down.
    drainTimeoutMs = 60000;

// Fill the pool with idle workers.
for (i = 0; i < maxThreads; ++i) {
    aXhr.push(xhrWorkers());
}

// Keep dispatching while the index crawl is still producing tasks.
while (!oTasks.atEnd) {
    dispatchQueuedTasks();
    WScript.Sleep(100);
}
// The crawl may have queued more URLs just before it set atEnd.
dispatchQueuedTasks();

// Requests are asynchronous: the last ones are still in flight here. Wait for
// every worker to return before closing the files they write to, otherwise
// their results are lost.
if (!waitForIdleWorkers(maxThreads, drainTimeoutMs)) {
    WScript.StdOut.WriteLine('timed out waiting for ' + (maxThreads - aXhr.length) + ' unfinished request(s)');
}

tsOut.Close();
tsCache.Close();
WScript.Quit(0);