本帖最后由 flashercs 于 2018-9-21 09:51 编辑
/**
 * Windows Script Host (JScript) scraper.
 *
 * Fetches the index pages listed in `aTSIndex` from `url`, collects every
 * link whose text matches "第<digits>卷", then walks each collected page
 * (following its "next page" pager link) and appends one tab-separated line
 * per matched poem to `outFile`, located next to this script.
 *
 * Exit codes: 1 = no XMLHTTP object could be created,
 *             2 = the output file could not be opened.
 *
 * The code deliberately stays within ES3 syntax (var, function declarations)
 * because it runs on the WSH JScript engine.
 */

// XMLHTTP automation object shared by every request. ProgIDs are tried in
// the listed order and the first one that can be instantiated wins.
var xhr = (function () {
  var aXMLHttpVers = ['MSXML2.XMLHTTP.6.0', 'MSXML2.XMLHTTP.3.0', 'MSXML2.XMLHTTP', 'Microsoft.XMLHTTP'];
  for (var i = 0; i < aXMLHttpVers.length; i++) {
    try {
      return new ActiveXObject(aXMLHttpVers[i]);
    } catch (error) {
      // This ProgID is not available; fall through and try the next one.
    }
  }
  showError('Can\'t build XMLHTTP automation object.');
  WScript.Quit(1);
})();

var fso = new ActiveXObject('Scripting.FileSystemObject'),
  shell = new ActiveXObject('WScript.Shell'),
  url = 'http://www.chinapoesy.com/',
  aTSIndex = ['TangShiAllIndex2.html', 'TangShiAllIndex1.html', 'TangShiAllIndex.html'],
  aTSUrl = [],
  arr,
  outFile = 'tangshi.txt',
  // Index-page links: capture 2 is the href of an anchor labelled "第<n>卷".
  reTSIndex = /<a[^>]+href=(['"])(.+?)\1[^>]*>\s*第\d+卷\s*<\/a>/gi,
  // One poem entry: capture 1 = text of the "Green" link inside the LiTitle
  // item, capture 2 = text of the <b> element opening the LiContent item,
  // capture 3 = the remainder of the LiContent item (still containing markup).
  rePoem = /<li[^>]+class="LiTitle"[^>]*>\s*<a[^>]+class="Green"[^>]*>\s*(.+?)\s*<\/a>[\S\s]+?<\/li>\s*<li[^>]+class="LiContent[^"]*"[^>]*>\s*<b>(.+?)<\/b>([\S\s]+?)<\/li>/gi,
  // Tags and line breaks stripped from capture 3 so a poem stays on one line.
  rePB = /<[^>]*>|\r|\n/g,
  // Pager "next" link: capture 1 is the href without its leading slash.
  // Intentionally NOT global, so exec() always searches from the start.
  reNextPage = /<a[^>]+href="\/([^"]+)"[^>]*>\s*<img\s+src="\/Images\/Pager\/nextn.gif"\s+border="0"[^>]*>\s*<\/a>/i,
  // NOTE(review): the header names the columns 作者/标题/正文, while rows are
  // written as capture 1, capture 2, capture 3 of rePoem - confirm against
  // the site markup that the header order matches the row order.
  aFields = ['作者', '标题', '正文'],
  sSplit = '\t',
  ts,
  sHtml,
  i,
  l,
  timer;

timer = new Date();
// Resolve the relative output path against the script's own folder.
shell.CurrentDirectory = fso.GetParentFolderName(WScript.ScriptFullName);
try {
  // 2 = ForWriting, true = create the file if it does not exist. No format
  // argument is passed, so the file is opened in the default (non-Unicode)
  // format.
  // NOTE(review): characters outside the system ANSI code page may make
  // WriteLine fail in this mode - consider whether Unicode output is wanted.
  ts = fso.OpenTextFile(outFile, 2, true);
} catch (err) {
  showError(err, 'Writing to ' + outFile);
  WScript.Quit(2);
}

// Walk the index pages from last to first. The loop must start at
// length - 1: starting at `length` read an undefined entry and requested
// the bogus URL "<url>undefined" on every run.
for (i = aTSIndex.length - 1; i >= 0; i--) {
  sHtml = getHtml(url + aTSIndex[i]);
  // reTSIndex is global, so exec() advances through all matches and resets
  // lastIndex once it returns null.
  while ((arr = reTSIndex.exec(sHtml))) {
    aTSUrl.push(arr[2]);
  }
}

ts.WriteLine(aFields.join(sSplit));
for (i = 0, l = aTSUrl.length; i < l; i++) {
  sHtml = writePoem(url + aTSUrl[i]);

  // Follow the pager: each iteration looks for the "next" link in the page
  // that was just written and replaces sHtml with the following page. A
  // failed fetch yields '' and therefore ends the chain.
  while ((arr = reNextPage.exec(sHtml))) {
    sHtml = writePoem(url + arr[1]);
  }
}
ts.Close();
WScript.Echo('Mission complete.\nTime elapsed: ' + (new Date() - timer) / 1000 + 's');
WScript.Quit();

/**
 * Synchronously GETs a URL with the shared `xhr` object.
 *
 * @param {string} URL Absolute URL to request.
 * @returns {string} The response text when the status is 200; otherwise an
 *     empty string, after the failure has been reported through showError.
 */
function getHtml(URL) {
  try {
    xhr.open('GET', URL, false); // false = synchronous request
    xhr.send();
  } catch (err) {
    // Treat an exception from open/send like a bad status: report it and
    // let the caller continue with the next page.
    showError(err, 'Fetching ' + URL);
    return '';
  }
  if (200 === xhr.status) {
    return xhr.responseText;
  }
  showError('fetch URI "' + URL + '" failed.\nstatus: ' + xhr.status);
  return '';
}

/**
 * Downloads one page and appends a tab-separated line to the output stream
 * for every poem matched by `rePoem`.
 *
 * @param {string} URL Absolute URL of the page to scrape.
 * @returns {string} The HTML that was fetched ('' when the fetch failed), so
 *     the caller can look for a "next page" link in it.
 */
function writePoem(URL) {
  var html = getHtml(URL),
    poem;
  while ((poem = rePoem.exec(html))) {
    ts.WriteLine([poem[1], poem[2], poem[3].replace(rePB, '')].join(sSplit));
  }
  return html;
}

/**
 * Reports a problem through WScript.Echo.
 *
 * @param {*} err An Error object, or any other value (typically a message
 *     string), which is echoed as-is.
 * @param {string} [source] Description of what was being attempted; only
 *     used when `err` is an Error object.
 */
function showError(err, source) {
  WScript.Echo('[object Error]' === Object.prototype.toString.call(err) ?
    [
      err.name,
      'source: ' + (undefined === source ? '' : source),
      // >>> 0 reinterprets the signed 32-bit number as unsigned before
      // formatting it as hexadecimal.
      'number: ' + (err.number >>> 0).toString(16),
      // Bits 16-28 of the number (the label text is kept as originally
      // written).
      'equipment: ' + (err.number >> 16 & 0x1FFF),
      // Low 16 bits of the number.
      'code: ' + (err.number & 0xFFFF),
      'Information: ' + err.message
    ].join('\n')
    :
    err);
}
|