- <# :
- cls&echo off&cd /d "%~dp0"&mode con lines=5000
- rem Batch/PowerShell hybrid: cmd.exe executes the lines down to exit, while PowerShell treats them as a block comment.
- rem The line above clears the screen, turns command echo off, switches to the folder of this script and sets the console to 5000 lines.
- rem The next line starts PowerShell without a profile and with the execution policy bypassed, reads this same file as GB2312 text and runs it through Invoke-Expression.
- powershell -NoProfile -ExecutionPolicy bypass "[IO.File]::ReadAllText(\"%~f0\",[Text.Encoding]::GetEncoding('GB2312'))|Invoke-Expression"
- rem After PowerShell returns: wait for a key press, then leave cmd.exe so it never reaches the PowerShell code below.
- pause
- exit
- #>
- # Substring that an extracted href value must contain to be written to the result file.
- $findword="qq.com";
- # Input file: read line by line further down; each line is passed to gethtml as the address to download.
- $txtfile1=".\1.txt";
- # Output file: created (overwritten) further down and filled with the matching href values.
- $txtfile2=".\2.txt";
-
- # Downloads the resource at $u and returns its content.
- #   $u      : the address to request.
- #   returns : the response content, or '' when every attempt failed.
- # Up to 3 attempts are made; a 3 second pause is inserted between attempts
- # (but not after the last one, where nothing is left to wait for).
- function gethtml($u){
-     $maxtry=3;
-     $t='';
-     for($j=1;$j -le $maxtry;$j++){
-         try{
-             # -UseBasicParsing avoids the Internet Explorer engine dependency of
-             # Windows PowerShell; only the raw content is needed by the caller.
-             $req=Invoke-WebRequest -Uri $u -UseBasicParsing;
-             $t=$req.Content;
-             break;
-         }catch{
-             write-host ('第'+$j.ToString()+'次获取网页源码失败');
-             # Only wait when another attempt is going to follow.
-             if($j -lt $maxtry){
-                 start-sleep -Seconds 3;
-             }
-         }
-     }
-     return $t;
- }
-
- # Main flow: download every address listed in $txtfile1 (one per line), save
- # each page into $newfolder as "<line number>.txt" (UTF-8) and write every
- # href value that contains $findword to $txtfile2 (GB2312).
- $newfolder=".\源码";
- if(-not (test-path -literal $newfolder)){[void][IO.Directory]::CreateDirectory($newfolder);}
- $enc=[Text.Encoding]::GetEncoding('GB2312');
- # Loop-invariant values, computed once instead of on every iteration.
- $utf8=[Text.Encoding]::GetEncoding('UTF-8');
- $prefix=$newfolder.trimend('\')+'\';
- $fs=New-Object System.IO.FileStream($txtfile2, [System.IO.FileMode]::Create);
- $sw=New-Object System.IO.StreamWriter($fs, $enc);
- try{
-     $text=[IO.File]::ReadAllLines($txtfile1, $enc);
-     for($i=0;$i -lt $text.count;$i++){
-         # Skip blank lines: an empty address can never be downloaded and would
-         # only burn every retry (and its pauses) inside gethtml.
-         if($text[$i].Trim().Length -eq 0){continue;}
-         write-host $text[$i];
-         $html=gethtml $text[$i];
-         # Page files are numbered by the 1-based line number of the address.
-         $outfile=$prefix+($i+1).ToString()+'.txt';
-         try{
-             [Io.File]::WriteAllText($outfile, $html, $utf8)
-         }catch{
-             # A page that cannot be saved must not stop the link extraction.
-             write-host ('Failed to save '+$outfile+': '+$_.Exception.Message) -ForegroundColor red;
-         }
-         $isexist=$false;
-         # Capture group 1 is the href value, with or without surrounding quotes.
-         foreach($it in [regex]::matches($html, 'href=[''"]?([^''" ]+)')){
-             $link=$it.groups[1].value;
-             if($link.Contains($findword)){
-                 $sw.WriteLine($link);
-                 # Flush per hit so results already found survive an abrupt stop.
-                 $sw.Flush();
-                 $isexist=$true;
-             }
-         }
-         if($isexist){
-             write-host 'match' -ForegroundColor green;
-         }
-     }
- }finally{
-     # Always release the output file, even when the loop is interrupted or
-     # fails; the guards cover the case where opening the file did not succeed.
-     if($sw){$sw.Close();}
-     if($fs){$fs.Close();}
- }
复制代码
|