可以尝试一下多线程，线程数越多通常越快，具体取决于你的 CPU。
数据处理函数可以自行优化。
- #&cls&cd /d "%~dp0" & @powershell -c "Get-Content '%~0' | Select-Object -Skip 1 | Out-String | Invoke-Expression" & pause&exit
- # Start from a clean console and remember when the run began; $t1 is printed
- # again at the end of the script.
- Clear-Host
- $t1 = Get-Date
-
- # Input folder (scanned for *.txt files) and output folder for the merged files.
- $src_dir = '新建文件夹'
- $dst_dir = 'out'
-
- # Create the output folder; the returned DirectoryInfo is discarded.
- $null = [IO.Directory]::CreateDirectory($dst_dir)
-
- # Worker script block: merges all files of one same-named group into a single
- # de-duplicated output file.
- #   $dst_dir   - folder the merged file is written to
- #   $groupInfo - one Group-Object result; .Name is the shared file name and
- #                .Group holds the files to merge
- # Returns the group name followed by ' 已完成' as a completion message.
- $HandleGroupJob = {
-     param($dst_dir,$groupInfo)
-     Write-Host $groupInfo.Name
-
-     # A HashSet silently drops rows that were already added.
-     $uniqueRows = New-Object 'System.Collections.Generic.HashSet[string]'
-     foreach ($file in $groupInfo.Group) {
-         foreach ($line in [IO.File]::ReadAllLines($file.FullName)) {
-             # Skip lines that do not match the expected row pattern.
-             if ($line -notmatch '\S+\s+\d+\s+-?\d{2,}') { continue }
-             # Recode the SH/SZ markers and turn whitespace runs into '|' separators.
-             $row = $line -replace 'SH','1|' -replace 'SZ','0|' -replace '\s+','|'
-             [void]$uniqueRows.Add($row)
-         }
-     }
-
-     # Write the collected rows to <dst_dir>\<group name>.
-     $outPath = '{0}\{1}' -f $dst_dir,$groupInfo.Name
-     Out-File -InputObject $uniqueRows -FilePath $outPath
-     $uniqueRows = $null
-     return ($groupInfo.Name + ' 已完成')
- }
-
- # Multithreading setup: a runspace pool holding between 1 and 10 runspaces,
- # so at most 10 jobs run concurrently.
- $pool = [System.Management.Automation.Runspaces.RunspaceFactory]::CreateRunspacePool(1, 10)
- $pool.Open()
-
- # Parallel lists: $threads[i] is the PowerShell instance whose pending
- # invocation handle is stored in $results[i].
- $threads = New-Object -TypeName System.Collections.ArrayList
- $results = New-Object -TypeName System.Collections.ArrayList
-
- '开始创建线程...'
- # Queue one job per distinct *.txt file name found under $src_dir (recursive).
- Get-ChildItem -Recurse -Path $src_dir -Filter '*.txt' |
-     Group-Object {$_.Name} |
-     ForEach-Object {
-         # Echo the group name, then build a PowerShell instance that runs the
-         # worker with ($dst_dir, <this group>) on the shared pool.
-         $_.Name
-         $worker = [powershell]::Create().AddScript($HandleGroupJob).AddArgument($dst_dir).AddArgument($_)
-         $worker.RunspacePool = $pool
-         # Store the instance and its async handle at the same index.
-         [void]$threads.Add($worker)
-         [void]$results.Add($worker.BeginInvoke())
-     }
- '-------------------------------'
-
- '等待线程结束'
- # Poll every 500 ms until every queued job has been collected.
- do {
-     $pending = 0
-     for ($slot = 0; $slot -lt $results.Count; $slot++) {
-         $handle = $results[$slot]
-         # A null slot means this job was already collected on an earlier pass.
-         if ($null -eq $handle) { continue }
-         if (-not $handle.IsCompleted) {
-             $pending++
-             continue
-         }
-         # Finished: emit the job's output, dispose the instance and clear both slots.
-         $threads[$slot].EndInvoke($handle)
-         $threads[$slot].Dispose()
-         $threads[$slot] = $null
-         $results[$slot] = $null
-         [System.GC]::Collect()
-     }
-     if ($pending -gt 0) { Start-Sleep -Milliseconds 500 }
- } while ($pending -gt 0)
-
- # Shut down the thread pool. RunspacePool is IDisposable, so dispose it after
- # closing instead of leaving it to the finalizer.
- $pool.Close()
- $pool.Dispose()
- '-------------------'
- # Report start time -> end time of the whole run.
- '{0} -> {1}' -f $t1,(Get-Date)
复制代码
|