<# : batch portion (begins PowerShell multi-line comment block)
@echo off & setlocal
rem PowerShell: export text straight from a Word document into .txt files.
rem The document name is a.docx and it is looked up in the current directory.
rem This file is a batch/PowerShell hybrid: cmd runs the lines up to exit /b and
rem hands the whole file to PowerShell, which sees those lines as a block comment.
powershell -noprofile -NoLogo "iex (${%~f0} | out-string)"
pause
exit /b
#>
# Splits the text of a.docx into one file per "Module<N> unit<M>" heading:
# each heading becomes the file name, the text after it the file content.

# Abort on the first error so a failure cannot cascade into later steps;
# the finally block below still runs and shuts Word down.
$ErrorActionPreference = 'Stop'

# Resolve the document to a full path before Word is started, so a missing
# file fails here instead of inside the COM call.
$file = (Get-Item -LiteralPath 'a.docx').FullName

# Group 1: the heading (case-insensitive). Group 2: everything after it up to
# the next heading or the end of the text.
# (?s) lets "." match line feeds too, so a chunk containing one still matches.
$pattern = [regex] '(?is)(Module\d+\s+unit\d+)[\r\n]*(.+?)(?=Module\d+\s+unit\d+|$)'

$word = New-Object -ComObject Word.Application
$doc = $null
try {
    $doc = $word.Documents.Open($file)
    $text = $doc.Content.Text
}
finally {
    # Always close the document and quit Word; otherwise a failed run leaves
    # a stray WINWORD.EXE process behind.
    if ($doc) { $doc.Close() }
    $word.Quit()
    [void][Runtime.InteropServices.Marshal]::ReleaseComObject($word)
}

# Output goes to PowerShell's current location. .NET would otherwise resolve a
# relative name against the process working directory, which can differ.
$outDir = (Get-Location).ProviderPath

foreach ($m in $pattern.Matches($text)) {
    # \s+ inside the heading may have captured a paragraph mark or tab; control
    # characters are not legal in Windows file names, so turn them into a space.
    $name = $m.Groups[1].Value -replace '[\x00-\x1F]+', ' '
    $path = [IO.Path]::Combine($outDir, $name + '.txt')

    # NOTE(review): Encoding.Default is kept from the original. On Windows
    # PowerShell this is presumably the system ANSI code page, so characters
    # outside it would be lost - confirm whether UTF-8 is preferable.
    [IO.File]::WriteAllText($path, $m.Groups[2].Value, [Text.Encoding]::Default)
}
# (end of script)