' Merge every file listed in b.txt into all.tmp.
' Each listed file is read as text using the charset reported by
' checkCharset, its line endings are normalized to CRLF, and the result
' is appended to all.tmp. Listed paths that do not exist are written to
' bug.txt instead.
Const b_txt = "b.txt"        ' list of input paths, one per line (read as GBK)
Const out_text = "all.tmp"   ' merged output
Const FailName = "bug.txt"   ' listed paths that were not found

Set oFSO = CreateObject("Scripting.FileSystemObject")
' 2 = open for writing, True = create the file when it is missing.
' NOTE(review): no format argument is passed, so the streams use the
' default text format; characters outside the system code page may not be
' writable - confirm against the expected input.
Set oTextStream1 = oFSO.OpenTextFile(out_text, 2, True)
Set oTextStream2 = oFSO.OpenTextFile(FailName, 2, True)

Set oStream = CreateObject("ADODB.Stream")
oStream.Type = 2             ' text stream
oStream.Mode = 3             ' read/write
oStream.Charset = "GBK"
oStream.Open
oStream.LoadFromFile b_txt
s = oStream.ReadText()
oStream.Close

' Accept CRLF, LF or CR as the list separator so that a list saved with
' non-Windows line endings is still split into one path per entry.
s = Replace(s, vbCrLf, vbLf)
s = Replace(s, vbCr, vbLf)
arr = Split(s, vbLf)

For Each i In arr
    If Len(Trim(i)) = 0 Then
        ' Blank entry (for example the one produced by a trailing newline):
        ' it is not a path, so do not report it as a missing file.
    ElseIf oFSO.FileExists(i) Then
        ' The charset must be assigned while the stream is closed.
        oStream.Charset = checkCharset(i)
        oStream.Open
        oStream.LoadFromFile i
        s = oStream.ReadText()
        oStream.Close
        ' Normalize every line-ending flavour to CRLF: collapse CRLF and LF
        ' to a bare CR first, then expand each CR back to CRLF.
        s = Replace(s, vbCrLf, vbCr)
        s = Replace(s, vbLf, vbCr)
        s = Replace(s, vbCr, vbCrLf)
        ' The extra vbCrLf leaves a blank line between merged files.
        oTextStream1.WriteLine s & vbCrLf
    Else
        oTextStream2.WriteLine i
    End If
Next

' Close both outputs before reporting success so that the files are
' complete on disk by the time the message box is shown.
oTextStream1.Close
oTextStream2.Close

MsgBox "ok"
| |
' Guess the text encoding of a file, as a name suitable for
' ADODB.Stream.Charset.
'
' file - path of the file to inspect.
'
' Returns:
'   "Unicode" when the file starts with the bytes FF FE or FE FF,
'   "UTF-8"   when the bytes contain no sequence that is invalid as UTF-8
'             (pure ASCII content therefore reports "UTF-8"),
'   "GBK"     otherwise.
Function checkCharset(ByVal file)
    Dim oStream, oRegExp, arr(), s, b1, b2, n, k
    Set oStream = CreateObject("ADODB.Stream")
    oStream.Type = 1             ' binary stream
    oStream.Mode = 3             ' read/write
    oStream.Open
    oStream.LoadFromFile file
    n = oStream.Size

    ' Byte-order-mark check on the first two bytes.
    If n >= 2 Then
        b1 = AscB(oStream.Read(1))
        b2 = AscB(oStream.Read(1))
        If (b1 = &HFF And b2 = &HFE) Or (b1 = &HFE And b2 = &HFF) Then
            ' Close the stream on the early-return path as well.
            oStream.Close
            checkCharset = "Unicode"
            Exit Function
        End If
    End If

    ' Map every byte to the character with the same code point so that the
    ' \xNN escapes in the pattern below can be matched against raw bytes.
    oStream.Position = 0
    ReDim arr(n - 1)
    For k = 0 To n - 1
        arr(k) = ChrW(AscB(oStream.Read(1)))
    Next
    oStream.Close

    ' The pattern matches INVALID UTF-8: a lead byte followed by too few
    ' continuation bytes (first six alternatives), a continuation byte
    ' where none is expected (next seven), or one at the very start.
    s = "[\xC0-\xDF]([^\x80-\xBF]|$)" & _
        "|[\xE0-\xEF].{0,1}([^\x80-\xBF]|$)" & _
        "|[\xF0-\xF7].{0,2}([^\x80-\xBF]|$)" & _
        "|[\xF8-\xFB].{0,3}([^\x80-\xBF]|$)" & _
        "|[\xFC-\xFD].{0,4}([^\x80-\xBF]|$)" & _
        "|[\xFE-\xFE].{0,5}([^\x80-\xBF]|$)" & _
        "|[\x00-\x7F][\x80-\xBF]" & _
        "|[\xC0-\xDF].[\x80-\xBF]" & _
        "|[\xE0-\xEF]..[\x80-\xBF]" & _
        "|[\xF0-\xF7]...[\x80-\xBF]" & _
        "|[\xF8-\xFB]....[\x80-\xBF]" & _
        "|[\xFC-\xFD].....[\x80-\xBF]" & _
        "|[\xFE-\xFE]......[\x80-\xBF]" & _
        "|^[\x80-\xBF]"
    Set oRegExp = New RegExp
    oRegExp.MultiLine = False
    oRegExp.Pattern = s

    ' Any invalid sequence means the file is not UTF-8; fall back to GBK.
    checkCharset = "GBK"
    If Not oRegExp.Test(Join(arr, "")) Then checkCharset = "UTF-8"
End Function