批处理之家 - Powered by Discuz! Board

<!-- Sample input fragment: two <char> entries of the kind the scripts below extract.
     Each entry has an xml:id, a <charName>, three <charProp> localName/value pairs,
     and one <mapping type="PUA"> whose text is a U+ code point. -->
<char xml:id="SD-A442">
<charName>CBETA CHARACTER SD-A442</charName>
<charProp>
<localName>Romanized form in CBETA transcription</localName>
<value>ki</value>
</charProp>
<charProp>
<localName>Character in the Siddham font</localName>
<value>丁</value>
</charProp>
<charProp>
<localName>Romanized form in Unicode transcription</localName>
<value>ki</value>
</charProp>
<!-- cb:dec is the mapped code point in decimal: 0x104442 = 1066050. -->
<mapping cb:dec="1066050" type="PUA">U+104442</mapping>
</char>
<!-- Second entry: same layout, but the middle charProp is labelled "rjchar". -->
<char xml:id="RJ-CAC5">
<charName>CBETA CHARACTER RJ-CAC5</charName>
<charProp>
<localName>Romanized form in CBETA transcription</localName>
<value>hri</value>
</charProp>
<charProp>
<localName>rjchar</localName>
<value>岒</value>
</charProp>
<charProp>
<localName>Romanized form in Unicode transcription</localName>
<value>hri</value>
</charProp>
<!-- cb:dec is the mapped code point in decimal: 0x10CAC5 = 1100485. -->
<mapping cb:dec="1100485" type="PUA">U+10CAC5</mapping>
</char>

复制代码

<# :
cls
@echo off
cd /d "%~dp0"
powershell -NoProfile -ExecutionPolicy bypass "Invoke-Command -ScriptBlock ([ScriptBlock]::Create([IO.File]::ReadAllText('%~f0',[Text.Encoding]::Default))) -Args '%~dp0'"
pause
exit
#>
# Batch/PowerShell hybrid. cmd.exe executes the lines above (PowerShell sees
# them as a block comment) and re-reads this same file as a PowerShell script
# block, passing the script's own directory as the only argument.
#
# The PowerShell part scans that directory recursively for .xml files and
# writes one CSV row per <char xml:id="..."> element to '#result.csv':
#   0    file base name
#   1    xml:id
#   2    charName text
#   3-5  the first three <value> texts
#   6    text of the mapping whose type ends in 'unicode'
#   7    text of the mapping with type="PUA"
#   8    cb:dec attribute of the PUA mapping
#   9    text of the first <g ref="#id"> element found in the same file
$path=$args[0];
$outfile=$path+'#result.csv';
$enc=[Text.Encoding]::UTF8;
$files=@(dir -liter $path -recurse|?{('.xml' -eq $_.Extension) -and ($_ -is [System.IO.FileInfo])});
$fs=New-Object System.IO.FileStream($outfile, [System.IO.FileMode]::Create);
$sw=New-Object System.IO.StreamWriter($fs, $enc);
try{
    for($i=0;$i -lt $files.length;$i++){
        write-host $files[$i].FullName;
        $text=[IO.File]::ReadAllText($files[$i].FullName, $enc);
        $m1=[regex]::matches($text, '<char xml:id="([^"]*?)">([\s\S]+?)</char>');
        foreach($k in $m1){
            $arr=@($files[$i].BaseName,'','','','','','','','','');
            $body=$k.groups[2].value;
            $a=$k.groups[1].value;
            $arr[1]=$a;
            $b=[regex]::match($body,'[^>]+(?=</charName>)');
            if($b.success){$arr[2]=$b.groups[0].value;};
            # Only columns 3-5 are reserved for <value> texts; stop there so extra
            # values cannot spill into the mapping columns or overflow the array.
            $n=3;
            foreach($v in [regex]::matches($body, '(?<=<value>).+?(?=</value>)')){
                if($n -ge 6){break;};
                $arr[$n]=$v.groups[0].value;
                $n++;
            };
            $f=[regex]::match($body,'<mapping type="[^"]*?unicode">([\s\S]+?)</mapping>');
            if($f.success){$arr[6]=$f.groups[1].value;};
            $g=[regex]::match($body,'type="PUA">([\s\S]+?)</mapping>');
            if($g.success){$arr[7]=$g.groups[1].value;};
            $h=[regex]::match($body,'cb:dec="([^"]+?)".+?type="PUA"');
            if($h.success){$arr[8]=$h.groups[1].value;};
            # The id is data, not a pattern: escape it before embedding it in a regex.
            $reg='<g ref="#'+[regex]::Escape($a)+'">(.+?)</g>';
            $p=[regex]::match($text, $reg);
            if($p.success){$arr[9]=$p.groups[1].value;};
            # Quote a cell only when it contains a comma, quote or line break, so
            # ordinary rows are written exactly as before while awkward values no
            # longer shift the columns.
            $cells=@(foreach($c in $arr){
                if($c -match '[",\r\n]'){'"'+$c.Replace('"','""')+'"'}else{$c}
            });
            $sw.WriteLine($cells -join ',');
        };
    };
}finally{
    # Closing the writer flushes it; do this even if a file could not be read.
    if($sw -ne $null){$sw.Close();};
    if($fs -ne $null){$fs.Close();};
};

复制代码

# Exports one quoted CSV row per <char> element found in the XML files under
# $srcDir (searched recursively). Columns:
#   1    file base name
#   2    id attribute of <char>
#   3    charName
#   4-6  the first three charProp values
#   7    text of the mapping whose type ends in 'unicode'
#   8    text of the mapping whose type is 'PUA'
#   9    text of the first <g> element whose ref is '#' + id
# Columns with no data keep the placeholder '0'.
$srcDir = 'E:\xml'; # directory containing the XML files
$dstFile = 'E:\xml\Result.csv'; # output file path
# @() keeps $files an array even when exactly one file matches.
$files = @(dir -Literal $srcDir -Filter *.xml -Recurse | ?{$_ -is [IO.FileInfo]});
$count = $files.Count;
$fs = New-Object System.IO.StreamWriter($dstFile, $false, [Text.Encoding]::UTF8);
try {
    for($i=0; $i -lt $count; $i++){
        # Use a fresh document per file and skip files that fail to load, so rows
        # are never produced from a document that was not loaded successfully.
        $xml = New-Object System.XML.XmlDocument;
        try {
            $xml.Load($files[$i].FullName);
        } catch {
            Write-Warning ('Skipping ' + $files[$i].FullName + ': ' + $_.Exception.Message);
            continue;
        }
        # Index the <g> elements by their ref attribute; the first occurrence wins.
        $hash = @{};
        forEach( $node In $xml.GetElementsByTagName('g') ){
            $key = $node.ref;
            if( ($key -ne $null) -and !$hash.ContainsKey($key) ){ $hash[$key] = $node.innerText; }
        }
        forEach( $node In $xml.GetElementsByTagName('char') ){
            $arr = @($files[$i].BaseName, '0', '0', '0', '0', '0', '0', '0', '0');
            $id = $node.id;
            if( $id -ne $null ) { $arr[1] = $id; }
            if( $node.charName -ne $null ) { $arr[2] = $node.charName; }
            # Values fill columns 4-6 only; further values are ignored instead of
            # overwriting the mapping columns or running past the end of the array.
            $k = 3;
            forEach( $prop In $node.charProp ) {
                $value = $prop.value;
                if( ($value -ne $null) -and ($k -lt 6) ) { $arr[$k++] = $value; }
            }
            forEach( $mapp In $node.mapping ) {
                $type = $mapp.type;
                if( $type -ne $null ) {
                    if( $type.EndsWith('unicode') ){
                        $arr[6] = $mapp.innerText;
                    } elseif( $type -eq 'PUA' ){
                        $arr[7] = $mapp.innerText;
                    }
                }
            }
            if( $hash.ContainsKey('#' + $id) ){ $arr[8] = $hash['#' + $id]; }
            # Double any embedded quote so the quoted CSV row stays well-formed.
            $cells = @(forEach( $cell In $arr ){ ([string]$cell).Replace('"', '""') });
            $fs.WriteLine('"' + ($cells -join '","') + '"' );
        }
        if($i % 500 -eq 0 ) { $fs.Flush(); }
    }
} finally {
    # Dispose flushes pending output and releases the file even after an error.
    $fs.Dispose();
}
echo 'Done'
[console]::ReadLine();

复制代码

# Exports one quoted CSV row per <char> element found in the XML files under
# $srcDir (searched recursively), using XPath with the namespace of each
# document's root element. Columns with no data keep the placeholder '0'.
$srcDir = 'E:\xml'; # directory containing the XML files
$dstFile = 'E:\xml\Result.csv'; # output file path
# @() keeps $files an array even when exactly one file matches.
$files = @(dir -Literal $srcDir -Filter *.xml -Recurse | ?{$_ -is [IO.FileInfo]});
$count = $files.Count;
$fs = New-Object System.IO.StreamWriter($dstFile, $false, [Text.Encoding]::UTF8);
try {
    for($i=0; $i -lt $count; $i++){
        # Use a fresh document per file and skip files that fail to load, so rows
        # are never produced from a document that was not loaded successfully.
        $xml = New-Object System.XML.XmlDocument;
        try {
            $xml.Load($files[$i].FullName);
        } catch {
            Write-Warning ('Skipping ' + $files[$i].FullName + ': ' + $_.Exception.Message);
            continue;
        }
        $mgrNS = New-Object System.XML.XmlNameSpaceManager($xml.NameTable);
        $mgrNS.AddNameSpace('ns', $xml.DocumentElement.NameSpaceURI); # XML namespace of the root element
        forEach( $node In $xml.SelectNodes('//ns:char', $mgrNS) ){
            $arr = @($files[$i].BaseName, '0', '0', '0', '0', '0', '0', '0', '0');
            $id = $node.id;
            if( $id -ne $null ) { $arr[1] = $id; } # column 2: id
            if( $node.charName -ne $null ) { $arr[2] = $node.charName; } # column 3: charName
            # Columns 4-6: value. Further values are ignored instead of overwriting
            # the mapping columns or running past the end of the array.
            $k = 3;
            forEach( $prop In $node.charProp) {
                $value = $prop.value;
                if( ($value -ne $null) -and ($k -lt 6) ) { $arr[$k++] = $value; }
            }
            forEach( $mapp In $node.mapping ) {
                $type = $mapp.type;
                if( $type -ne $null ) {
                    if( $type.EndsWith('unicode') ){
                        $arr[6] = $mapp.innerText; # column 7: text of the mapping whose type ends in 'unicode'
                    } elseif( $type -eq 'PUA' ){
                        $arr[7] = $mapp.innerText; # column 8: text of the mapping whose type is 'PUA'
                    }
                }
            }
            # Column 9: text of the first <g> element whose ref attribute is "#id".
            $g = $xml.SelectSingleNode('//ns:g[@ref="#' + $id + '"]', $mgrNS);
            if( $g -ne $null ){
                $text = $g.innerText;
                if( $text -ne $null ){ $arr[8] = $text; }
            }
            # Double any embedded quote so the quoted CSV row stays well-formed.
            $cells = @(forEach( $cell In $arr ){ ([string]$cell).Replace('"', '""') });
            $fs.WriteLine('"' + ($cells -join '","') + '"' );
        }
        if($i % 500 -eq 0 ) { $fs.Flush(); }
    }
} finally {
    # Dispose flushes pending output and releases the file even after an error.
    $fs.Dispose();
}
echo 'Done';
[console]::ReadLine();

复制代码