"""Dump selected fields of <ID...> records from XML files to text files.

For every regular file in ``path`` whose name ends in ``.xml``
(case-insensitive), the file is read as UTF-8 and scanned with regular
expressions (no XML parser is involved) for ``<ID...>...</ID...>`` records.
One tab-separated line per record is written to ``<name>.xml.txt``, encoded
as GB2312 and terminated by CRLF.
"""
import codecs
import os
import re

# Directory scanned when the script runs.
path = '.'

# One record: "<ID", optional digits, ">" up to the nearest closing ID tag.
_RECORD_RE = re.compile(r'<ID\d*>[\s\S]+?<\/ID\d*>')

# Output columns, in order. Each pattern captures the text that directly
# precedes a closing tag (or, for the last one, the body of a comment).
_FIELD_RES = (
    re.compile(r'[^>]+(?=<\/UserType>)'),
    re.compile(r'[^>]+(?=<\/UserVendorId>)'),
    re.compile(r'[^>]+(?=<\/UserSubType>)'),
    re.compile(r'[^>]+(?=<\/Flags>)'),
    re.compile(r'(?<=<!--)[^>]+(?=-->)'),
)


def parse_record(record):
    """Return the five column values extracted from one record string.

    The columns are, in order: UserType, UserVendorId, UserSubType, Flags
    and the text of the first ``<!-- ... -->`` comment. A column whose
    pattern does not match (including an empty element) is ``''``.
    """
    row = []
    for pattern in _FIELD_RES:
        match = pattern.search(record)
        row.append(match.group(0) if match else '')
    return row


def convert_file(xml_path):
    """Write the records found in ``xml_path`` to ``xml_path + '.txt'``.

    Raises whatever ``codecs.open``/``read``/``write`` raise, e.g.
    ``UnicodeDecodeError`` for input that is not valid UTF-8 or
    ``UnicodeEncodeError`` for text GB2312 cannot represent. Both handles
    are closed even when that happens.
    """
    with codecs.open(xml_path, 'r', 'utf-8') as src:
        text = src.read()

    # codecs.open works on the underlying file in binary mode, so the
    # explicit '\r\n' below reaches the file unchanged.
    with codecs.open(xml_path + '.txt', 'w', 'gb2312') as dst:
        for record in _RECORD_RE.findall(text):
            dst.write('\t'.join(parse_record(record)) + '\r\n')


def convert_directory(directory):
    """Convert every ``.xml`` file located directly inside ``directory``.

    Sub-directories are not descended into. The path of each converted
    file is printed before it is processed.
    """
    for name in os.listdir(directory):
        xml_path = os.path.join(directory, name)
        if os.path.isfile(xml_path) and name[-4:].lower() == '.xml':
            print(xml_path)
            convert_file(xml_path)


# NOTE(review): the original script had lost its indentation; the output file
# is assumed to be closed once per XML file, after all records are written.
# The conversion still runs unconditionally at module level, as before.
convert_directory(path)