本帖最后由 happy886rr 于 2016-12-5 22:48 编辑
[2016/11/30]修复了ansi编码下/G:开关的漏洞,不作版本号提升,不作重新编译,只在核心源码中作了更新。
RF.EXE
-----------------------------------------------------------------------------
取代find、findstr的正则查找工具,智能识别文本编码,自动判断BOM类型。原生支持
ANSI、UTF8、Unicode、Unicode big endian编码。准确识别有无BOM类型。
支持pcre正则表达式查找、精确字符串查找,多种开关,完全模仿微软findstr的开关及
使用方法。具体用法与findstr大同小异,请自行品味。
源码完全开放,gcc、tcc均可编译。VC稍加修改亦能通过。
-----------------------------------------------------------------------------
COPYRIGHT@2016~2018 BY HAPPY, VERSION 1.0
REGEX FIND TOOLS
-----------------------------------------------------------------------------
rf [/F|/N|/V] [/S:[match string] ]|
[/R:[pcre expression] ]|
[/G:[ANSI strings file]] [txtfile]
-----------------------------------------------------------------------------
/H Show help information
/F Finds the line to which matches
/N Print the line number
/V Shows all rows that do not contain matching regulars
/S: Finds the line to which the string matches
/R: Finds the line to which the regular expression matches
/G: Gets the matching strings from a ANSI strings file
-----------------------------------------------------------------------------
11/06/2016
图片存为a.zip解压即是

核心代码: | | | | | | | | | | | | | #define PCRE_STATIC | | #include "pcre.h" | | #include <stdio.h> | | #include <string.h> | | #include <locale.h> | | #include <stdbool.h> | | #include <windows.h> | | | | | | #define BUFF_SIZE 4096 | | | | #define CHECK_SIZE 16383 | | | | | | char* UnicodeToANSI(const wchar_t* Str) | | { | | int L=WideCharToMultiByte(CP_ACP, 0, Str, -1, NULL, 0, NULL, NULL); | | char* Out=(char *)calloc(L+1, sizeof(char)); | | WideCharToMultiByte(CP_ACP, 0, Str, -1, Out, L, NULL, NULL); | | return Out; | | } | | wchar_t* UTF8ToUnicode(const char* Str) | | { | | int L=MultiByteToWideChar(CP_UTF8, 0, Str,-1, NULL, 0); | | wchar_t* Out=(wchar_t *)calloc(L+1, sizeof(wchar_t)); | | MultiByteToWideChar(CP_UTF8, 0, Str, -1, (LPWSTR)Out, L); | | return Out; | | } | | bool isUTF8(const char* Str) | | { | | if(!Str){ | | return false; | | } | | const unsigned char* bytes=(const unsigned char *)Str; | | while(*bytes){ | | if( | | ( | | bytes[0]<=0x7F || | | bytes[0]==0x09 || | | bytes[0]==0x0A || | | bytes[0]==0x0D || | | (0x20<=bytes[0] && bytes[0]<=0x7E) | | ) | | ){ | | bytes+=1; | | continue; | | } | | if( | | ( | | (0xC2<=bytes[0] && bytes[0]<=0xDF) && | | (0x80<=bytes[1] && bytes[1]<=0xBF) | | ) | | ){ | | bytes+=2; | | continue; | | } | | if( | | ( | | (bytes[0]==0xE0) && | | (0xA0<=bytes[1] && bytes[1]<=0xBF) && | | (0x80<=bytes[2] && bytes[2]<=0xBF) | | ) || | | ( | | ( | | (0xE1<=bytes[0] && bytes[0]<=0xEC)|| | | bytes[0]==0xEE || | | bytes[0]==0xEF | | ) && | | (0x80<=bytes[1] && bytes[1]<=0xBF) && | | (0x80<=bytes[2] && bytes[2]<=0xBF) | | ) || | | ( | | (bytes[0]==0xED) && | | (0x80<=bytes[1] && bytes[1]<=0x9F) && | | (0x80<=bytes[2] && bytes[2]<=0xBF) | | ) | | ){ | | bytes+=3; | | continue; | | } | | if( | | ( | | (bytes[0]==0xF0) && | | (0x90<=bytes[1] && bytes[1]<=0xBF) && | | (0x80<=bytes[2] && bytes[2]<=0xBF) && | | (0x80<=bytes[3] && bytes[3]<=0xBF) | | ) || | | ( | | (0xF1<=bytes[0] && bytes[0]<=0xF3) && | | (0x80<=bytes[1] && bytes[1]<=0xBF) && | | (0x80<=bytes[2] && bytes[2]<=0xBF) && | | (0x80<=bytes[3] && bytes[3]<=0xBF) | | ) || | | ( | | (bytes[0]==0xF4) && | | (0x80<=bytes[1] && bytes[1]<=0x8F) && | | (0x80<=bytes[2] && bytes[2]<=0xBF) && | | (0x80<=bytes[3] && bytes[3]<=0xBF) | | ) | | ){ | | bytes+=4; | | continue; | | } | | return false; | | } | | return true; | | } | | | | | | int CheckBom(FILE* fp) | | { | | unsigned char* buf=(unsigned char*)calloc(3,sizeof(unsigned char)); | | unsigned char* buf2; | | fseek(fp, 0, SEEK_SET); | | fread(buf, sizeof(unsigned char), 3, fp); | | if(buf[0]==0xEF && buf[1]==0xBB && buf[2]==0xBF){return 3;} | | else if(buf[0]==0xFF && buf[1]==0xFE){return 5;} | | else if(buf[0]==0xFE && buf[1]==0xFF){return 6;} | | else{ | | fseek(fp, 0, SEEK_SET); | | buf2=(unsigned char*)calloc(CHECK_SIZE,sizeof(unsigned char)); | | fread(buf2, sizeof(unsigned char), CHECK_SIZE, fp); | | if(isUTF8(buf2)){ | | free(buf2); | | return 2; | | } | | free(buf2); | | } | | return 1; | | } | | | | | | int RFindLine(FILE* fp, char* src, int FLAG) | | { | | bool mode=false; | | int BOM=0, EN=0, i=0, n=0; | | FILE* sp; | | char* Li=(char *)malloc(BUFF_SIZE*sizeof(char)); | | char* LineV;char* LineU; | | pcre *re; | | int erroffset, ovector[30], rc; | | const char *error; | | | | if ( (FLAG&0x0F)==0x02 ){ | | pcre_compile(src, 0, &error, &erroffset, NULL); | | if( (re=pcre_compile(src, 0, &error, &erroffset, NULL)) == NULL ){ | | fputs("PCRE compilation failed", stderr); | | exit(1); | | } | | }else if( (FLAG&0x0F)==0x03 ){ | | if( (sp=fopen(src, "rb"))==NULL ){ | | fputs("Read matching failed", stderr); | | exit(1); | | } | | } | | | | BOM=CheckBom(fp); | | if (BOM==1 || BOM==2){ | | EN=0; | | }else if(BOM==5 || BOM==6){ | | EN=2; | | }else if(BOM==3){ | | EN=3; | | } | | | | fseek(fp, EN, SEEK_SET); | | | | if(BOM==1){ | | char* Line=(char *)malloc(BUFF_SIZE*sizeof(char)); | | while(!feof(fp)){ | | memset(Line, 0, BUFF_SIZE*sizeof(char)); | | fgets(Line, BUFF_SIZE, fp); | | i++; | | if ( (FLAG&0x0F)==0x01 ){ | | if( strstr(Line, src)!=NULL ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x02 ){ | | if( pcre_exec(re, NULL, Line, strlen(Line), 0, 0, ovector, 30) >= 0 ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x03 ){ | | mode=false; | | fseek(sp, 0, SEEK_SET); | | while(!feof(sp)){ | | memset(Li, 0, BUFF_SIZE*sizeof(char)); | | fgets(Li, BUFF_SIZE, sp); | | char* tp=Li; | | while(*tp=='\t' ||*tp==' ' ||*tp=='\r' ||*tp=='\n'){tp++;} | | int tp_LEN=strlen(tp); | | tp[tp_LEN-2]=(tp[tp_LEN-2]=='\r')?'\0':tp[tp_LEN-2]; | | if(tp[0]!='\0' &&strstr(Line, tp)!=NULL){ | | mode=true; | | break; | | } | | } | | } | | | | if( (FLAG>>4)==0x03 && mode==true ){ | | fprintf(stdout, "%d:%s", i, Line); | | }else if( | | ((FLAG>>4)==0x02 && mode==false)|| | | ((FLAG>>4)==0x01 && mode==true ) | | ){ | | fputs(Line, stdout); | | } | | } | | }else if(BOM==2 || BOM==3){ | | char* Line=(char *)malloc(BUFF_SIZE*sizeof(char)); | | while(!feof(fp)){ | | memset(Line, 0, BUFF_SIZE*sizeof(char)); | | fgets(Line, BUFF_SIZE, fp); | | i++; | | if(BOM>1){LineU=UnicodeToANSI(UTF8ToUnicode(Line));} | | if ( (FLAG&0x0F)==0x01 ){ | | if( strstr(LineU, src)!=NULL ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x02 ){ | | if( pcre_exec(re, NULL, LineU, strlen(LineU), 0, 0, ovector, 30) >= 0 ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x03 ){ | | fseek(sp, 0, SEEK_SET); | | while(!feof(sp)){ | | memset(Li, 0, BUFF_SIZE*sizeof(char)); | | fgets(Li, BUFF_SIZE, sp); | | if( strstr(LineU, Li)!=NULL ){ | | mode=true; | | break; | | } | | } | | } | | | | if( (FLAG>>4)==0x03 && mode==true ){ | | fprintf(stdout, "%d:%s", i, LineU); | | }else if( | | ((FLAG>>4)==0x02 && mode==false)|| | | ((FLAG>>4)==0x01 && mode==true ) | | ){ | | fputs(LineU, stdout); | | } | | } | | }else if(BOM==5){ | | wchar_t* LineW=(wchar_t *)calloc(BUFF_SIZE, sizeof(wchar_t)); | | while(!feof(fp)){ | | memset(LineW, 0, BUFF_SIZE*sizeof(wchar_t)); | | fgetws(LineW, BUFF_SIZE, fp); | | i++; | | LineV=UnicodeToANSI(LineW); | | if ( (FLAG&0x0F)==0x01 ){ | | if( strstr(LineV, src)!=NULL ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x02 ){ | | if( pcre_exec(re, NULL, LineV, strlen(LineV), 0, 0, ovector, 30) >= 0 ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x03 ){ | | fseek(sp, 0, SEEK_SET); | | while(!feof(sp)){ | | memset(Li, 0, BUFF_SIZE*sizeof(char)); | | fgets(Li, BUFF_SIZE, sp); | | if( strstr(LineV, Li)!=NULL ){ | | mode=true; | | break; | | } | | } | | } | | if ( (FLAG>>4)==0x03 && mode==true ){ | | fprintf(stdout, "%d:%s", i, LineV); | | }else if( | | ((FLAG>>4)==0x02 && mode==false)|| | | ((FLAG>>4)==0x01 && mode==true ) | | ){ | | fputs(LineV, stdout); | | } | | } | | }else if(BOM==6){ | | wchar_t* LineW=(wchar_t *)calloc(BUFF_SIZE, sizeof(wchar_t)); | | while(!feof(fp)){ | | memset(LineW, 0, BUFF_SIZE*sizeof(wchar_t)); | | fgets(LineW, BUFF_SIZE, fp); | | i++; | | for(n=0;LineW[n]!=0x0000;n++){ | | LineW[n]=(LineW[n]&0x00FF)<<8|(LineW[n]&0xFF00)>>8; | | } | | LineV=UnicodeToANSI(LineW); | | if ( (FLAG&0x0F)==0x01 ){ | | if( strstr(LineV, src)!=NULL ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x02 ){ | | if( pcre_exec(re, NULL, LineV, strlen(LineV), 0, 0, ovector, 30) >= 0 ){ | | mode=true; | | }else{ | | mode=false; | | } | | }else if( (FLAG&0x0F)==0x03 ){ | | fseek(sp, 0, SEEK_SET); | | while(!feof(sp)){ | | memset(Li, 0, BUFF_SIZE*sizeof(char)); | | fgets(Li, BUFF_SIZE, sp); | | if( strstr(LineV, Li)!=NULL ){ | | mode=true; | | break; | | } | | } | | } | | if ( (FLAG>>4)==0x03 && mode==true ){ | | fprintf(stdout, "%d:%s", i, LineV); | | }else if( | | ((FLAG>>4)==0x02 && mode==false)|| | | ((FLAG>>4)==0x01 && mode==true ) | | ){ | | fputs(LineV, stdout); | | } | | } | | } | | fflush(stdout); | | if( (FLAG&0x0F)==0x02 ){pcre_free(re);} | | free(Li); | | return 0; | | } | | | | | | void Help_Info(FILE* stream, int Exit_Code) | | { | | fprintf(stream, | | "COPYRIGHT@2016~2018 BY HAPPY, VERSION 1.0\n" | | "REGEX FIND TOOLS\n" | | "-----------------------------------------------------------------------------\n" | | "rf [/F|/N|/V] [/S:[match string] ]|\n" | | " [/R:[pcre expression] ]|\n" | | " [/G:[ANSI strings file]] [txtfile]\n" | | "-----------------------------------------------------------------------------\n" | | " /H Show help information\n" | | " /F Finds the line to which matches\n" | | " /N Print the line number\n" | | " /V Shows all rows that do not contain matching regulars\n" | | " /S: Finds the line to which the string matches\n" | | " /R: Finds the line to which the regular expression matches\n" | | " /G: Gets the matching strings from a ANSI strings file\n" | | "-----------------------------------------------------------------------------\n" | | " 11/06/2016\n" | | ); | | exit(Exit_Code); | | } | | | | | | int main(int argc, char** argv) | | { | | FILE* fp; | | unsigned char FLAG=0; | | if( (argc==4) && (argv[1][0]=='/') && (argv[2][0]=='/') && (argv[2][2]== ':') ){ | | switch(argv[1][1]){ | | case 'F': | | case 'f': | | FLAG|=0x10; | | break; | | case 'V': | | case 'v': | | FLAG|=0x20; | | break; | | case 'N': | | case 'n': | | FLAG|=0x30; | | break; | | default: | | Help_Info(stderr, 2); | | } | | switch(argv[2][1]){ | | case 'S': | | case 's': | | FLAG|=0x01; | | break; | | case 'R': | | case 'r': | | FLAG|=0x02; | | break; | | case 'G': | | case 'g': | | FLAG|=0x03; | | break; | | default: | | Help_Info(stderr, 1); | | } | | }else { | | Help_Info(stderr, 3); | | } | | | | if( (fp=fopen(argv[3], "rb"))==NULL ){ | | fputs("Read failed", stderr); | | return 3; | | } | | RFindLine(fp, argv[2]+3, FLAG); | | fclose(fp); | | return 0; | | }COPY |
|