查看文章 |
最近想做一个正则表达式的解析工具, [regexp.c]中主要函数声明和源代码 ======================================= 这里是词法分析的函数,它从一个正则表达式中读取一个特定的词,并返回类型, 具体的功能各位看代码吧,不算复杂,我的文档还没有整理完毕。 这里面的枚举类型:SymbolType(在[regexp.h]中)的定义也一块贴出来了,以便各位参考。 ======================================= /*终结符类型*/
/*
 * Terminal-symbol (token) categories returned by getSymbol().
 * The per-enumerator notes describe the input for which getSymbol(), as
 * shown in this file, returns that value.
 */
typedef enum SymbolType_enum {
    UNKNOWN = 0,          /* malformed "{...}" repeat specification */
    END_REGEXP,           /* NUL terminator of the regular expression */
    START_REGEXP,         /* never returned by getSymbol() in this excerpt */
    INPUT_ELE,            /* ordinary or escaped character; its code is in g_symbol_charvalue */
    REPEAT_ZERO_MORE,     /* '*' */
    REPEAT_ZERO_ONCE,     /* '?' */
    REPEAT_ONCE_MORE,     /* '+' */
    REPEAT_RANGE_MN,      /* "{m,n}" */
    REPEAT_RANGE_M,       /* "{m}" */
    REPEAT_RANGE_M_MORE,  /* "{m,}" */
    AND_MACHINE_BEGIN,    /* '(' */
    AND_MACHINE_END,      /* ')' */
    OR_MACHINE_BEGIN,     /* '[' */
    OR_MACHINE_END,       /* ']' */
    NOT_OP,               /* '^' */
    DOT,                  /* '.' */
    BACKTRACE,            /* '|' */
    NUMBER,               /* "\d" */
    NOT_NUMBER,           /* "\D" */
    ALL_SPACE,            /* "\s" */
    NOT_ALL_SPACE,        /* "\S" */
    AZaz09_,              /* "\w" */
    NOT_AZaz09_,          /* "\W" */
    LETTER_RANGE          /* '-' */
}SymbolType;
=======================================
/* Lexical analysis: fetch one symbol */
SymbolType getSymbol() { int *cc = &g_symbol_charvalue; char tmp; /*临时变量*/ *cc = getASCII(g_strRegExp[++g_scan_pos]); /*正则表达式结尾*/ if (*cc == NULL) {return g_symbol_type=END_REGEXP;} if (*cc == '(') {return g_symbol_type=AND_MACHINE_BEGIN;} if (*cc == ')') {return g_symbol_type=AND_MACHINE_END;} if (*cc == '[') {return g_symbol_type=OR_MACHINE_BEGIN;} if (*cc == ']') {return g_symbol_type=OR_MACHINE_END;} if (*cc == '|') {return g_symbol_type=BACKTRACE;} if (*cc == '^') {return g_symbol_type=NOT_OP;} if (*cc == '.') {return g_symbol_type=DOT;} if (*cc == '-') {return g_symbol_type=LETTER_RANGE;} /*转义字符*/ if (*cc == '\\') { *cc = getASCII(g_strRegExp[++g_scan_pos]); if (*cc == 'd') {return g_symbol_type=NUMBER;} if (*cc == 'D') {return g_symbol_type=NOT_NUMBER;} if (*cc == 'f') {*cc = '\f'; return g_symbol_type=INPUT_ELE;} if (*cc == 'n') {*cc = '\n'; return g_symbol_type=INPUT_ELE;} if (*cc == 'r') {*cc = '\r'; return g_symbol_type=INPUT_ELE;} if (*cc == 't') {*cc = '\t'; return g_symbol_type=INPUT_ELE;} if (*cc == 'v') {*cc = '\v'; return g_symbol_type=INPUT_ELE;} if (*cc == 's') {return g_symbol_type=ALL_SPACE;} if (*cc == 'S') {return g_symbol_type=NOT_ALL_SPACE;} if (*cc == 'w') {return g_symbol_type=AZaz09_;} if (*cc == 'W') {return g_symbol_type=NOT_AZaz09_;} /*16进制*/ if (*cc == 'x') { tmp = getASCII(g_strRegExp[++g_scan_pos]); if (tmp>='0' && tmp<='9') { *cc = tmp - '0'; } else if (tmp>='a' && tmp<='f') { *cc = tmp - 'a' + 10; } else if (tmp>='A' && tmp<='F') { *cc = tmp - 'A' + 10; } else { /*不是16进制数字,返回原字符'x'*/ g_scan_pos--; return g_symbol_type=INPUT_ELE; } tmp = getASCII(g_strRegExp[++g_scan_pos]); if (tmp>='0' && tmp<='9') { *cc *= 16; *cc += tmp - '0'; } else if (tmp>='a' && tmp<='f') { *cc *= 16; *cc += tmp - 'a' + 10; } else if (tmp>='A' && tmp<='F') { *cc *= 16; *cc += tmp - 'A' + 10; } else { /*这个不是16进制数字,只有一位:\xF*/ g_scan_pos--; return g_symbol_type=INPUT_ELE; } return g_symbol_type=INPUT_ELE; } /*8进制*/ /*以0-3开头的8进制数可以有3位*/ if (*cc>='0' && 
*cc<='3') { *cc -= '0'; tmp = getASCII(g_strRegExp[++g_scan_pos]); if (tmp>='0' && tmp<='7') { *cc *= 8; *cc += tmp - '0'; } else { /*只有一位8进制数字:\7*/ g_scan_pos--; return g_symbol_type=INPUT_ELE; } tmp = getASCII(g_strRegExp[++g_scan_pos]); if (tmp>='0' && tmp<='7') { *cc *= 8; *cc += tmp - '0'; } else { /*只有两位8进制数字:\77*/ g_scan_pos--; return g_symbol_type=INPUT_ELE; } return g_symbol_type=INPUT_ELE; } /*以4-7开头的8进制可以有2位*/ if (*cc>='4' && *cc<='7') { *cc -= '0'; tmp = getASCII(g_strRegExp[++g_scan_pos]); if (tmp>='0' && tmp<='7') { *cc *= 8; *cc += tmp - '0'; } else { /*只有一位8进制数字:\7*/ g_scan_pos--; return g_symbol_type=INPUT_ELE; } return g_symbol_type=INPUT_ELE; } else { /*[^x0-7dDfnrtvsSwW]*/ return g_symbol_type=INPUT_ELE; } } /*重复运算*/ if (*cc == '*') {return g_symbol_type=REPEAT_ZERO_MORE;} if (*cc == '+') {return g_symbol_type=REPEAT_ONCE_MORE;} if (*cc == '?') {return g_symbol_type=REPEAT_ZERO_ONCE;} if (*cc == '{') { *cc = getASCII(g_strRegExp[++g_scan_pos]); if (*cc>'9' || *cc<'0') {return g_symbol_type=UNKNOWN;} g_repeat_m = 0; /*取{m,n}的m*/ while (*cc>='0' && *cc<='9') { g_repeat_m *= 10; g_repeat_m += *cc-'0'; *cc = getASCII(g_strRegExp[++g_scan_pos]); } /*{m}*/ if (*cc == '}') {return g_symbol_type=REPEAT_RANGE_M;} if (*cc != ',') {return g_symbol_type=UNKNOWN;} *cc = getASCII(g_strRegExp[++g_scan_pos]); /*{m,}*/ if (*cc == '}') {return g_symbol_type=REPEAT_RANGE_M_MORE;} /*{m,n}*/ if (*cc>'9' || *cc<'0') {return g_symbol_type=UNKNOWN;} g_repeat_n = 0; /*取{m,n}的n*/ while (*cc>='0' && *cc<='9') { g_repeat_n *= 10; g_repeat_n += *cc-'0'; *cc = getASCII(g_strRegExp[++g_scan_pos]); } if (*cc == '}') { return g_symbol_type=REPEAT_RANGE_MN; } else { return g_symbol_type=UNKNOWN; } } /*其他的普通字符*/ return g_symbol_type=INPUT_ELE; return g_symbol_type=UNKNOWN; } /*得到c的ASCII码*/ int getASCII(char c) { int ascii; ascii = (int)c; if (ascii <0) { ascii += 256; } return ascii; }
|

