#include "compiler.h"
#include "rlib.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * ifwhile(cond, action) { ... }
 *
 * Expands to a `while (cond) { action }` loop followed by an open
 * `if (_did_doit)`. The macro itself never sets `_did_doit` to true, so the
 * trailing `if` body only runs when `action` assigns it. It declares a local
 * variable, so it can be used at most once per scope. Nothing in this file
 * uses it.
 */
#define ifwhile(cond, action)                                                  \
    bool _did_doit;                                                            \
    _did_doit = false;                                                         \
    while (cond) {                                                             \
        action                                                                 \
    };                                                                         \
    if (_did_doit)

// Scratch notes kept from the original source:
// bool is_valid = expr ? 1 : 0;
// repeat:
// bool valid = expr != NULL *expr > 0;
// bool _expr_true = false;
// if(res){
//   _expr_true = true;
//}
// bool ifwhile(bool res){
//
//}

/*
 * Number of slots in the opcode dispatch table. rrex_execute_one() treats
 * every program byte outside 0..31 as a literal, so the table needs exactly
 * 32 entries (it used to have 30, leaving indexes 30 and 31 out of bounds).
 */
enum { RREX_OPCODE_LIMIT = 32 };

struct rrex_executor_t;

/* Handler for one program element; returns true when the element matched. */
typedef bool (*rrex_function)(struct rrex_executor_t *);

/*
 * Matching state shared by all handlers.
 *
 *  bdata / _bdata  current / initial position in the compiled program
 *  sdata / _sdata  current / initial position in the subject string
 *  previous*       opcode and program position of the element executed last
 *                  by rrex_execute_one(); the repeat handlers use
 *                  previous_position to find the element they repeat
 *  current         opcode being dispatched (RN_LITERAL for literal bytes)
 *  valid           not read or written by the code in this file
 *  functions       opcode -> handler table; NULL means "no handler"
 */
typedef struct rrex_executor_t {
    char *previous_position;
    char previous;
    char *bdata;
    char *_bdata;
    char *sdata;
    char *_sdata;
    long current;
    bool valid;
    rrex_function functions[RREX_OPCODE_LIMIT];
} rrex_executor_t;

bool rrex_match(char *sdata, char *bdata);
bool rrex_execute_one(rrex_executor_t *t);
bool rrex(char *s, char *r);

/*
 * Compile pattern `r` and search for it in `s`.
 *
 * Returns true when rrex_match() reports a match.
 *
 * NOTE(review): the compiled program is written into a fixed 4096-byte stack
 * buffer and rrex_compile() is given no size, so a sufficiently long pattern
 * presumably overflows it -- confirm against rrex_compile() in compiler.h.
 */
bool rrex(char *s, char *r) {
    char b[4096];
    rrex_compile(r, b);
    return rrex_match(s, b);
}

/* `^`: succeeds only while the subject position is still at its start. */
bool rrex_match_sol(rrex_executor_t *executor) {
    executor->previous = RN_ROOF;
    executor->previous_position = executor->bdata;
    bool valid = executor->sdata == executor->_sdata;
    if (valid) {
        executor->bdata++;
    }
    return valid;
}

/*
 * `.`: consumes one character that is neither a newline nor the string
 * terminator. (Previously the terminator was accepted too, which moved sdata
 * past the end of the subject and made later handlers read out of bounds.)
 */
bool rrex_match_dot(rrex_executor_t *executor) {
    executor->previous = RN_DOT;
    executor->previous_position = executor->bdata;
    char c = executor->sdata[0];
    if (c != '\n' && c != '\0') {
        executor->sdata++;
        executor->bdata++;
        return true;
    }
    return false;
}

/* Digit class: consumes one decimal digit. */
bool rrex_match_digit(rrex_executor_t *executor) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    if (isdigit((unsigned char)*executor->sdata)) {
        executor->sdata++;
        executor->bdata++;
        return true;
    }
    return false;
}

/* Whitespace class: consumes one of space, tab, newline, carriage return. */
bool rrex_match_whitespace(rrex_executor_t *executor) {
    char c = *executor->sdata;
    if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
        executor->sdata++;
        executor->bdata++;
        return true;
    }
    return false;
}

/*
 * Word class: consumes one alphabetic character or underscore. Digits are
 * deliberately not word characters here (the self-tests rely on that).
 */
bool rrex_match_word(rrex_executor_t *executor) {
    char c = executor->sdata[0];
    if (isalpha((unsigned char)c) || c == '_') {
        executor->sdata++;
        executor->bdata++;
        return true;
    }
    return false;
}

/*
 * Negated word class: consumes one character that is neither alphabetic nor
 * an underscore. Never consumes the string terminator.
 */
bool rrex_match_not_word(rrex_executor_t *executor) {
    char c = *executor->sdata;
    if (c != '\0' && !(isalpha((unsigned char)c) || c == '_')) {
        executor->sdata++;
        executor->bdata++;
        return true;
    }
    return false;
}

/*
 * Negated digit class: consumes one character that is not a decimal digit.
 * Never consumes the string terminator.
 */
bool rrex_match_not_digit(rrex_executor_t *executor) {
    char c = *executor->sdata;
    if (c != '\0' && !isdigit((unsigned char)c)) {
        executor->sdata++;
        executor->bdata++;
        return true;
    }
    return false;
}

/* `$`: succeeds at the end of the subject; consumes nothing. */
bool rrex_match_dollar(rrex_executor_t *executor) {
    if (*executor->sdata == '\0') {
        executor->bdata++;
        return true;
    }
    return false;
}

/* Literal: the program byte must equal the current subject character. */
bool rrex_match_literal(rrex_executor_t *executor) {
    if (*executor->bdata == *executor->sdata) {
        executor->bdata++;
        executor->sdata++;
        return true;
    }
    return false;
}

/*
 * Group with RN_PIPE-separated alternatives, terminated by RN_GROUP_END.
 *
 * Elements of the current alternative are executed in order. When one fails,
 * the program pointer skips to the next RN_PIPE (if any) and the subject is
 * rewound to where the group started, so every alternative is tried from the
 * same position. (The rewind was missing before: an alternative that failed
 * after consuming characters left the next one starting mid-way.)
 *
 * On return bdata is just past RN_GROUP_END. On failure sdata is restored.
 */
bool rrex_match_group(rrex_executor_t *executor) {
    bool v = true;
    executor->bdata++; /* step over the group-start opcode */
    char *sdata_before_fail = executor->sdata;

    while (v && *executor->bdata != RN_GROUP_END) {
        v = rrex_execute_one(executor);
        if (!v) {
            /* Current alternative failed: look for another one. */
            while (*executor->bdata != RN_GROUP_END) {
                if (*executor->bdata == RN_PIPE) {
                    v = true;
                    executor->bdata++;
                    executor->sdata = sdata_before_fail;
                    break;
                }
                executor->bdata++;
            }
        } else if (*executor->bdata == RN_PIPE) {
            /* Reached the end of an alternative that matched completely. */
            break;
        }
    }

    /* Skip whatever is left of the group, including its terminator. */
    while (*executor->bdata != RN_GROUP_END) {
        executor->bdata++;
    }
    executor->bdata++;

    if (!v) {
        executor->sdata = sdata_before_fail;
    }
    return v;
}

/*
 * Character set, terminated by RN_CHOICE_END; an RN_ROOF directly after the
 * start opcode negates it.
 *
 * Plain set: members are executed one by one until one matches.
 * Negated set: succeeds, consuming exactly one character, only when no member
 * matches and the subject is not at its terminator.
 *
 * Fixes over the previous version: the result is no longer uninitialised for
 * an empty set; a negated set now tests every member instead of only the
 * first; a member that matched inside a negated set no longer leaves sdata
 * advanced; and a negated set can no longer consume the string terminator.
 *
 * On return bdata is just past RN_CHOICE_END.
 */
bool rrex_match_choice(rrex_executor_t *executor) {
    bool matched = false;
    executor->bdata++; /* step over the set-start opcode */

    bool negated = *executor->bdata == RN_ROOF;
    if (negated) {
        executor->bdata++;
    }

    if (negated) {
        char *subject_start = executor->sdata;
        bool member_matched = false;
        while (*executor->bdata != RN_CHOICE_END) {
            if (rrex_execute_one(executor)) {
                member_matched = true;
                break;
            }
            /* Same stepping as the plain path: move on to the next member. */
            executor->bdata++;
        }
        /* Members were only probed; undo anything they consumed. */
        executor->sdata = subject_start;
        matched = !member_matched && *subject_start != '\0';
        if (matched) {
            executor->sdata++;
        }
    } else {
        while (*executor->bdata != RN_CHOICE_END) {
            matched = rrex_execute_one(executor);
            if (matched) {
                break;
            }
            executor->bdata++;
        }
    }

    /* Skip the remaining members and the terminator. */
    while (*executor->bdata != RN_CHOICE_END) {
        executor->bdata++;
    }
    executor->bdata++;
    return matched;
}

/*
 * Optional element. The opcode precedes its operand in the program: the
 * operand is executed once; if it fails, the program pointer is moved past
 * it (past the matching terminator for sets and groups, otherwise by one
 * byte). Always succeeds.
 */
bool rrex_match_optional(rrex_executor_t *executor) {
    executor->bdata++; /* step over the optional opcode to its operand */
    char *optional_start = executor->bdata;

    bool v = rrex_execute_one(executor);
    if (!v) {
        executor->bdata = optional_start;
        char closer = 0;
        if (*executor->bdata == RN_CHOICE_START) {
            closer = RN_CHOICE_END;
        }
        if (*executor->bdata == RN_GROUP_START) {
            closer = RN_GROUP_END;
        }
        if (closer) {
            while (*executor->bdata != closer) {
                executor->bdata++;
            }
        }
        executor->bdata++;
    }
    return true;
}

/*
 * Handler registered for RN_ASTERISK.
 *
 * It runs after the repeated element has already been executed once by the
 * caller. Each round re-executes that element (found through
 * previous_position) and then tries the element that follows the opcode; the
 * loop stops as soon as the following element matches or the repeated element
 * stops matching.
 *
 * Returns true when either of the two matched at least once. `once_valid`
 * used to be uninitialised, so the "nothing matched" case returned an
 * indeterminate value; it now returns false.
 */
bool rrex_match_at_least_one(rrex_executor_t *executor) {
    bool v = true;
    bool once_valid = false;

    executor->bdata++; /* step over the repeat opcode */
    char *method_position = executor->previous_position;
    char *next = executor->bdata;

    while (v) {
        executor->bdata = method_position;
        v = rrex_execute_one(executor);
        if (v) {
            once_valid = true;
        }

        executor->bdata = next;
        bool v_right = rrex_execute_one(executor);
        if (v_right) {
            once_valid = true;
            break;
        }
    }
    return once_valid;
}

/*
 * Range: the opcode is followed by two bound bytes. Consumes one subject
 * character lying between the bounds (inclusive, in either order). On failure
 * bdata is put back on the range opcode.
 */
bool rrex_match_range(rrex_executor_t *executor) {
    /* First bound. */
    executor->bdata++;
    char char_start = *executor->bdata;

    /* Second bound. */
    executor->bdata++;
    char char_end = *executor->bdata;

    /* Accept the bounds in either order. */
    if (char_start > char_end) {
        char temp = char_end;
        char_end = char_start;
        char_start = temp;
    }

    if (*executor->sdata >= char_start && *executor->sdata <= char_end) {
        executor->bdata++;
        executor->sdata++;
        return true;
    }

    /* Rewind over both bounds, back to the range opcode. */
    executor->bdata -= 2;
    return false;
}

/*
 * Handler registered for RN_PLUS.
 *
 * It runs after the repeated element has already been executed once by the
 * caller, and keeps re-executing it until it fails or the subject ends. When
 * a repetition fails, sdata is restored and the element following the opcode
 * is executed once.
 *
 * Returns true only when at least one repetition succeeded inside this
 * handler (i.e. the element matched at least twice overall). The file's own
 * tests mark part of this behaviour as "NOT READY YET", so the logic is left
 * as it was.
 */
bool rrex_match_plus(rrex_executor_t *executor) {
    char *plus_position = executor->bdata;
    char *next = plus_position + 1;
    char *to_repeat = executor->previous_position;

    bool valid = true;
    bool matched_once = false;
    char *sdata_before_fail;

    while (valid) {
        /* Stop at the end of the subject. */
        if (!*executor->sdata) {
            break;
        }

        executor->bdata = to_repeat;
        sdata_before_fail = executor->sdata;
        valid = rrex_execute_one(executor);
        if (valid) {
            matched_once = true;
        } else {
            /* The failed repetition must not keep what it consumed. */
            executor->sdata = sdata_before_fail;
        }

        /*
         * On failure, continue with the element after the opcode (if the
         * program has one). The loop ends either way because valid is false.
         */
        if (!valid && *(executor->bdata = next) &&
            rrex_execute_one(executor)) {
            break;
        }
    }

    if (matched_once && executor->bdata == plus_position) {
        /* Loop ended right after a repetition: step over the opcode. */
        executor->bdata++;
    }
    return matched_once;
}

/*
 * Execute the program element at bdata through the dispatch table.
 *
 * Bytes outside 0..RREX_OPCODE_LIMIT-1 are literals. That includes negative
 * values, which occur for bytes >= 0x80 where `char` is signed and previously
 * indexed the table out of bounds. An opcode without a registered handler
 * counts as "no match" instead of calling through an invalid pointer.
 *
 * Afterwards `previous` / `previous_position` describe the element just
 * executed, whatever nested handlers did to those fields.
 */
bool rrex_execute_one(rrex_executor_t *executor) {
    executor->current = *executor->bdata;
    int previous = executor->current;
    char *previous_position = executor->bdata;

    if (executor->current < 0 || executor->current >= RREX_OPCODE_LIMIT) {
        executor->current = RN_LITERAL;
    }

    rrex_function handler = executor->functions[executor->current];
    bool valid = handler != NULL && handler(executor);

    executor->previous = previous;
    executor->previous_position = previous_position;
    return valid;
}

/*
 * Search the subject `sdata` for the compiled program `bdata`.
 *
 * Elements are executed in sequence. When one fails and the subject is not
 * exhausted, matching restarts from the beginning of the program one subject
 * character further on, unless the program starts with RN_ROOF, in which case
 * the search stops at the first failure.
 *
 * Returns true when the whole program was executed successfully.
 */
bool rrex_match(char *sdata, char *bdata) {
    /* Zero-initialise so that unregistered table slots are NULL. */
    rrex_executor_t executor = {0};
    executor.bdata = bdata;
    executor._bdata = bdata;
    executor.sdata = sdata;
    executor._sdata = sdata;
    executor.previous_position = executor.bdata;

    executor.functions[RN_ARANGE] = rrex_match_range;
    executor.functions[RN_CHOICE_START] = rrex_match_choice;
    executor.functions[RN_DOLLAR] = rrex_match_dollar;
    executor.functions[RN_DOT] = rrex_match_dot;
    executor.functions[RN_DRANGE] = rrex_match_range;
    executor.functions[RN_LITERAL] = rrex_match_literal;
    executor.functions[RN_SLASH_CD] = rrex_match_not_digit;
    executor.functions[RN_SLASH_CW] = rrex_match_not_word;
    executor.functions[RN_PLUS] = rrex_match_plus;
    executor.functions[RN_ASTERISK] = rrex_match_at_least_one;
    executor.functions[RN_WHITESPACE] = rrex_match_whitespace;
    executor.functions[RN_GROUP_START] = rrex_match_group;
    executor.functions[RN_QUESTION] = rrex_match_optional;
    executor.functions[RN_ROOF] = rrex_match_sol;
    executor.functions[RN_DIGIT] = rrex_match_digit;
    executor.functions[RN_ALPHA] = rrex_match_word;

    rrex_executor_t *ex = &executor;
    char *s_padding = ex->sdata; /* start offset of the current attempt */
    bool valid = true;

    while (valid && *ex->bdata) {
        valid = rrex_execute_one(&executor);
        if (!valid && *ex->sdata) {
            if (*ex->_bdata == RN_ROOF) {
                /* Anchored program: no point in trying later offsets. */
                break;
            }
            s_padding++;
            ex->sdata = s_padding;
            ex->bdata = ex->_bdata;
            if (*ex->bdata && *ex->sdata) {
                valid = true;
            }
        }
    }
    return valid;
}

/*
 * Self-tests for the executor. rtest_banner() and rassert() are not defined
 * in this file (presumably they come from rlib.h).
 */
void rrex_executor_tests(void) {
    rtest_banner("rrex regular expressions");
    // rassert(rrex("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaq", "[^qxyzv]+q$"));
    rassert(rrex("abababa", "^(ab)+a$"));
    rassert(rrex(" a ", "\\sa\\s"));
    rassert(!rrex("a", "\\s"));
    rassert(rrex("abc", "ab[def]?c"));
    rassert(rrex("abc", "ab(d|e|f)?c"));
    rassert(rrex("1990-01-13",
                 "^(19|20)\\d\\d-(0[1-9]|1[0-2])-(0[1-9]|[12]\\d|3[01])$"));
    rassert(rrex("1990-01-13", "(19|20)\\d\\d-[0?1]\\d-[0123]\\d"));
    // rassert(rrex("1990-1-3", "(19|20)\\d\\d-[0?1]\\d-[0123]\\d"));
    // rassert(rrex("1990-1-3", "(19|20)\\d\\d-[01]?\\d-[0123]\\d"));
    rassert(
        rrex("1990-13-25", "(19|20)\\d\\d-([01]\\d?||\\d)-([0123]\\d|\\d)$"));
    /* The terminating semicolon was missing on this statement. */
    rassert(
        !rrex("1990-13-45", "(19|20)\\d\\d-([01]\\d?||\\d)-([0123]\\d|\\d)$"));
    //(19|20)\d\d-[01]?\d-[0123]\d
    rassert(rrex("a", "[zsa]"));
    rassert(rrex("abcdefg", "abcd?efg"));
    rassert(rrex("abcefg", "abcd?efg"));
    rassert(rrex("ce", "(a|b|c|d)e"));
    rassert(rrex("A", "A-Z"));
    rassert(rrex("a", "a-Z"));
    rassert(rrex("abcab", "[abc][acb]{4}$"));
    rassert(rrex("aa", "\\w{2}$"));
    rassert(rrex("a", "[ca]"));
    rassert(rrex("1-4", "1\\-4"));
    rassert(rrex("a", "[ba]"));
    rassert(rrex("5", "4-9"));
    rassert(rrex("4", "4-9"));
    rassert(rrex("9", "4-9"));
    rassert(rrex("123A", "1-41-41-4A"));
    rassert(!rrex("123B", "1-41-41-4A"));
    rassert(!rrex("1", "4-9"));
    rassert(rrex("abca", "[abc][abc][abc]a$"));
    rassert(rrex("abca", "[a-z][abc][abc]a"));
    rassert(rrex("abca", "[\\w][abc][abc]a"));
    rassert(rrex("a5a5g!a", "a0-9a-z\\d\\D\\Wa"));
    rassert(!rrex("1", "\\D"));
    rassert(!rrex("a", "\\W"));
    rassert(!rrex("1", "\\w"));
    rassert(!rrex("a", "\\d"));
    rassert(!rrex("\n", "."));
    rassert(rrex("a", "a$"));
    rassert(rrex("a1ba1ba1b", "[a-z\\db]{3}"));
    rassert(rrex("abbc", "a{1}[a-z]{2}c{1}"));
    rassert(rrex("aA", "[a-zA-Z]{2}"));
    rassert(!rrex("123", "\\d+a"));
    rassert(rrex("123a", "[123]+a"));
    printf("JSSS\n");
    rassert(rrex("123", "[123]+"));
    rassert(!rrex("123b", "[123]+a"));
    // rassert(!rrex("123", "[123]+b")); NOT READY YET
    rassert(rrex("abababc", "^(ab)+c$"));
    rassert(!rrex("abababb", "^(ab)+a$"));
    rassert(!rrex("abababa", "^(ab)+b$"));
    rassert(!rrex("abdabdabda", "^(abc)+a$"));
    rassert(!rrex("abababa", "^(abc)+a$"));
    rassert(rrex("123a33", "\\d+a\\d+"));
    rassert(!rrex("123ab", "\\d+$"));
    rassert(rrex("567", "[^1234]"));
    rassert(rrex("400", "[^5]"));
    rassert(!rrex("132213gh", ".*gd"));
    rassert(!rrex("132213gd", ".*gh"));
    rassert(rrex("#include \"test.h\"x", "#include *\"[a-z\\.]*\"x"));
    // rassert(rrex("#include \"test.h\"x", "#include.*\".*\"x"));
    rassert(!rrex("#include \"test.h\"y", ".*#include.*\".*\"x"));
    rassert(rrex("123test", "^123"));
    rassert(rrex("test123", "123"));
    rassert(!rrex("test123", "^123"));
    rassert(rrex("test123", "123$"));
    rassert(rrex("test123test", "123"));
    rassert(!rrex("test123test", "123$"));
}