#define R4_DEBUG_a #include "rrex4.h" #include "rlib.h" #include bool bench_r4(unsigned int times, char *str, char *expr) { RBENCH(times, { r4_t *r = r4(str, expr); if (r->valid == false) { printf("Bench r4 error\n"); exit(1); } r4_free(r); }); return true; } void bench_c(unsigned int times, char *str, char *expr) { regex_t regex; if (regcomp(®ex, expr, REG_EXTENDED)) { printf("Creg: error in regular expression.\n"); exit(1); } RBENCH(times, { if (regexec(®ex, str, 0, NULL, 0)) { printf("Creg: error executing regular expression.\n"); exit(1); } }); regfree(®ex); } bool bench(unsigned int times, char *str, char *expr) { printf("%d:(%s)<%s>\n", times, str, expr); printf("c:"); bench_c(times, str, expr); printf("r:"); bench_r4(times, str, expr); return true; } void test_r4_next() { r4_t *r = r4_new(); char *str = "abcdefghijklmnop"; char *reg = "(\\w\\w\\w\\w)"; r = r4(str, reg); assert(r->valid); assert(r->match_count == 1); assert(!strcmp(r->matches[0], "abcd")); // Again with same regex as parameter r = r4_next(r, reg); assert(r->valid); assert(r->match_count == 1); assert(!strcmp(r->matches[0], "efgh")); // Again with same regex as parameter r = r4_next(r, reg); assert(r->valid); assert(r->match_count == 1); assert(!strcmp(r->matches[0], "ijkl")); // Reuse expression, NULL parameter r = r4_next(r, NULL); assert(r->valid); assert(r->match_count == 1); assert(!strcmp(r->matches[0], "mnop")); // No results using r4_next r = r4_next(r, NULL); assert(r->valid); assert(r->match_count == 0); // Again no results using r4_next, Shouldn't crash r = r4_next(r, NULL); assert(r->valid); assert(r->match_count == 0); r4_free(r); } void bench_all(unsigned int times) { assert(bench(times, "suvw", "[abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw][" "abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]")); assert(bench(times, "ponyyy", "^p+o.*yyy$$$$")); assert(bench(times, " ponyyzd", "p+o.*yyzd$$$$")); assert(bench(times, "abc", "def|gek|abc")); assert(bench(times, "abc", "def|a?b?c|def")); assert(bench(times, "NL18RABO0322309700", "([A-Z]{2})([0-9]{2})([A-Z]{4}[0-9])([0-9]+)$")); assert(bench(times, "a 1 b 2 c 3 d 4 ", "([A-Z0-9 ]+)")); } bool r4_match_stats(char *str, char *expr) { r4_t *r = r4(str, expr); bool result = r->valid; printf("%d:(%s)<%s>\n", r->validation_count, r->_str, r->_expr); if (result) { printf(" - match(0)\t: \"%s\"\n", r->match); } for (unsigned i = 0; i < r->match_count; i++) { printf(" - match(%d)\t: \"%s\"\n", i + 1, r->matches[i]); } r4_free(r); return result; } void test_r4_bug_check_capture_overflow() { // This is a former bug in r4. // Case one r4_t *r = r4("test", "(test)+"); assert(r->match_count == 1); r4_free(r); // Case two r = r4("tester", "(t\\est\\e\\r)+"); assert(r->match_count == 1); printf("%s\n", r->matches[0]); r4_free(r); // Case three r = r4("test", "(t\\est\\e\\r)+"); assert(r->match_count == 0); r4_free(r); } void test_r4_capture_main_group() { // Case 1 r4_t *r = r4("testtesttesttest", "(test)+test$"); // printf("%s\n",r->match); // assert(!strcmp(r->match,"testtesttesttest")); assert(r->match_count == 3); assert(!strcmp(r->matches[0], "test")); assert(!strcmp(r->matches[1], "test")); assert(!strcmp(r->matches[2], "test")); r4_free(r); // Case 2 (with search) /* r = r4(" testtesttesttest","(test)+test$"); printf("%s\n",r->match); assert(!strcmp(r->match,"testtesttesttest")); assert(r->match_count == 3); assert(!strcmp(r->matches[0], "test")); assert(!strcmp(r->matches[1], "test")); assert(!strcmp(r->matches[2], "test")); r4_free(r); */ } char test_r4_capture_dynamic_amount() { r4_t *r = r4("testtesttesttest", "(test)+test$"); assert(r->match_count == 3); assert(!strcmp(r->matches[0], "test")); assert(!strcmp(r->matches[1], "test")); assert(!strcmp(r->matches[2], "test")); r4_free(r); return true; // Some advanced capturing // Fails r = r4("testtesttesttest", "([tes]+)+test$"); printf("%d\n", r->match_count); assert(r->match_count == 1); assert(!strcmp(r->matches[0], "testtesttest")); r4_free(r); } int main(int argc, char *argv[]) { for (int i = 0; i < argc; i++) { if (!strcmp(argv[i], "--debug")) { r4_enable_debug(); } } // Has to be fixed r4_match_stats("r4@r4.net", "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]*$"); // r4_match_stats("r4@r4.net", "^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$"); // exit(0); test_r4_capture_main_group(); assert(r4_match_stats("testtesttesttest", "(test)+test$")); assert(r4_match_stats("testtest", "test")); // Group testing assert(r4_match_stats("aaadddd", "(a+)(d+)$")); assert(r4_match_stats("aaa", "(a+)$")); assert(r4_match_stats("aaadddd", "(d+)$")); assert(r4_match_stats("aaadddd", "(d+)")); assert(r4_match_stats("aaa\"dddd\"", "\"(d+)\"")); assert(r4_match_stats("aaadddd", "(a*)(d+)$")); assert(r4_match_stats("aaa", "(a*)$")); assert(r4_match_stats("aaadddd", "(d*)$")); assert(r4_match_stats("aaadddd", "(d*)")); assert(r4_match_stats("aaa\"dddd\" ", "\"(d*)\"\\s*")); // Words assert(r4_match_stats("a", "\\w")); assert(!r4_match_stats("1", "\\w")); assert(r4_match_stats("1", "\\W")); assert(!r4_match_stats("a", "\\W")); assert(r4_match_stats("aa", "\\w{2}")); assert(r4_match_stats("11", "\\W{2}")); assert(r4_match_stats("1", "[\\W]")); // Digits assert(r4_match_stats("1", "\\d")); assert(!r4_match_stats("a", "\\d")); assert(r4_match_stats("a", "\\D")); assert(!r4_match_stats("1", "\\D")); assert(r4_match_stats("11", "\\d{2}$")); assert(r4_match_stats("aa", "\\D{2}$")); assert(r4_match_stats("a", "[\\D]")); // Whitespace assert(r4_match_stats(" ", "\\s")); assert(r4_match_stats(" a", "\\s")); assert(!r4_match_stats("a", "[\\s]")); assert(r4_match_stats("a ", "[\\s]")); assert(r4_match_stats("a", "\\S")); assert(!r4_match_stats(" ", "\\S")); assert(!r4_match_stats(" ", "[\\S]")); assert(r4_match_stats("b ", "[\\S]")); assert(r4_match_stats(" b", "[\\S]")); // Boundaries assert(r4_match_stats("a", "\\b")); assert(r4_match_stats("a", "\\ba$")); assert(r4_match_stats("a", "^\\ba$")); assert(r4_match_stats("aa", "\\b")); assert(!r4_match_stats("aa", "\\b$")); assert(r4_match_stats("aa", "[\\b]")); assert(r4_match_stats("a", "\\B")); assert(r4_match_stats("a", "\\Ba$")); assert(r4_match_stats("a", "^\\Ba$")); assert(r4_match_stats("aa", "\\B")); assert(!r4_match_stats("aa", "^\\B")); assert(!r4_match_stats("a1", "a[\\B]$")); // Optional assert(!r4_match_stats("a", "?")); assert(r4_match_stats("a", "a?")); assert(r4_match_stats("a", "b?")); assert(r4_match_stats("a", "^b?")); assert(r4_match_stats("a", "a?$")); assert(!r4_match_stats("a", "b?$")); assert(r4_match_stats("a", "[def]?a$")); // Range assert(r4_match_stats("a", "a{1}")); assert(r4_match_stats("ab", "a{1}")); assert(r4_match_stats("aa", "a{2}")); assert(!r4_match_stats("aab", "a{3}")); assert(!r4_match_stats("a1", "a{2}")); assert(r4_match_stats("ab", "a{1,2}")); assert(r4_match_stats("aa", "a{2,}")); // Group (Custom function set) r4_match_stats("*?+$^.|\\[{()}]@ ", "[*?+$^.|\\\\[{()}]]+$@\\s"); // Miscellaneous tests bool debug_mode_original = _r4_debug; _r4_debug = false; r4_enable_debug(); assert(_r4_debug); r4_disable_debug(); assert(!_r4_debug); _r4_debug = debug_mode_original; assert(r4_match("a", "a")); assert(!r4_match("b", "a")); r4_init(NULL); r4_free(NULL); r4_free_matches(NULL); // Next tests test_r4_next(); // Check if former known bugs are still fixed test_r4_bug_check_capture_overflow(); // Check if capture amount is dynamic test_r4_capture_dynamic_amount(); char *c_function_regex = "(\\w[\\w\\d]*[\\s\\*]*)\\s*\\w[\\w\\d]*\\s*\\((.*)\\)\\s*\\{"; r4_match_stats("int **main() {}", c_function_regex); r4_match_stats("int main(int argc, char *argv[],(void *)aaa) {}", c_function_regex); assert(r4_match_stats("NL18RABO0322309700", "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d{10})")); // exit(0); unsigned int times = 1; bench_all(times); RBENCH(1, { assert(r4_match_stats("#define DEFINETEST 1", "#define\\s(+[\\w\\d_]+)\\s+[\\w\\d_]+")); // assert(r4_match_stats("#define DEFINETEST 1\n", // s "#define\\s+\\w[\\d\\w_]+\\s+[\\w\\d_]\\s*")); assert(!r4_match_stats("aa", "aaaa")); assert(r4_match_stats("ponyyy", "^p+o.*yyy$$$$")); assert(!r4_match_stats("ponyyy", "p%+o.*yyy$$$$")); assert(!r4_match_stats("ponyyyd", "^p+o.*yyz$$$$")); assert(r4_match_stats("123", "[0-2][2-2][1-3]$")); assert(r4_match_stats("aaaabC5", "(a)(\\w)a*(a)\\w[A-Z][0-9]$")); assert(r4_match_stats("abcdeeeeee", "ab(cdeee)e")); assert(r4_match_stats("1234567", "12(.*)67$")); assert(r4_match_stats("12111678993", "12(.*)67(.*)3$")); assert(r4_match_stats("NL18RABO0322309700", "NL(.*)R(.*)0(.*)0(.*)$")); assert(r4_match_stats("NL18RABO0322309700", "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); assert(r4_match_stats("NL18RABO0322309700garbage", "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)")); assert(r4_match_stats("NL18RABO0322309700", "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); assert(r4_match_stats(" NL18RABO0322309700", "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); assert(r4_match_stats(" NL18RABO0322309700", "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); assert( r4_match_stats("NL18RABO0", "(\\w\\w)(\\d\\d)(\\w\\w\\w\\w\\d)$")); assert(r4_match_stats("q", "\\q$")); assert(r4_match_stats("ab123", "[a-z0-9]+$")); assert(r4_match_stats("ppppony", "p*pppony")); assert(r4_match_stats("aa", "a{2}$")); assert(r4_match_stats("A23", "[0-2A-z][2-2][1-3]$")); assert(r4_match_stats("z23", "[0-2A-z][2-2][1-3]$")); assert(r4_match_stats("r23", "[0-2Ar][2-2][1-3]$")); assert(r4_match_stats("test", "\\w\\w\\w\\w$")); assert(!r4_match_stats("test", "\\W\\w\\w\\w$")); assert(r4_match_stats("1est", "\\W\\w\\w\\w$")); assert(r4_match_stats("1est", "\\d\\w\\w\\w$")); assert(r4_match_stats("Aest", "\\D\\w\\w\\w$")); assert(r4_match_stats("abc", "[ab]+")); assert(!r4_match_stats("abc", "[ab]+$")); assert(r4_match_stats("abc", "[abc]+$")); assert(!r4_match_stats("a", "[^ba]")); assert(!r4_match_stats("a", "[^ab]")); assert(r4_match_stats(" ponyyzd", "p+o.*yyzd$$$$")); assert(r4_match_stats("abc", "def|gek|abc")); assert(!r4_match_stats("abc", "def|gek|abd")); assert(r4_match_stats("abc", "def|abc|def")); assert(r4_match_stats( "suwv", "[abcdesfghijklmnopqrtuvw][abcdefghijklmnopqrstuvw][" "abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]")); assert(r4_match_stats("123", "(.*)(.*)(.*)")); assert(r4_match_stats("1234", "(.*)(.*)(.*)")); assert(r4_match_stats("#include \"test.c\"", "#include\\s+\"(.*)\"")); assert(r4_match_stats("#define TEST_JE VALUE", "#define\\s+([A-Za-z_0-9]+)\\s+([A-Za-z_0-9]+)")); // assert(r4_match_stats("bbb", "a*(bbb)")); // Tests added for coverage assert(!r4_match_stats("1", "[\\D]")); assert(!r4_match_stats("11", "\\D{2}")); assert(!r4_match_stats("ab", "ba")); assert(r4_match_stats("2", "[4-2]")); }); return 0; }