// Benchmark and test driver comparing the rrex regex engine against the POSIX
// <regex.h> implementation and hand-written C validators.
#include "rrex2.h"
// NOTE(review): the next two directives had lost their header names in this
// copy of the file. <regex.h> is required for regex_t/regcomp/regexec;
// <rlib.h> is presumed to be the header providing rbf, rbench_*, rassert and
// rprint* -- confirm against the build before merging.
#include <rlib.h>
#include <regex.h>
// Standard headers for the libc functions this file calls directly.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

enum {
    // Size of the buffer handed to rrex_compile(). The compiled size of a
    // pattern is not visible from this file, so every call site uses the same
    // generous capacity instead of a per-pattern guess.
    RREX_BYTECODE_CAPACITY = 4096,
    // Length of a Dutch zip code written as "1234 AB".
    DUTCH_ZIPCODE_LENGTH = 7
};

// Zip code pattern in rrex syntax.
#define DUTCH_ZIPCODE_RREX_PATTERN "\\d{4} [a-zA-Z]{2}"
// Same pattern for regcomp(): POSIX extended regular expressions do not
// define "\d", so the digit class is spelled out to behave the same on every
// libc.
#define DUTCH_ZIPCODE_ERE_PATTERN "[0-9]{4} [a-zA-Z]{2}"

// Print `message` to stderr and terminate with a failure status.
__attribute__((noreturn)) static void bench_fatal(const char *message) {
    fprintf(stderr, "%s\n", message);
    exit(EXIT_FAILURE);
}

// Allocate a regex_t and compile `pattern` into it as an extended regular
// expression. Terminates the program when allocation or compilation fails.
// The caller releases the result with bench_regex_free().
static regex_t *bench_regex_new(const char *pattern) {
    regex_t *regex = malloc(sizeof *regex);
    if (!regex) {
        bench_fatal("cregex alloc error");
    }
    if (regcomp(regex, pattern, REG_EXTENDED) != 0) {
        bench_fatal("cregex comp error");
    }
    return regex;
}

// Release both the compiled pattern and the regex_t storage itself.
static void bench_regex_free(regex_t *regex) {
    regfree(regex);
    free(regex);
}

// Terminate when `ret`, a regexec() result, signals a real failure.
// 0 (match) and REG_NOMATCH are both ordinary outcomes: the zip code
// benchmarks are deliberately fed invalid input as well.
static void bench_regex_check_exec(int ret) {
    if (ret != 0 && ret != REG_NOMATCH) {
        bench_fatal("cregex exec error");
    }
}

// Benchmark body: match string `s` against expression `r` with POSIX regex.
// The pattern is compiled on the first call and released on the last one.
void cregex_repeat(char *s, char *r) {
    // Get object from shared data object. rbf is the session variable of the
    // current bench function.
    regex_t *regex = (regex_t *)rbf->data;
    // Only executed once, at the beginning of the benchmark.
    if (rbf->first) {
        regex = bench_regex_new(r);
        rbf->data = regex;
    }
    // The code to benchmark.
    rassert(!regexec(regex, s, 0, NULL, 0));
    // Only executed once, at the end of the benchmark.
    if (rbf->last) {
        bench_regex_free(regex);
        rbf->data = NULL;
    }
}

// Benchmark body: match string `s` against expression `r` with rrex.
// The expression is compiled on the first call and released on the last one.
void rrex_repeat(char *s, char *r) {
    char *bdata = (char *)rbf->data;
    if (rbf->first) {
        bdata = malloc(RREX_BYTECODE_CAPACITY);
        if (!bdata) {
            bench_fatal("rrex alloc error");
        }
        rrex_compile(r, bdata);
        rbf->data = bdata;
    }
    rassert(rrex_match(s, bdata));
    if (rbf->last) {
        free(bdata);
        rbf->data = NULL;
    }
}

// Running totals updated by benchmark() and reported by
// rrex_benchmark_tests().
int wins_rrex = 0;
int loss_rrex = 0;
nsecs_t total_execution_time = 0;
long total_times = 0;

// Validate a Dutch zip code ("1234 AB") with plain loops.
// Returns true when `code` is four digits, one separator accepted by
// iswhitespace(), and two letters.
bool validate_dutch_zipcode_c(char *code) {
    if (strlen(code) != DUTCH_ZIPCODE_LENGTH) {
        return false;
    }
    for (int i = 0; i < 4; i++) {
        // <ctype.h> functions require a value representable as unsigned char.
        if (!isdigit((unsigned char)code[i])) {
            return false;
        }
    }
    if (!iswhitespace(code[4])) {
        return false;
    }
    // Both trailing letters (index 5 and 6) must be checked, matching
    // validate_dutch_zipcode_c_literal().
    for (int i = 5; i < DUTCH_ZIPCODE_LENGTH; i++) {
        if (!isalpha((unsigned char)code[i])) {
            return false;
        }
    }
    return true;
}

// Same validation as validate_dutch_zipcode_c(), written as one expression.
bool validate_dutch_zipcode_c_literal(char *code) {
    if (strlen(code) != DUTCH_ZIPCODE_LENGTH) {
        return false;
    }
    return isdigit((unsigned char)code[0]) && isdigit((unsigned char)code[1]) &&
           isdigit((unsigned char)code[2]) && isdigit((unsigned char)code[3]) &&
           iswhitespace(code[4]) && isalpha((unsigned char)code[5]) &&
           isalpha((unsigned char)code[6]);
}

// Benchmark body: compile the zip code pattern with POSIX regex and match `s`
// on every call. The match result itself is not reported.
void validate_dutch_zipcode_creg(char *s) {
    regex_t regex;
    if (regcomp(&regex, DUTCH_ZIPCODE_ERE_PATTERN, REG_EXTENDED) != 0) {
        bench_fatal("cregex comp error");
    }
    int ret = regexec(&regex, s, 0, NULL, 0);
    regfree(&regex);
    bench_regex_check_exec(ret);
}

// Benchmark body: match `s` against the zip code pattern with POSIX regex,
// compiling the pattern only on the first call of the benchmark.
void validate_dutch_zipcode_creg_precompiled(char *s) {
    regex_t *regex = (regex_t *)rbf->data;
    // Only executed once, at the beginning of the benchmark.
    if (rbf->first) {
        regex = bench_regex_new(DUTCH_ZIPCODE_ERE_PATTERN);
        rbf->data = regex;
    }
    // The code to benchmark.
    bench_regex_check_exec(regexec(regex, s, 0, NULL, 0));
    // Only executed once, at the end of the benchmark.
    if (rbf->last) {
        bench_regex_free(regex);
        rbf->data = NULL;
    }
}

// Benchmark body: match `s` against the zip code pattern with rrex, compiling
// the pattern only on the first call of the benchmark.
void validate_dutch_zipcode_rrex_precompiled(char *s) {
    char *bcode = (char *)rbf->data;
    if (rbf->first) {
        bcode = malloc(RREX_BYTECODE_CAPACITY);
        if (!bcode) {
            bench_fatal("rrex alloc error");
        }
        rrex_compile(DUTCH_ZIPCODE_RREX_PATTERN, bcode);
        rbf->data = bcode;
    }
    rrex_match(s, bcode);
    if (rbf->last) {
        free(bcode);
        rbf->data = NULL;
    }
}

// Validate `s` against the zip code pattern with rrex in a single call.
bool validate_dutch_zipcode_rrex(char *s) {
    return rrex(s, DUTCH_ZIPCODE_RREX_PATTERN);
}

// Run every zip code validator `times` times on `s` through one rbench
// session.
void benchmark_dutch_zipcode(long times, char *s) {
    rbench_t *r = rbench_new();
    r->show_progress = false;
    r->stdout = false;
    r->add_function(r, "rrex", "zipcode", (void *)validate_dutch_zipcode_rrex);
    r->add_function(r, "rrex compiled", "zipcode",
                    (void *)validate_dutch_zipcode_rrex_precompiled);
    r->add_function(r, "creg", "zipcode", (void *)validate_dutch_zipcode_creg);
    r->add_function(r, "creg compiled", "zipcode",
                    (void *)validate_dutch_zipcode_creg_precompiled);
    r->add_function(r, "native c", "zipcode c",
                    (void *)validate_dutch_zipcode_c);
    r->add_function(r, "native c literal", "zipcode c",
                    (void *)validate_dutch_zipcode_c_literal);
    printf("Benchmarking validation of %s with rrex and native c code.\n", s);
    r->execute1(r, times, s);
    rbench_free(r);
}

// Benchmark rrex against POSIX regex for string `s` and expression `e`,
// running each `times` times, and fold the outcome into the global totals.
void benchmark(long times, char *s, char *e) {
    rprint("Benchmark \\l string:<%s> expr:<%s>\t\n", s, e);
    rbench_t *r = rbench_new();
    r->show_progress = false;
    r->stdout = false;
    // rrex is registered first; a winner value of 1 is counted as a rrex win.
    r->add_function(r, "executor", "rrex", (void *)rrex_repeat);
    r->add_function(r, "executor", "clib", (void *)cregex_repeat);
    if (r->execute2(r, times, s, e)->winner == 1) {
        wins_rrex++;
    } else {
        loss_rrex++;
    }
    total_execution_time += r->execution_time;
    // Two functions are executed `times` times each.
    total_times += times * 2;
    rbench_free(r);
}

// Run the complete benchmark suite and print the summary.
void rrex_benchmark_tests(long times) {
    benchmark_dutch_zipcode(times / 10, "7245 SR");
    benchmark_dutch_zipcode(times / 10, "A245 SR");
    benchmark_dutch_zipcode(times / 10, "7245 S3");

    benchmark(times, "abababc", "^(ab)+c$");
    // c regex does not support:
    // benchmark(times,"123a33","\\d+a\\d+$");
    benchmark(times, "9-3", "([3-9]-[3-9])");
    benchmark(times, "1234A", "[1-4]{4}A");
    benchmark(times, "abcdef", "abcd?ef");
    benchmark(times, "ce", "(a|b|c|d)e");
    benchmark(times, "a", "(a)");
    benchmark(times, "aa", "(a){2}");
    // benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaq", "[^xyzv]+q$");
    benchmark(times, "abcabcabcabcabcabc", "[acb][acb]{4}");
    benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "[1A-Z0-9a12345]{33}");
    benchmark(times, "abcd", "abcd");
    benchmark(times, "aaaaaaaaa", "a{9}");
    benchmark(times, "a", "[abc]");
    benchmark(times, "aa", "[abc]{2}");
    benchmark(times, "ab", "[abc]{2}");
    benchmark(times, "ac", "[abc]{2}");
    benchmark(times, "c", "[abc]");
    benchmark(times, "123", "[0-9][0-9][0-9]");
    benchmark(times, "ab*", "[a-z]b.");
    benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ".{33}");
    // benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaq", "[dbac]+q$");
    benchmark(times, "#include \"test.h\"", "^#include *\"[a-z\\.]+\"$");
    benchmark(times, "abcdefgh", "^.*gh$");
    benchmark(times, "randomtextbeforeabcdefgh", "^random.*gh$");
    benchmark(times, "abcdefg", "a?bcdf?ef?g");

    printf("Times won: %d / %d\n", wins_rrex, wins_rrex + loss_rrex);
    printf("Total execution time: %s\n", format_time(total_execution_time));
    printf("Total times: %s\n", rformat_number(total_times));
}

// Interactive loop: read a string and an expression, then report whether rrex
// accepts the pair. Never returns.
void repl(void) {
    while (true) {
        char s[4096];
        char e[4096];
        rprint("%s", "Write a string to parse:\n");
        rreadline(s, 1024, true);
        rprint("Write a reqular expression:\n");
        rreadline(e, 1024, true);
        rprint("\\t");
        if (rrex(s, e)) {
            rprintgf(stdout, "\\T %s", "valid\n");
        } else {
            rprintrf(stdout, "\\T %s", "invalid\n");
        }
    }
}

// Run the rrex unit test groups and close the test session.
void rrex_tests(void) {
    rrex_functions_test();
    rrex_compiler_tests();
    rrex_executor_tests();
    // NOTE(review): the result of rtest_end() is not inspected here; confirm
    // that it reports or aborts on failures by itself.
    __attribute__((unused)) int res = rtest_end("");
    rprintg("Tests passed.\n\n");
    sleep(1);
}

// Entry point.
//   <prog> cli   start the interactive repl
//   <prog> test  run the tests and a very short benchmark
//   <prog>       run the tests and the full benchmark
int main(int argc, char *argv[]) {
    /*
    20000000 for 140s (1 billion times)
    16000000 for 100s
    32000000 for 200s
    4000000 for 30s
    2000000 for 15s -> this is minimum to get consistent result
    1000000 for 7.5s
    */
    long times = 2000000;
    if (argc > 1) {
        if (!strcmp(argv[1], "cli")) {
            repl();
            return 0;
        } else if (!strcmp(argv[1], "test")) {
            times = 20;
        }
    }
    rrex_tests();
    rrex_benchmark_tests(times);
    return 0;
}