|
#define R4_DEBUG_a
|
|
|
|
#include "rrex4.h"
|
|
#include "rlib.h"
|
|
#include <regex.h>
|
|
|
|
bool bench_r4(unsigned int times, char *str, char *expr) {
|
|
RBENCH(times, {
|
|
r4_t *r = r4(str, expr);
|
|
|
|
if (r->valid == false) {
|
|
|
|
printf("Bench r4 error\n");
|
|
exit(1);
|
|
}
|
|
|
|
r4_free(r);
|
|
});
|
|
return true;
|
|
}
|
|
|
|
void bench_c(unsigned int times, char *str, char *expr) {
|
|
regex_t regex;
|
|
if (regcomp(®ex, expr, REG_EXTENDED)) {
|
|
printf("Creg: error in regular expression.\n");
|
|
exit(1);
|
|
}
|
|
RBENCH(times, {
|
|
if (regexec(®ex, str, 0, NULL, 0)) {
|
|
printf("Creg: error executing regular expression.\n");
|
|
exit(1);
|
|
}
|
|
});
|
|
|
|
regfree(®ex);
|
|
}
|
|
|
|
/*
 * Runs one benchmark pair for the same subject and expression.
 *
 * Prints a "<times>:(<str>)<<expr>>" header, then the "c:" label followed by
 * bench_c() and the "r:" label followed by bench_r4(). The result of
 * bench_r4() is not inspected.
 *
 * Returns true unconditionally.
 */
bool bench(unsigned int times, char *str, char *expr) {
    /* `times` is unsigned, so it is printed with %u rather than %d. */
    printf("%u:(%s)<%s>\n", times, str, expr);

    printf("c:");
    bench_c(times, str, expr);

    printf("r:");
    bench_r4(times, str, expr);

    return true;
}
|
|
|
|
void test_r4_next() {
|
|
r4_t *r = r4_new();
|
|
char *str = "abcdefghijklmnop";
|
|
char *reg = "(\\w\\w\\w\\w)";
|
|
r = r4(str, reg);
|
|
assert(r->valid);
|
|
assert(r->match_count == 1);
|
|
assert(!strcmp(r->matches[0], "abcd"));
|
|
// Again with same regex as parameter
|
|
r = r4_next(r, reg);
|
|
assert(r->valid);
|
|
assert(r->match_count == 1);
|
|
assert(!strcmp(r->matches[0], "efgh"));
|
|
// Again with same regex as parameter
|
|
r = r4_next(r, reg);
|
|
assert(r->valid);
|
|
assert(r->match_count == 1);
|
|
assert(!strcmp(r->matches[0], "ijkl"));
|
|
// Reuse expression, NULL parameter
|
|
r = r4_next(r, NULL);
|
|
assert(r->valid);
|
|
assert(r->match_count == 1);
|
|
assert(!strcmp(r->matches[0], "mnop"));
|
|
// No results using r4_next
|
|
r = r4_next(r, NULL);
|
|
assert(r->valid);
|
|
assert(r->match_count == 0);
|
|
// Again no results using r4_next, Shouldn't crash
|
|
r = r4_next(r, NULL);
|
|
assert(r->valid);
|
|
assert(r->match_count == 0);
|
|
r4_free(r);
|
|
}
|
|
|
|
/*
 * Runs bench() for a fixed list of subject / expression pairs, each with the
 * given `times` value, and asserts that every call reports true.
 *
 * bench() is called outside the assert() expression so the benchmarks still
 * run when assertions are compiled out with NDEBUG.
 */
void bench_all(unsigned int times) {
    static const struct {
        char *str;
        char *expr;
    } cases[] = {
        {"suvw", "[abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]["
                 "abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]"},
        {"ponyyy", "^p+o.*yyy$$$$"},
        {" ponyyzd", "p+o.*yyzd$$$$"},
        {"abc", "def|gek|abc"},
        {"abc", "def|a?b?c|def"},
        {"NL18RABO0322309700",
         "([A-Z]{2})([0-9]{2})([A-Z]{4}[0-9])([0-9]+)$"},
        {"a 1 b 2 c 3 d 4 ", "([A-Z0-9 ]+)"},
    };

    for (size_t idx = 0; idx < sizeof cases / sizeof cases[0]; idx++) {
        bool ok = bench(times, cases[idx].str, cases[idx].expr);
        assert(ok);
        (void)ok; /* keeps NDEBUG builds free of unused-variable warnings */
    }
}
|
|
|
|
bool r4_match_stats(char *str, char *expr) {
|
|
r4_t *r = r4(str, expr);
|
|
bool result = r->valid;
|
|
printf("%d:(%s)<%s>\n", r->validation_count, r->_str, r->_expr);
|
|
if (result) {
|
|
printf(" - match(0)\t: \"%s\"\n", r->match);
|
|
}
|
|
for (unsigned i = 0; i < r->match_count; i++) {
|
|
printf(" - match(%d)\t: \"%s\"\n", i + 1, r->matches[i]);
|
|
}
|
|
r4_free(r);
|
|
return result;
|
|
}
|
|
|
|
void test_r4_bug_check_capture_overflow() {
|
|
// This is a former bug in r4.
|
|
|
|
// Case one
|
|
r4_t *r = r4("test", "(test)+");
|
|
assert(r->match_count == 1);
|
|
r4_free(r);
|
|
|
|
// Case two
|
|
r = r4("tester", "(t\\est\\e\\r)+");
|
|
assert(r->match_count == 1);
|
|
printf("%s\n", r->matches[0]);
|
|
r4_free(r);
|
|
|
|
// Case three
|
|
r = r4("test", "(t\\est\\e\\r)+");
|
|
assert(r->match_count == 0);
|
|
r4_free(r);
|
|
}
|
|
|
|
void test_r4_capture_main_group() {
|
|
// Case 1
|
|
r4_t *r = r4("testtesttesttest", "(test)+test$");
|
|
// printf("%s\n",r->match);
|
|
// assert(!strcmp(r->match,"testtesttesttest"));
|
|
assert(r->match_count == 3);
|
|
assert(!strcmp(r->matches[0], "test"));
|
|
assert(!strcmp(r->matches[1], "test"));
|
|
assert(!strcmp(r->matches[2], "test"));
|
|
r4_free(r);
|
|
// Case 2 (with search)
|
|
/*
|
|
r = r4(" testtesttesttest","(test)+test$");
|
|
printf("%s\n",r->match);
|
|
assert(!strcmp(r->match,"testtesttesttest"));
|
|
assert(r->match_count == 3);
|
|
assert(!strcmp(r->matches[0], "test"));
|
|
assert(!strcmp(r->matches[1], "test"));
|
|
assert(!strcmp(r->matches[2], "test"));
|
|
r4_free(r); */
|
|
}
|
|
|
|
char test_r4_capture_dynamic_amount() {
|
|
r4_t *r = r4("testtesttesttest", "(test)+test$");
|
|
assert(r->match_count == 3);
|
|
assert(!strcmp(r->matches[0], "test"));
|
|
assert(!strcmp(r->matches[1], "test"));
|
|
assert(!strcmp(r->matches[2], "test"));
|
|
r4_free(r);
|
|
|
|
return true;
|
|
// Some advanced capturing
|
|
// Fails
|
|
r = r4("testtesttesttest", "([tes]+)+test$");
|
|
printf("%d\n", r->match_count);
|
|
assert(r->match_count == 1);
|
|
assert(!strcmp(r->matches[0], "testtesttest"));
|
|
r4_free(r);
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
for (int i = 0; i < argc; i++) {
|
|
if (!strcmp(argv[i], "--debug")) {
|
|
r4_enable_debug();
|
|
}
|
|
}
|
|
|
|
// Has to be fixed
|
|
r4_match_stats("r4@r4.net",
|
|
"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]*$");
|
|
// r4_match_stats("r4@r4.net", "^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$");
|
|
// exit(0);
|
|
|
|
test_r4_capture_main_group();
|
|
assert(r4_match_stats("testtesttesttest", "(test)+test$"));
|
|
assert(r4_match_stats("testtest", "test"));
|
|
|
|
// Group testing
|
|
assert(r4_match_stats("aaadddd", "(a+)(d+)$"));
|
|
assert(r4_match_stats("aaa", "(a+)$"));
|
|
assert(r4_match_stats("aaadddd", "(d+)$"));
|
|
assert(r4_match_stats("aaadddd", "(d+)"));
|
|
assert(r4_match_stats("aaa\"dddd\"", "\"(d+)\""));
|
|
assert(r4_match_stats("aaadddd", "(a*)(d+)$"));
|
|
assert(r4_match_stats("aaa", "(a*)$"));
|
|
assert(r4_match_stats("aaadddd", "(d*)$"));
|
|
assert(r4_match_stats("aaadddd", "(d*)"));
|
|
assert(r4_match_stats("aaa\"dddd\" ", "\"(d*)\"\\s*"));
|
|
|
|
// Words
|
|
assert(r4_match_stats("a", "\\w"));
|
|
assert(!r4_match_stats("1", "\\w"));
|
|
assert(r4_match_stats("1", "\\W"));
|
|
assert(!r4_match_stats("a", "\\W"));
|
|
assert(r4_match_stats("aa", "\\w{2}"));
|
|
assert(r4_match_stats("11", "\\W{2}"));
|
|
assert(r4_match_stats("1", "[\\W]"));
|
|
|
|
// Digits
|
|
assert(r4_match_stats("1", "\\d"));
|
|
assert(!r4_match_stats("a", "\\d"));
|
|
assert(r4_match_stats("a", "\\D"));
|
|
assert(!r4_match_stats("1", "\\D"));
|
|
assert(r4_match_stats("11", "\\d{2}$"));
|
|
assert(r4_match_stats("aa", "\\D{2}$"));
|
|
assert(r4_match_stats("a", "[\\D]"));
|
|
|
|
// Whitespace
|
|
assert(r4_match_stats(" ", "\\s"));
|
|
assert(r4_match_stats(" a", "\\s"));
|
|
assert(!r4_match_stats("a", "[\\s]"));
|
|
assert(r4_match_stats("a ", "[\\s]"));
|
|
assert(r4_match_stats("a", "\\S"));
|
|
assert(!r4_match_stats(" ", "\\S"));
|
|
assert(!r4_match_stats(" ", "[\\S]"));
|
|
assert(r4_match_stats("b ", "[\\S]"));
|
|
assert(r4_match_stats(" b", "[\\S]"));
|
|
|
|
// Boundaries
|
|
assert(r4_match_stats("a", "\\b"));
|
|
assert(r4_match_stats("a", "\\ba$"));
|
|
assert(r4_match_stats("a", "^\\ba$"));
|
|
assert(r4_match_stats("aa", "\\b"));
|
|
assert(!r4_match_stats("aa", "\\b$"));
|
|
assert(r4_match_stats("aa", "[\\b]"));
|
|
assert(r4_match_stats("a", "\\B"));
|
|
assert(r4_match_stats("a", "\\Ba$"));
|
|
assert(r4_match_stats("a", "^\\Ba$"));
|
|
assert(r4_match_stats("aa", "\\B"));
|
|
assert(!r4_match_stats("aa", "^\\B"));
|
|
assert(!r4_match_stats("a1", "a[\\B]$"));
|
|
|
|
// Optional
|
|
assert(!r4_match_stats("a", "?"));
|
|
assert(r4_match_stats("a", "a?"));
|
|
assert(r4_match_stats("a", "b?"));
|
|
assert(r4_match_stats("a", "^b?"));
|
|
assert(r4_match_stats("a", "a?$"));
|
|
assert(!r4_match_stats("a", "b?$"));
|
|
assert(r4_match_stats("a", "[def]?a$"));
|
|
|
|
// Range
|
|
|
|
assert(r4_match_stats("a", "a{1}"));
|
|
assert(r4_match_stats("ab", "a{1}"));
|
|
assert(r4_match_stats("aa", "a{2}"));
|
|
assert(!r4_match_stats("aab", "a{3}"));
|
|
assert(!r4_match_stats("a1", "a{2}"));
|
|
assert(r4_match_stats("ab", "a{1,2}"));
|
|
assert(r4_match_stats("aa", "a{2,}"));
|
|
|
|
// Group (Custom function set)
|
|
r4_match_stats("*?+$^.|\\[{()}]@ ", "[*?+$^.|\\\\[{()}]]+$@\\s");
|
|
|
|
// Miscellaneous tests
|
|
bool debug_mode_original = _r4_debug;
|
|
_r4_debug = false;
|
|
r4_enable_debug();
|
|
assert(_r4_debug);
|
|
r4_disable_debug();
|
|
assert(!_r4_debug);
|
|
_r4_debug = debug_mode_original;
|
|
|
|
assert(r4_match("a", "a"));
|
|
assert(!r4_match("b", "a"));
|
|
r4_init(NULL);
|
|
r4_free(NULL);
|
|
r4_free_matches(NULL);
|
|
|
|
// Next tests
|
|
test_r4_next();
|
|
|
|
// Check if former known bugs are still fixed
|
|
test_r4_bug_check_capture_overflow();
|
|
|
|
// Check if capture amount is dynamic
|
|
test_r4_capture_dynamic_amount();
|
|
|
|
char *c_function_regex =
|
|
"(\\w[\\w\\d]*[\\s\\*]*)\\s*\\w[\\w\\d]*\\s*\\((.*)\\)\\s*\\{";
|
|
r4_match_stats("int **main() {}", c_function_regex);
|
|
r4_match_stats("int main(int argc, char *argv[],(void *)aaa) {}",
|
|
c_function_regex);
|
|
|
|
assert(r4_match_stats("NL18RABO0322309700",
|
|
"(\\w{2})(\\d{2})(\\w{4}\\d)(\\d{10})"));
|
|
|
|
// exit(0);
|
|
unsigned int times = 1;
|
|
bench_all(times);
|
|
|
|
RBENCH(1, {
|
|
assert(r4_match_stats("#define DEFINETEST 1",
|
|
"#define\\s(+[\\w\\d_]+)\\s+[\\w\\d_]+"));
|
|
// assert(r4_match_stats("#define DEFINETEST 1\n",
|
|
// s "#define\\s+\\w[\\d\\w_]+\\s+[\\w\\d_]\\s*"));
|
|
|
|
assert(!r4_match_stats("aa", "aaaa"));
|
|
assert(r4_match_stats("ponyyy", "^p+o.*yyy$$$$"));
|
|
assert(!r4_match_stats("ponyyy", "p%+o.*yyy$$$$"));
|
|
assert(!r4_match_stats("ponyyyd", "^p+o.*yyz$$$$"));
|
|
assert(r4_match_stats("123", "[0-2][2-2][1-3]$"));
|
|
assert(r4_match_stats("aaaabC5", "(a)(\\w)a*(a)\\w[A-Z][0-9]$"));
|
|
assert(r4_match_stats("abcdeeeeee", "ab(cdeee)e"));
|
|
assert(r4_match_stats("1234567", "12(.*)67$"));
|
|
assert(r4_match_stats("12111678993", "12(.*)67(.*)3$"));
|
|
assert(r4_match_stats("NL18RABO0322309700", "NL(.*)R(.*)0(.*)0(.*)$"));
|
|
|
|
assert(r4_match_stats("NL18RABO0322309700",
|
|
"(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$"));
|
|
assert(r4_match_stats("NL18RABO0322309700garbage",
|
|
"(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)"));
|
|
assert(r4_match_stats("NL18RABO0322309700",
|
|
"(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$"));
|
|
assert(r4_match_stats(" NL18RABO0322309700",
|
|
"(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$"));
|
|
assert(r4_match_stats(" NL18RABO0322309700",
|
|
"(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$"));
|
|
assert(
|
|
r4_match_stats("NL18RABO0", "(\\w\\w)(\\d\\d)(\\w\\w\\w\\w\\d)$"));
|
|
assert(r4_match_stats("q", "\\q$"));
|
|
assert(r4_match_stats("ab123", "[a-z0-9]+$"));
|
|
assert(r4_match_stats("ppppony", "p*pppony"));
|
|
assert(r4_match_stats("aa", "a{2}$"));
|
|
assert(r4_match_stats("A23", "[0-2A-z][2-2][1-3]$"));
|
|
assert(r4_match_stats("z23", "[0-2A-z][2-2][1-3]$"));
|
|
assert(r4_match_stats("r23", "[0-2Ar][2-2][1-3]$"));
|
|
assert(r4_match_stats("test", "\\w\\w\\w\\w$"));
|
|
assert(!r4_match_stats("test", "\\W\\w\\w\\w$"));
|
|
assert(r4_match_stats("1est", "\\W\\w\\w\\w$"));
|
|
assert(r4_match_stats("1est", "\\d\\w\\w\\w$"));
|
|
assert(r4_match_stats("Aest", "\\D\\w\\w\\w$"));
|
|
assert(r4_match_stats("abc", "[ab]+"));
|
|
assert(!r4_match_stats("abc", "[ab]+$"));
|
|
assert(r4_match_stats("abc", "[abc]+$"));
|
|
assert(!r4_match_stats("a", "[^ba]"));
|
|
assert(!r4_match_stats("a", "[^ab]"));
|
|
assert(r4_match_stats(" ponyyzd", "p+o.*yyzd$$$$"));
|
|
assert(r4_match_stats("abc", "def|gek|abc"));
|
|
assert(!r4_match_stats("abc", "def|gek|abd"));
|
|
assert(r4_match_stats("abc", "def|abc|def"));
|
|
assert(r4_match_stats(
|
|
"suwv", "[abcdesfghijklmnopqrtuvw][abcdefghijklmnopqrstuvw]["
|
|
"abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]"));
|
|
|
|
assert(r4_match_stats("123", "(.*)(.*)(.*)"));
|
|
assert(r4_match_stats("1234", "(.*)(.*)(.*)"));
|
|
|
|
assert(r4_match_stats("#include \"test.c\"", "#include\\s+\"(.*)\""));
|
|
assert(r4_match_stats("#define TEST_JE VALUE",
|
|
"#define\\s+([A-Za-z_0-9]+)\\s+([A-Za-z_0-9]+)"));
|
|
//
|
|
assert(r4_match_stats("bbb", "a*(bbb)"));
|
|
|
|
// Tests added for coverage
|
|
assert(!r4_match_stats("1", "[\\D]"));
|
|
assert(!r4_match_stats("11", "\\D{2}"));
|
|
assert(!r4_match_stats("ab", "ba"));
|
|
assert(r4_match_stats("2", "[4-2]"));
|
|
});
|
|
|
|
return 0;
|
|
} |