/* retoor <retoor@molodetz.nl> */
#include "../include/lorex.h"
#include <stdio.h>
#include <string.h>
/* Running totals for the whole test run: every check in this file bumps
 * exactly one of these, and main() prints both and derives the exit status
 * from `failed`. */
static int passed = 0;
static int failed = 0;
/* Shorthand for test_match(): MATCH expects the pattern to be found in the
 * text, NO_MATCH expects it not to be found. __LINE__ is captured at the call
 * site so that a failure message points at the individual test case. */
#define MATCH(pat, txt) test_match(pat, txt, 1, __LINE__)
#define NO_MATCH(pat, txt) test_match(pat, txt, 0, __LINE__)
static void test_match(const char *pattern, const char *text, int expect, int line) {
lorex_error_t err;
lorex_regex_t *re = lorex_compile(pattern, &err);
if (!re) {
printf("FAIL line %d: compile error for '%s': %s\n", line, pattern, lorex_error_string(err));
failed++;
return;
}
lorex_match_t m;
int result = lorex_search(re, text, &m) ? 1 : 0;
if (result != expect) {
printf("FAIL line %d: '%s' vs '%s' expected %s\n", line, pattern, text, expect ? "match" : "no match");
failed++;
} else {
passed++;
}
lorex_free(re);
}
/* Plain literal patterns: found anywhere in the text, rejected when the text
 * lacks the full literal; the empty pattern is expected to match. */
static void test_literals(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a", "a", HIT, __LINE__},
        {"a", "ba", HIT, __LINE__},
        {"a", "ab", HIT, __LINE__},
        {"abc", "abc", HIT, __LINE__},
        {"abc", "xabcy", HIT, __LINE__},
        {"hello", "hello world", HIT, __LINE__},
        {"world", "hello world", HIT, __LINE__},
        {"lo wo", "hello world", HIT, __LINE__},
        {"abc", "ab", MISS, __LINE__},
        {"abc", "abd", MISS, __LINE__},
        {"xyz", "abc", MISS, __LINE__},
        {"hello", "helo", MISS, __LINE__},
        {"", "anything", HIT, __LINE__},
        {"", "", HIT, __LINE__},
        {"a", "aaa", HIT, __LINE__},
        {"aa", "aaa", HIT, __LINE__},
        {"aaa", "aaa", HIT, __LINE__},
        {"aaaa", "aaa", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" literals...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* The `.` metacharacter: each dot must consume exactly one character. */
static void test_dot(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {".", "a", HIT, __LINE__},
        {".", "x", HIT, __LINE__},
        {".", "5", HIT, __LINE__},
        {".", " ", HIT, __LINE__},
        {"..", "ab", HIT, __LINE__},
        {"...", "abc", HIT, __LINE__},
        {"a.c", "abc", HIT, __LINE__},
        {"a.c", "aXc", HIT, __LINE__},
        {"a.c", "a9c", HIT, __LINE__},
        {"a.c", "a c", HIT, __LINE__},
        {"a.c", "ac", MISS, __LINE__},
        {"a.c", "abbc", MISS, __LINE__},
        {"....", "test", HIT, __LINE__},
        {".", "!", HIT, __LINE__},
        {".", "@", HIT, __LINE__},
        {"a..b", "aXYb", HIT, __LINE__},
        {"a...b", "a123b", HIT, __LINE__},
        {"a..b", "aXb", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" dot metacharacter...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* `^` and `$` anchors, alone and combined, including the empty text. */
static void test_anchors(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"^a", "a", HIT, __LINE__},
        {"^a", "abc", HIT, __LINE__},
        {"^a", "ba", MISS, __LINE__},
        {"^a", " a", MISS, __LINE__},
        {"a$", "a", HIT, __LINE__},
        {"a$", "ba", HIT, __LINE__},
        {"a$", "ab", MISS, __LINE__},
        {"a$", "a ", MISS, __LINE__},
        {"^abc$", "abc", HIT, __LINE__},
        {"^abc$", "xabc", MISS, __LINE__},
        {"^abc$", "abcx", MISS, __LINE__},
        {"^abc$", " abc", MISS, __LINE__},
        {"^abc$", "abc ", MISS, __LINE__},
        {"^$", "", HIT, __LINE__},
        {"^$", "a", MISS, __LINE__},
        {"^hello$", "hello", HIT, __LINE__},
        {"^hello world$", "hello world", HIT, __LINE__},
        {"^hello world$", "hello world!", MISS, __LINE__},
        {"^a.*z$", "abcdefghijklmnopqrstuvwxyz", HIT, __LINE__},
        {"^.", "x", HIT, __LINE__},
        {".$", "x", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" anchors...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* The `*` quantifier (zero or more), on literals and on `.`. */
static void test_star(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a*", "", HIT, __LINE__},
        {"a*", "a", HIT, __LINE__},
        {"a*", "aa", HIT, __LINE__},
        {"a*", "aaa", HIT, __LINE__},
        {"a*", "aaaaaaaaaa", HIT, __LINE__},
        {"a*", "b", HIT, __LINE__},
        {"a*b", "b", HIT, __LINE__},
        {"a*b", "ab", HIT, __LINE__},
        {"a*b", "aab", HIT, __LINE__},
        {"a*b", "aaaaaab", HIT, __LINE__},
        {"a*b", "a", MISS, __LINE__},
        {"ba*", "b", HIT, __LINE__},
        {"ba*", "ba", HIT, __LINE__},
        {"ba*", "baaa", HIT, __LINE__},
        {".*", "", HIT, __LINE__},
        {".*", "anything at all", HIT, __LINE__},
        {"a.*b", "ab", HIT, __LINE__},
        {"a.*b", "aXb", HIT, __LINE__},
        {"a.*b", "aXXXXXb", HIT, __LINE__},
        {"a.*b", "a b", HIT, __LINE__},
        {"x*y*z*", "", HIT, __LINE__},
        {"x*y*z*", "xyz", HIT, __LINE__},
        {"x*y*z*", "xxxyyyzzz", HIT, __LINE__},
        {"ab*c", "ac", HIT, __LINE__},
        {"ab*c", "abc", HIT, __LINE__},
        {"ab*c", "abbbbc", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" star quantifier...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* The `+` quantifier (one or more): like `*` but the empty repetition must
 * be rejected. */
static void test_plus(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a+", "", MISS, __LINE__},
        {"a+", "a", HIT, __LINE__},
        {"a+", "aa", HIT, __LINE__},
        {"a+", "aaa", HIT, __LINE__},
        {"a+", "aaaaaaaaaa", HIT, __LINE__},
        {"a+", "ba", HIT, __LINE__},
        {"a+b", "ab", HIT, __LINE__},
        {"a+b", "aab", HIT, __LINE__},
        {"a+b", "aaaaaab", HIT, __LINE__},
        {"a+b", "b", MISS, __LINE__},
        {"a+b", "a", MISS, __LINE__},
        {"ba+", "ba", HIT, __LINE__},
        {"ba+", "baaa", HIT, __LINE__},
        {"ba+", "b", MISS, __LINE__},
        {".+", "a", HIT, __LINE__},
        {".+", "anything", HIT, __LINE__},
        {".+", "", MISS, __LINE__},
        {"a.+b", "aXb", HIT, __LINE__},
        {"a.+b", "aXXXXXb", HIT, __LINE__},
        {"a.+b", "ab", MISS, __LINE__},
        {"ab+c", "abc", HIT, __LINE__},
        {"ab+c", "abbbbc", HIT, __LINE__},
        {"ab+c", "ac", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" plus quantifier...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* The `?` quantifier (zero or one occurrence). */
static void test_question(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a?", "", HIT, __LINE__},
        {"a?", "a", HIT, __LINE__},
        {"a?", "aa", HIT, __LINE__},
        {"a?b", "b", HIT, __LINE__},
        {"a?b", "ab", HIT, __LINE__},
        {"a?b", "aab", HIT, __LINE__},
        {"colou?r", "color", HIT, __LINE__},
        {"colou?r", "colour", HIT, __LINE__},
        {"colou?r", "colouur", MISS, __LINE__},
        {"ab?c", "ac", HIT, __LINE__},
        {"ab?c", "abc", HIT, __LINE__},
        {"ab?c", "abbc", MISS, __LINE__},
        {"https?://", "http://", HIT, __LINE__},
        {"https?://", "https://", HIT, __LINE__},
        {".?", "", HIT, __LINE__},
        {".?", "x", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" question quantifier...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Top-level alternation with `|`, for single characters and whole words. */
static void test_alternation(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a|b", "a", HIT, __LINE__},
        {"a|b", "b", HIT, __LINE__},
        {"a|b", "c", MISS, __LINE__},
        {"cat|dog", "cat", HIT, __LINE__},
        {"cat|dog", "dog", HIT, __LINE__},
        {"cat|dog", "rat", MISS, __LINE__},
        {"cat|dog", "my cat", HIT, __LINE__},
        {"cat|dog", "my dog", HIT, __LINE__},
        {"a|b|c", "a", HIT, __LINE__},
        {"a|b|c", "b", HIT, __LINE__},
        {"a|b|c", "c", HIT, __LINE__},
        {"a|b|c", "d", MISS, __LINE__},
        {"ab|cd", "ab", HIT, __LINE__},
        {"ab|cd", "cd", HIT, __LINE__},
        {"ab|cd", "ac", MISS, __LINE__},
        {"abc|def|ghi", "abc", HIT, __LINE__},
        {"abc|def|ghi", "def", HIT, __LINE__},
        {"abc|def|ghi", "ghi", HIT, __LINE__},
        {"a|ab|abc", "abc", HIT, __LINE__},
        {"abc|ab|a", "abc", HIT, __LINE__},
        {"red|green|blue", "the red car", HIT, __LINE__},
        {"red|green|blue", "green light", HIT, __LINE__},
        {"red|green|blue", "blue sky", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" alternation...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Parenthesised groups: plain, quantified, containing alternation, nested. */
static void test_groups(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"(a)", "a", HIT, __LINE__},
        {"(ab)", "ab", HIT, __LINE__},
        {"(abc)", "abc", HIT, __LINE__},
        {"(a)(b)", "ab", HIT, __LINE__},
        {"(a)(b)(c)", "abc", HIT, __LINE__},
        {"(ab)+", "ab", HIT, __LINE__},
        {"(ab)+", "abab", HIT, __LINE__},
        {"(ab)+", "ababab", HIT, __LINE__},
        {"(ab)+", "a", MISS, __LINE__},
        {"(ab)+", "ba", MISS, __LINE__},
        {"(ab)*", "", HIT, __LINE__},
        {"(ab)*", "ab", HIT, __LINE__},
        {"(ab)*", "abab", HIT, __LINE__},
        {"(ab)?", "", HIT, __LINE__},
        {"(ab)?", "ab", HIT, __LINE__},
        {"(a|b)+", "a", HIT, __LINE__},
        {"(a|b)+", "b", HIT, __LINE__},
        {"(a|b)+", "ab", HIT, __LINE__},
        {"(a|b)+", "ba", HIT, __LINE__},
        {"(a|b)+", "aabb", HIT, __LINE__},
        {"(a|b)+", "abba", HIT, __LINE__},
        {"((a))", "a", HIT, __LINE__},
        {"((ab))", "ab", HIT, __LINE__},
        {"(a(b)c)", "abc", HIT, __LINE__},
        {"(a(b(c)))", "abc", HIT, __LINE__},
        {"((a)(b))", "ab", HIT, __LINE__},
        {"(red|blue) car", "red car", HIT, __LINE__},
        {"(red|blue) car", "blue car", HIT, __LINE__},
        {"(red|blue) car", "green car", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" groups...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Bracket expressions that list their members explicitly (no ranges). */
static void test_bracket_simple(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"[a]", "a", HIT, __LINE__},
        {"[a]", "b", MISS, __LINE__},
        {"[ab]", "a", HIT, __LINE__},
        {"[ab]", "b", HIT, __LINE__},
        {"[ab]", "c", MISS, __LINE__},
        {"[abc]", "a", HIT, __LINE__},
        {"[abc]", "b", HIT, __LINE__},
        {"[abc]", "c", HIT, __LINE__},
        {"[abc]", "d", MISS, __LINE__},
        {"[aeiou]", "a", HIT, __LINE__},
        {"[aeiou]", "e", HIT, __LINE__},
        {"[aeiou]", "i", HIT, __LINE__},
        {"[aeiou]", "o", HIT, __LINE__},
        {"[aeiou]", "u", HIT, __LINE__},
        {"[aeiou]", "b", MISS, __LINE__},
        {"[abc]+", "aaa", HIT, __LINE__},
        {"[abc]+", "abc", HIT, __LINE__},
        {"[abc]+", "cba", HIT, __LINE__},
        {"[abc]+", "abcabc", HIT, __LINE__},
        {"[xyz]*", "", HIT, __LINE__},
        {"[xyz]*", "xyz", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" bracket expressions (simple)...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Bracket expressions built from one or more `x-y` ranges. */
static void test_bracket_ranges(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"[a-z]", "a", HIT, __LINE__},
        {"[a-z]", "m", HIT, __LINE__},
        {"[a-z]", "z", HIT, __LINE__},
        {"[a-z]", "A", MISS, __LINE__},
        {"[a-z]", "0", MISS, __LINE__},
        {"[A-Z]", "A", HIT, __LINE__},
        {"[A-Z]", "M", HIT, __LINE__},
        {"[A-Z]", "Z", HIT, __LINE__},
        {"[A-Z]", "a", MISS, __LINE__},
        {"[0-9]", "0", HIT, __LINE__},
        {"[0-9]", "5", HIT, __LINE__},
        {"[0-9]", "9", HIT, __LINE__},
        {"[0-9]", "a", MISS, __LINE__},
        {"[a-zA-Z]", "a", HIT, __LINE__},
        {"[a-zA-Z]", "Z", HIT, __LINE__},
        {"[a-zA-Z]", "5", MISS, __LINE__},
        {"[a-zA-Z0-9]", "a", HIT, __LINE__},
        {"[a-zA-Z0-9]", "Z", HIT, __LINE__},
        {"[a-zA-Z0-9]", "5", HIT, __LINE__},
        {"[a-zA-Z0-9]", "!", MISS, __LINE__},
        {"[a-z]+", "hello", HIT, __LINE__},
        {"[A-Z]+", "HELLO", HIT, __LINE__},
        {"[0-9]+", "12345", HIT, __LINE__},
        {"[a-z0-9]+", "abc123", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" bracket expressions (ranges)...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Negated bracket expressions (`[^...]`), with lists and with ranges. */
static void test_bracket_negated(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"[^a]", "a", MISS, __LINE__},
        {"[^a]", "b", HIT, __LINE__},
        {"[^a]", "x", HIT, __LINE__},
        {"[^abc]", "a", MISS, __LINE__},
        {"[^abc]", "b", MISS, __LINE__},
        {"[^abc]", "c", MISS, __LINE__},
        {"[^abc]", "d", HIT, __LINE__},
        {"[^abc]", "x", HIT, __LINE__},
        {"[^a-z]", "a", MISS, __LINE__},
        {"[^a-z]", "m", MISS, __LINE__},
        {"[^a-z]", "z", MISS, __LINE__},
        {"[^a-z]", "A", HIT, __LINE__},
        {"[^a-z]", "5", HIT, __LINE__},
        {"[^a-z]", "!", HIT, __LINE__},
        {"[^0-9]", "5", MISS, __LINE__},
        {"[^0-9]", "a", HIT, __LINE__},
        {"[^0-9]+", "hello", HIT, __LINE__},
        {"[^aeiou]+", "aaa", MISS, __LINE__},
        {"[^aeiou]+", "xyz", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" bracket expressions (negated)...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Shorthand classes \d \D \w \W \s \S outside of bracket expressions. */
static void test_character_classes(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"\\d", "0", HIT, __LINE__},
        {"\\d", "5", HIT, __LINE__},
        {"\\d", "9", HIT, __LINE__},
        {"\\d", "a", MISS, __LINE__},
        {"\\d", " ", MISS, __LINE__},
        {"\\d+", "123", HIT, __LINE__},
        {"\\d+", "0", HIT, __LINE__},
        {"\\d+", "9876543210", HIT, __LINE__},
        {"\\d+", "", MISS, __LINE__},
        {"\\d+", "abc", MISS, __LINE__},
        {"\\D", "a", HIT, __LINE__},
        {"\\D", " ", HIT, __LINE__},
        {"\\D", "!", HIT, __LINE__},
        {"\\D", "5", MISS, __LINE__},
        {"\\w", "a", HIT, __LINE__},
        {"\\w", "Z", HIT, __LINE__},
        {"\\w", "0", HIT, __LINE__},
        {"\\w", "_", HIT, __LINE__},
        {"\\w", " ", MISS, __LINE__},
        {"\\w", "!", MISS, __LINE__},
        {"\\w+", "hello", HIT, __LINE__},
        {"\\w+", "Hello123", HIT, __LINE__},
        {"\\w+", "var_name", HIT, __LINE__},
        {"\\W", " ", HIT, __LINE__},
        {"\\W", "!", HIT, __LINE__},
        {"\\W", "@", HIT, __LINE__},
        {"\\W", "a", MISS, __LINE__},
        {"\\W", "_", MISS, __LINE__},
        {"\\s", " ", HIT, __LINE__},
        {"\\s", "\t", HIT, __LINE__},
        {"\\s", "\n", HIT, __LINE__},
        {"\\s", "a", MISS, __LINE__},
        {"\\s", "5", MISS, __LINE__},
        {"\\s+", " ", HIT, __LINE__},
        {"\\s+", " \t\n", HIT, __LINE__},
        {"\\S", "a", HIT, __LINE__},
        {"\\S", "5", HIT, __LINE__},
        {"\\S", "!", HIT, __LINE__},
        {"\\S", " ", MISS, __LINE__},
        {"\\S", "\t", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" character classes...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Counted repetition: {n}, {n,m}, {n,} and {0,m}. Because the search is
 * unanchored, texts longer than the upper bound are still expected to match. */
static void test_quantifier_braces(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a{3}", "aaa", HIT, __LINE__},
        {"a{3}", "aaaa", HIT, __LINE__},
        {"a{3}", "aa", MISS, __LINE__},
        {"a{1}", "a", HIT, __LINE__},
        {"a{1}", "aa", HIT, __LINE__},
        {"a{1}", "", MISS, __LINE__},
        {"a{0}", "", HIT, __LINE__},
        {"a{0}", "b", HIT, __LINE__},
        {"a{2,4}", "aa", HIT, __LINE__},
        {"a{2,4}", "aaa", HIT, __LINE__},
        {"a{2,4}", "aaaa", HIT, __LINE__},
        {"a{2,4}", "aaaaa", HIT, __LINE__},
        {"a{2,4}", "a", MISS, __LINE__},
        {"a{2,}", "aa", HIT, __LINE__},
        {"a{2,}", "aaa", HIT, __LINE__},
        {"a{2,}", "aaaaaaaaaa", HIT, __LINE__},
        {"a{2,}", "a", MISS, __LINE__},
        {"a{0,2}", "", HIT, __LINE__},
        {"a{0,2}", "a", HIT, __LINE__},
        {"a{0,2}", "aa", HIT, __LINE__},
        {"a{0,2}", "aaa", HIT, __LINE__},
        {"[0-9]{3}", "123", HIT, __LINE__},
        {"[0-9]{3}", "000", HIT, __LINE__},
        {"[0-9]{3}", "12", MISS, __LINE__},
        {"(ab){2}", "abab", HIT, __LINE__},
        {"(ab){2}", "ababab", HIT, __LINE__},
        {"(ab){2}", "ab", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" brace quantifiers...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Backslash-escaped metacharacters must be treated as literal characters. */
static void test_escape_sequences(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"\\.", ".", HIT, __LINE__},
        {"\\.", "a", MISS, __LINE__},
        {"\\*", "*", HIT, __LINE__},
        {"\\*", "a", MISS, __LINE__},
        {"\\+", "+", HIT, __LINE__},
        {"\\?", "?", HIT, __LINE__},
        {"\\|", "|", HIT, __LINE__},
        {"\\(", "(", HIT, __LINE__},
        {"\\)", ")", HIT, __LINE__},
        {"\\[", "[", HIT, __LINE__},
        {"\\]", "]", HIT, __LINE__},
        {"\\{", "{", HIT, __LINE__},
        {"\\}", "}", HIT, __LINE__},
        {"\\^", "^", HIT, __LINE__},
        {"\\$", "$", HIT, __LINE__},
        {"\\\\", "\\", HIT, __LINE__},
        {"a\\.b", "a.b", HIT, __LINE__},
        {"a\\.b", "aXb", MISS, __LINE__},
        {"\\d\\.\\d", "1.5", HIT, __LINE__},
        {"c\\+\\+", "c++", HIT, __LINE__},
        {"\\(test\\)", "(test)", HIT, __LINE__},
        {"\\[0\\]", "[0]", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" escape sequences...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Composite patterns combining classes, counted repetition and groups:
 * e-mail-like, dotted-quad, URL, phone, ID, date and time shapes. */
static void test_complex_patterns(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "user@example.com", HIT, __LINE__},
        {"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "test.user@mail.example.org", HIT, __LINE__},
        {"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "invalid", MISS, __LINE__},
        {"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "@example.com", MISS, __LINE__},
        {"\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "192.168.1.1", HIT, __LINE__},
        {"\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "10.0.0.1", HIT, __LINE__},
        {"\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "255.255.255.255", HIT, __LINE__},
        {"\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "1.2.3", MISS, __LINE__},
        {"https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "http://example.com", HIT, __LINE__},
        {"https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "https://example.com", HIT, __LINE__},
        {"https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "http://example.com/path", HIT, __LINE__},
        {"https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "https://example.com/path/to/page", HIT, __LINE__},
        {"\\d{3}-\\d{3}-\\d{4}", "123-456-7890", HIT, __LINE__},
        {"\\d{3}-\\d{3}-\\d{4}", "555-123-4567", HIT, __LINE__},
        {"\\d{3}-\\d{3}-\\d{4}", "12-345-6789", MISS, __LINE__},
        {"\\d{3}-\\d{3}-\\d{4}", "1234567890", MISS, __LINE__},
        {"\\(\\d{3}\\) \\d{3}-\\d{4}", "(123) 456-7890", HIT, __LINE__},
        {"[A-Z]{2}\\d{6}", "AB123456", HIT, __LINE__},
        {"[A-Z]{2}\\d{6}", "A1234567", MISS, __LINE__},
        {"\\d{4}-\\d{2}-\\d{2}", "2024-01-15", HIT, __LINE__},
        {"\\d{2}/\\d{2}/\\d{4}", "01/15/2024", HIT, __LINE__},
        {"\\d{1,2}:\\d{2}(:\\d{2})?", "12:30", HIT, __LINE__},
        {"\\d{1,2}:\\d{2}(:\\d{2})?", "12:30:45", HIT, __LINE__},
        {"\\d{1,2}:\\d{2}(:\\d{2})?", "9:05", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" complex patterns...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Word and identifier shaped patterns built from \w and bracket classes.
 * Note: despite the function name, none of these cases uses a `\b`
 * boundary assertion. */
static void test_word_boundaries(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"\\w+", "hello", HIT, __LINE__},
        {"\\w+", "hello123", HIT, __LINE__},
        {"\\w+", "test_var", HIT, __LINE__},
        {"[a-zA-Z_][a-zA-Z0-9_]*", "variable", HIT, __LINE__},
        {"[a-zA-Z_][a-zA-Z0-9_]*", "_private", HIT, __LINE__},
        {"[a-zA-Z_][a-zA-Z0-9_]*", "var123", HIT, __LINE__},
        {"^[a-zA-Z_][a-zA-Z0-9_]*$", "123var", MISS, __LINE__},
        {"\\w+\\s+\\w+", "hello world", HIT, __LINE__},
        {"\\w+\\s+\\w+", "foo bar", HIT, __LINE__},
        {"\\w+\\s+\\w+", "hello", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" word patterns...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Greedy quantifiers next to their lazy (`?`-suffixed) counterparts. Only
 * match / no-match is recorded, so these cases check that both forms are
 * accepted and match, not how much text each one consumes. */
static void test_greedy_vs_nongreedy(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a+", "aaa", HIT, __LINE__},
        {"a+?", "aaa", HIT, __LINE__},
        {"a*", "aaa", HIT, __LINE__},
        {"a*?", "aaa", HIT, __LINE__},
        {"a?", "a", HIT, __LINE__},
        {"a??", "a", HIT, __LINE__},
        {"a{2,4}", "aaaa", HIT, __LINE__},
        {"a{2,4}?", "aaaa", HIT, __LINE__},
        {".*x", "abcx", HIT, __LINE__},
        {".*?x", "abcx", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" greedy vs non-greedy...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Empty patterns, empty texts, and quantifiers that can match nothing. */
static void test_empty_and_edge_cases(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"", "", HIT, __LINE__},
        {"", "abc", HIT, __LINE__},
        {"a*", "", HIT, __LINE__},
        {"a?", "", HIT, __LINE__},
        {"(a*)*", "", HIT, __LINE__},
        {"(a*)+", "", HIT, __LINE__},
        {"(a+)*", "", HIT, __LINE__},
        {"(a|b)*", "", HIT, __LINE__},
        {"[a-z]*", "", HIT, __LINE__},
        {"a+", "", MISS, __LINE__},
        {".+", "", MISS, __LINE__},
        {"[a-z]+", "", MISS, __LINE__},
        {"^", "", HIT, __LINE__},
        {"$", "", HIT, __LINE__},
        {"^$", "", HIT, __LINE__},
        {"^$", "a", MISS, __LINE__},
        {"a*b*c*", "", HIT, __LINE__},
        {"a*b*c*", "abc", HIT, __LINE__},
        {"a*b*c*", "aabbcc", HIT, __LINE__},
        {"a*b*c*", "c", HIT, __LINE__},
        {"a*b*c*", "b", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" empty and edge cases...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Texts containing control characters and regex metacharacters. The
 * "a.b" vs "a\nb" case expects `.` not to match a newline. */
static void test_special_characters_in_text(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a", "a\nb", HIT, __LINE__},
        {"b", "a\nb", HIT, __LINE__},
        {"a.b", "a\tb", HIT, __LINE__},
        {"a.b", "a\nb", MISS, __LINE__},
        {"\\.", "3.14", HIT, __LINE__},
        {"\\+", "1+2", HIT, __LINE__},
        {"\\*", "2*3", HIT, __LINE__},
        {"\\?", "why?", HIT, __LINE__},
        {"\\(\\)", "func()", HIT, __LINE__},
        {"\\[\\]", "array[]", HIT, __LINE__},
        {"\\{\\}", "object{}", HIT, __LINE__},
        {"\\^", "x^2", HIT, __LINE__},
        {"\\$", "$100", HIT, __LINE__},
        {"\\|", "a|b", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" special characters in text...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Several quantifiers in one pattern, including quantified groups that
 * themselves contain quantifiers. */
static void test_repetition_combinations(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a+b+", "ab", HIT, __LINE__},
        {"a+b+", "aabb", HIT, __LINE__},
        {"a+b+", "aaabbb", HIT, __LINE__},
        {"a+b+", "a", MISS, __LINE__},
        {"a+b+", "b", MISS, __LINE__},
        {"a*b+", "b", HIT, __LINE__},
        {"a*b+", "ab", HIT, __LINE__},
        {"a*b+", "aab", HIT, __LINE__},
        {"a+b*", "a", HIT, __LINE__},
        {"a+b*", "ab", HIT, __LINE__},
        {"a+b*", "abb", HIT, __LINE__},
        {"a*b*", "", HIT, __LINE__},
        {"a*b*", "a", HIT, __LINE__},
        {"a*b*", "b", HIT, __LINE__},
        {"a*b*", "ab", HIT, __LINE__},
        {"(ab)+c+", "abc", HIT, __LINE__},
        {"(ab)+c+", "ababcc", HIT, __LINE__},
        {"(a+b)+", "ab", HIT, __LINE__},
        {"(a+b)+", "aabaaab", HIT, __LINE__},
        {"((a+)+)+", "a", HIT, __LINE__},
        {"((a+)+)+", "aaa", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" repetition combinations...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Alternation combined with grouping, concatenation and quantifiers. */
static void test_alternation_combinations(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a|b|c|d|e", "a", HIT, __LINE__},
        {"a|b|c|d|e", "e", HIT, __LINE__},
        {"a|b|c|d|e", "f", MISS, __LINE__},
        {"(a|b)(c|d)", "ac", HIT, __LINE__},
        {"(a|b)(c|d)", "ad", HIT, __LINE__},
        {"(a|b)(c|d)", "bc", HIT, __LINE__},
        {"(a|b)(c|d)", "bd", HIT, __LINE__},
        {"(a|b)(c|d)", "ab", MISS, __LINE__},
        {"(cat|dog)s?", "cat", HIT, __LINE__},
        {"(cat|dog)s?", "cats", HIT, __LINE__},
        {"(cat|dog)s?", "dog", HIT, __LINE__},
        {"(cat|dog)s?", "dogs", HIT, __LINE__},
        {"(red|green|blue)\\s+(car|truck)", "red car", HIT, __LINE__},
        {"(red|green|blue)\\s+(car|truck)", "green truck", HIT, __LINE__},
        {"(a|aa|aaa)", "aaa", HIT, __LINE__},
        {"(aaa|aa|a)", "aaa", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" alternation combinations...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Groups nested up to three levels deep, with alternation and quantifiers
 * inside the nesting. */
static void test_nested_groups(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"((a))", "a", HIT, __LINE__},
        {"(((a)))", "a", HIT, __LINE__},
        {"((a)(b))", "ab", HIT, __LINE__},
        {"((a(b))c)", "abc", HIT, __LINE__},
        {"(a(b(c)))", "abc", HIT, __LINE__},
        {"((a|b)(c|d))", "ac", HIT, __LINE__},
        {"(a(b|c)d)", "abd", HIT, __LINE__},
        {"(a(b|c)d)", "acd", HIT, __LINE__},
        {"((ab)+)", "abab", HIT, __LINE__},
        {"(a(bc)*d)", "ad", HIT, __LINE__},
        {"(a(bc)*d)", "abcd", HIT, __LINE__},
        {"(a(bc)*d)", "abcbcd", HIT, __LINE__},
        {"((a+)(b+))", "aabb", HIT, __LINE__},
        {"(((a|b)+)c)", "ababc", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" nested groups...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Everyday patterns: words, signed integers and decimals, hex and binary
 * digit runs, capitalised words, quoted strings and hex colour codes. */
static void test_real_world_patterns(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"[a-zA-Z]+", "Hello", HIT, __LINE__},
        {"[a-zA-Z]+", "WORLD", HIT, __LINE__},
        {"[a-zA-Z]+", "test", HIT, __LINE__},
        {"-?\\d+", "123", HIT, __LINE__},
        {"-?\\d+", "-456", HIT, __LINE__},
        {"-?\\d+", "0", HIT, __LINE__},
        {"-?\\d+\\.?\\d*", "3.14", HIT, __LINE__},
        {"-?\\d+\\.?\\d*", "-2.5", HIT, __LINE__},
        {"-?\\d+\\.?\\d*", "42", HIT, __LINE__},
        {"[a-fA-F0-9]+", "deadbeef", HIT, __LINE__},
        {"[a-fA-F0-9]+", "CAFEBABE", HIT, __LINE__},
        {"[a-fA-F0-9]+", "123abc", HIT, __LINE__},
        {"[01]+", "101010", HIT, __LINE__},
        {"[01]+", "11110000", HIT, __LINE__},
        {"[A-Z][a-z]+", "Hello", HIT, __LINE__},
        {"[A-Z][a-z]+", "World", HIT, __LINE__},
        {"[A-Z][a-z]+", "hello", MISS, __LINE__},
        {"\"[^\"]*\"", "\"hello\"", HIT, __LINE__},
        {"\"[^\"]*\"", "\"hello world\"", HIT, __LINE__},
        {"\"[^\"]*\"", "\"\"", HIT, __LINE__},
        {"'[^']*'", "'test'", HIT, __LINE__},
        {"#[a-fA-F0-9]{6}", "#ff0000", HIT, __LINE__},
        {"#[a-fA-F0-9]{6}", "#00FF00", HIT, __LINE__},
        {"#[a-fA-F0-9]{3}", "#f00", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" real world patterns...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* Patterns with heavily nested or overlapping quantifiers; every case is
 * expected to match its short input. */
static void test_pathological_patterns(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"a?a?a?aaa", "aaa", HIT, __LINE__},
        {"(a+)+", "aaaa", HIT, __LINE__},
        {"(a*)*", "aaaa", HIT, __LINE__},
        {"(a|a)+", "aaaa", HIT, __LINE__},
        {"((a*)*)*", "aaaa", HIT, __LINE__},
        {"a*a*a*a*a*b", "aaaaab", HIT, __LINE__},
        {".*.*.*.*.*", "test", HIT, __LINE__},
        {"(a?){5}a{5}", "aaaaa", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" stress test patterns...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
static void test_anchored_match(void) {
printf(" anchored match (lorex_match)...\n");
lorex_error_t err;
lorex_match_t m;
lorex_regex_t *re = lorex_compile("abc", &err);
if (re) {
if (lorex_match(re, "abc", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should match 'abc' against 'abc'\n");
failed++;
}
if (!lorex_match(re, "xabc", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should not match 'abc' against 'xabc'\n");
failed++;
}
if (lorex_match(re, "abcx", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should match 'abc' at start of 'abcx'\n");
failed++;
}
lorex_free(re);
}
re = lorex_compile("^abc$", &err);
if (re) {
if (lorex_match(re, "abc", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should match '^abc$' against 'abc'\n");
failed++;
}
if (!lorex_match(re, "abcx", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should not match '^abc$' against 'abcx'\n");
failed++;
}
lorex_free(re);
}
re = lorex_compile("a.*z", &err);
if (re) {
if (lorex_match(re, "abcz", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should match 'a.*z' against 'abcz'\n");
failed++;
}
if (!lorex_match(re, "xabcz", &m)) {
passed++;
} else {
printf("FAIL: lorex_match should not match 'a.*z' against 'xabcz'\n");
failed++;
}
lorex_free(re);
}
}
static void test_error_strings(void) {
printf(" error strings...\n");
if (strcmp(lorex_error_string(LOREX_OK), "success") == 0) {
passed++;
} else {
printf("FAIL: LOREX_OK should return 'success'\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_INVALID_PATTERN), "invalid pattern") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_INVALID_PATTERN error string\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_UNBALANCED_PAREN), "unbalanced parentheses") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_UNBALANCED_PAREN error string\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_EMPTY_GROUP), "empty group") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_EMPTY_GROUP error string\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_INVALID_QUANTIFIER), "invalid quantifier") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_INVALID_QUANTIFIER error string\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_INVALID_ESCAPE), "invalid escape sequence") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_INVALID_ESCAPE error string\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_OUT_OF_MEMORY), "out of memory") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_OUT_OF_MEMORY error string\n");
failed++;
}
if (strcmp(lorex_error_string(LOREX_ERR_STATE_OVERFLOW), "state overflow") == 0) {
passed++;
} else {
printf("FAIL: LOREX_ERR_STATE_OVERFLOW error string\n");
failed++;
}
if (strcmp(lorex_error_string((lorex_error_t)99), "unknown error") == 0) {
passed++;
} else {
printf("FAIL: unknown error code should return 'unknown error'\n");
failed++;
}
}
/* Malformed patterns that lorex_compile() must reject. The two unclosed-
 * parenthesis patterns must also report LOREX_ERR_UNBALANCED_PAREN; for the
 * others only the NULL return is checked. */
static void test_parser_errors(void) {
    const struct {
        const char *pattern;
        int need_paren_err; /* 1: the error code is checked as well */
        const char *complaint;
    } table[] = {
        {"(abc", 1, "FAIL: '(abc' should fail with unbalanced paren\n"},
        {"((a)", 1, "FAIL: '((a)' should fail with unbalanced paren\n"},
        {"a{5,2}", 0, "FAIL: 'a{5,2}' should fail (min > max)\n"},
        {"*abc", 0, "FAIL: '*abc' should fail\n"},
        {"+abc", 0, "FAIL: '+abc' should fail\n"},
        {"?abc", 0, "FAIL: '?abc' should fail\n"},
    };
    const size_t count = sizeof table / sizeof table[0];
    lorex_error_t err;

    printf(" parser errors...\n");
    for (size_t i = 0; i < count; i++) {
        lorex_regex_t *re = lorex_compile(table[i].pattern, &err);

        /* err is consulted only after a NULL return, and only when required. */
        int rejected = (re == NULL) &&
                       (!table[i].need_paren_err || err == LOREX_ERR_UNBALANCED_PAREN);
        if (rejected) {
            passed++;
            continue;
        }

        fputs(table[i].complaint, stdout);
        failed++;
        if (re != NULL) {
            /* The pattern compiled although it should not have: release it. */
            lorex_free(re);
        }
    }
}
/* Shorthand classes (\d, \w, \s) used inside bracket expressions, alone and
 * mixed with literals, ranges and a trailing `-`. */
static void test_bracket_char_classes(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"[\\d]", "5", HIT, __LINE__},
        {"[\\d]+", "12345", HIT, __LINE__},
        {"[\\d]", "a", MISS, __LINE__},
        {"[\\w]", "a", HIT, __LINE__},
        {"[\\w]", "Z", HIT, __LINE__},
        {"[\\w]", "5", HIT, __LINE__},
        {"[\\w]", "_", HIT, __LINE__},
        {"[\\w]", " ", MISS, __LINE__},
        {"[\\s]", " ", HIT, __LINE__},
        {"[\\s]", "\t", HIT, __LINE__},
        {"[\\s]", "a", MISS, __LINE__},
        {"[a\\d]", "a", HIT, __LINE__},
        {"[a\\d]", "5", HIT, __LINE__},
        {"[a\\d]", "b", MISS, __LINE__},
        {"[\\da-z]", "5", HIT, __LINE__},
        {"[\\da-z]", "m", HIT, __LINE__},
        {"[\\da-z]", "M", MISS, __LINE__},
        {"[\\w\\s]+", "hello world", HIT, __LINE__},
        {"[0-9\\s]+", "1 2 3", HIT, __LINE__},
        {"[\\w-]+", "hello-world", HIT, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" bracket character classes...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
/* The \n, \t and \r escapes must match the control characters themselves,
 * inside and outside brackets, and must not match the plain letters. */
static void test_special_escapes(void) {
    enum { MISS = 0, HIT = 1 };
    /* One row per case; __LINE__ keeps failure messages tied to the row. */
    static const struct {
        const char *pat;
        const char *txt;
        int want;
        int line;
    } cases[] = {
        {"\\n", "\n", HIT, __LINE__},
        {"a\\nb", "a\nb", HIT, __LINE__},
        {"\\t", "\t", HIT, __LINE__},
        {"a\\tb", "a\tb", HIT, __LINE__},
        {"\\r", "\r", HIT, __LINE__},
        {"a\\rb", "a\rb", HIT, __LINE__},
        {"\\n\\t\\r", "\n\t\r", HIT, __LINE__},
        {"[\\n]", "\n", HIT, __LINE__},
        {"[\\t]", "\t", HIT, __LINE__},
        {"[\\r]", "\r", HIT, __LINE__},
        {"[\\n\\t]+", "\n\t\n", HIT, __LINE__},
        {"\\n", "n", MISS, __LINE__},
        {"\\t", "t", MISS, __LINE__},
        {"\\r", "r", MISS, __LINE__},
    };
    const size_t count = sizeof cases / sizeof cases[0];

    printf(" special escape sequences...\n");
    for (size_t i = 0; i < count; i++) {
        test_match(cases[i].pat, cases[i].txt, cases[i].want, cases[i].line);
    }
}
int main(void) {
printf("lorex integration tests\n");
printf("=======================\n\n");
test_literals();
test_dot();
test_anchors();
test_star();
test_plus();
test_question();
test_alternation();
test_groups();
test_bracket_simple();
test_bracket_ranges();
test_bracket_negated();
test_character_classes();
test_quantifier_braces();
test_escape_sequences();
test_complex_patterns();
test_word_boundaries();
test_greedy_vs_nongreedy();
test_empty_and_edge_cases();
test_special_characters_in_text();
test_repetition_combinations();
test_alternation_combinations();
test_nested_groups();
test_real_world_patterns();
test_pathological_patterns();
test_anchored_match();
test_error_strings();
test_parser_errors();
test_bracket_char_classes();
test_special_escapes();
printf("\n=======================\n");
printf("integration: %d passed, %d failed\n", passed, failed);
printf("total tests: %d\n", passed + failed);
return failed > 0 ? 1 : 0;
}