449 lines
17 KiB
C
Raw Normal View History

2026-01-04 01:58:43 +01:00
/* retoor <retoor@molodetz.nl> */
#define _POSIX_C_SOURCE 200809L
#include "../include/lorex.h"
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#define ITERATIONS 10000
#define WARMUP 1000
/*
 * One benchmark case: a regex pattern and the subject text it is run against.
 * Field order matters: the benchmarks[] table uses positional initializers.
 */
typedef struct {
    const char *name;     /* identifier printed in the reports; NULL ends the table */
    const char *pattern;  /* regex source handed to both engines */
    const char *text;     /* subject string that is searched */
    int expect_match;     /* non-zero when the pattern is expected to match text */
} benchmark_t;
/*
 * Measurements for one benchmark case on both engines.
 * Times are average microseconds per iteration; *_total_us is compile + match.
 */
typedef struct {
    /* lorex timings */
    double lorex_compile_us;
    double lorex_match_us;
    double lorex_total_us;

    /* POSIX regex timings */
    double posix_compile_us;
    double posix_match_us;
    double posix_total_us;

    /* 1 when the last search of the timed run reported a match, else 0 */
    int lorex_matched;
    int posix_matched;

    /* 1 when the pattern could not be compiled by that engine */
    int lorex_failed;
    int posix_failed;
} result_t;
/*
 * Benchmark table: {name, pattern, text, expect_match}.
 *
 * Every row is compiled with both lorex and POSIX regcomp(REG_EXTENDED) and
 * searched against its text. The list ends with an all-NULL sentinel row,
 * which is what the loops in main() test for. The name prefix (up to and
 * including the first '_') is what main()'s category breakdown matches on;
 * rows whose prefix is not listed there are left out of that breakdown.
 */
static benchmark_t benchmarks[] = {
/* literal_*: patterns without metacharacters */
{"literal_short", "hello", "hello world", 1},
{"literal_medium", "the quick brown", "the quick brown fox jumps over the lazy dog", 1},
{"literal_long", "Lorem ipsum dolor sit amet", "Lorem ipsum dolor sit amet, consectetur adipiscing elit", 1},
{"literal_nomatch", "xyz", "the quick brown fox jumps over the lazy dog", 0},
{"literal_end", "dog", "the quick brown fox jumps over the lazy dog", 1},
{"literal_repeated", "abcabc", "xyzabcabcdef", 1},
/* dot_*: the '.' wildcard, alone and under '*' / '+' */
{"dot_single", "a.c", "abc", 1},
{"dot_multiple", "a..b", "aXYb", 1},
{"dot_many", "a.....b", "a12345b", 1},
{"dot_star", "a.*b", "aXXXXXXXXXXb", 1},
{"dot_plus", "a.+b", "aXXXXXXXXXXb", 1},
/* anchor_*: '^' and '$' */
{"anchor_start", "^the", "the quick brown fox", 1},
{"anchor_end", "fox$", "the quick brown fox", 1},
{"anchor_both", "^hello$", "hello", 1},
{"anchor_start_nomatch", "^fox", "the quick brown fox", 0},
{"anchor_end_nomatch", "the$", "the quick brown fox", 0},
/* star_*: the '*' quantifier (star_empty searches an empty text) */
{"star_simple", "ab*c", "abbbbc", 1},
{"star_zero", "ab*c", "ac", 1},
{"star_greedy", "a.*b", "aXbXbXb", 1},
{"star_repeated", "a*b*c*", "aaabbbccc", 1},
{"star_empty", "a*", "", 1},
/* plus_*: the '+' quantifier */
{"plus_simple", "ab+c", "abbbbc", 1},
{"plus_one", "ab+c", "abc", 1},
{"plus_nomatch", "ab+c", "ac", 0},
{"plus_greedy", "a.+b", "aXbXbXb", 1},
/* question_*: the '?' quantifier */
{"question_present", "colou?r", "colour", 1},
{"question_absent", "colou?r", "color", 1},
{"question_multiple", "a?b?c?d", "abcd", 1},
/* class_*: bracket expressions, including ranges and negation */
{"class_vowels", "[aeiou]", "hello", 1},
{"class_digits", "[0-9]+", "abc123def", 1},
{"class_alpha", "[a-zA-Z]+", "HelloWorld", 1},
{"class_alnum", "[a-zA-Z0-9]+", "Test123", 1},
{"class_neg_digit", "[^0-9]+", "hello", 1},
{"class_neg_alpha", "[^a-zA-Z]+", "12345", 1},
{"class_complex", "[a-zA-Z_][a-zA-Z0-9_]*", "variable_name_123", 1},
/* alt_*: alternation with '|' */
{"alt_simple", "cat|dog", "I have a cat", 1},
{"alt_simple2", "cat|dog", "I have a dog", 1},
{"alt_three", "red|green|blue", "the color is green", 1},
{"alt_nomatch", "cat|dog", "I have a bird", 0},
{"alt_words", "hello|world|test", "this is a test", 1},
/* group_*: parenthesised groups, repeated and nested */
{"group_simple", "(ab)+", "ababab", 1},
{"group_alt", "(cat|dog)s?", "cats", 1},
{"group_nested", "((a)(b))+", "ababab", 1},
{"group_complex", "(a(b(c)))+", "abcabc", 1},
/* quant_*: brace quantifiers {n}, {n,m}, {n,} */
{"quant_exact", "a{3}", "aaa", 1},
{"quant_exact_long", "a{10}", "aaaaaaaaaa", 1},
{"quant_range", "a{2,4}", "aaa", 1},
{"quant_min", "a{3,}", "aaaaa", 1},
{"quant_combined", "[0-9]{3}-[0-9]{4}", "555-1234", 1},
/* Composite patterns: e-mail, IP address, URL, phone, date, time, hex colour, tokens, numbers */
{"email_simple", "[a-z]+@[a-z]+\\.[a-z]+", "test@example.com", 1},
{"email_complex", "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "user.name+tag@sub.example.com", 1},
{"ip_address", "[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}", "192.168.1.100", 1},
{"url_http", "https?://[a-zA-Z0-9.-]+", "https://www.example.com", 1},
{"phone_us", "[0-9]{3}-[0-9]{3}-[0-9]{4}", "555-123-4567", 1},
{"date_iso", "[0-9]{4}-[0-9]{2}-[0-9]{2}", "2024-01-15", 1},
{"time_hms", "[0-9]{2}:[0-9]{2}:[0-9]{2}", "14:30:45", 1},
{"hex_color", "#[0-9a-fA-F]{6}", "#ff00ff", 1},
{"word_boundary", "[a-zA-Z]+", "hello world test", 1},
{"whitespace", "[ \\t\\n]+", "hello world", 1},
{"identifier", "[a-zA-Z_][a-zA-Z0-9_]*", "_privateVar123", 1},
{"number_int", "-?[0-9]+", "-12345", 1},
{"number_float", "-?[0-9]+\\.[0-9]+", "3.14159", 1},
/* long_text_*: the same three-sentence text searched at start, end, middle, and for a missing word */
{"long_text_start", "^The", "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.", 1},
{"long_text_end", "dog\\.$", "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.", 1},
{"long_text_middle", "fox", "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.", 1},
{"long_text_nomatch", "elephant", "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.", 0},
/* Counted group repetition and a ten-way alternation */
{"repeated_ab", "(ab){5}", "ababababab", 1},
{"repeated_word", "(hello ){3}", "hello hello hello ", 1},
{"alternation_long", "one|two|three|four|five|six|seven|eight|nine|ten", "the number is seven", 1},
/* escape_*: backslash-escaped metacharacters matched literally */
{"escape_dot", "3\\.14", "pi is 3.14", 1},
{"escape_star", "a\\*b", "a*b", 1},
{"escape_plus", "c\\+\\+", "c++", 1},
{"escape_parens", "\\(test\\)", "(test)", 1},
{"escape_brackets", "\\[0\\]", "array[0]", 1},
/* stress_*: stacked or nested quantifiers and overlapping alternatives */
{"stress_star", "a*a*a*a*a*b", "aaaaab", 1},
{"stress_plus", "a+a+a+a+a+b", "aaaaab", 1},
{"stress_nested", "((a+)+)+b", "aaaab", 1},
{"stress_alt", "(a|aa|aaa|aaaa)+b", "aaaab", 1},
/* nomatch_*: searches expected to find nothing */
{"nomatch_literal", "notfound", "the quick brown fox", 0},
{"nomatch_pattern", "^end", "start middle end", 0},
{"nomatch_class", "[0-9]+", "no digits here", 0},
/* Sentinel: a NULL name terminates the table. */
{NULL, NULL, NULL, 0}
};
/*
 * Current time from gettimeofday(), expressed in microseconds as a double.
 * The return value of gettimeofday() is not checked.
 */
static double get_time_us(void) {
    struct timeval now;

    gettimeofday(&now, NULL);

    const double seconds_as_us = now.tv_sec * 1000000.0;
    return seconds_as_us + now.tv_usec;
}
/*
 * Time one benchmark case on lorex and on POSIX regex.
 *
 * For each engine, in order:
 *   1. WARMUP untimed compile + search + free rounds;
 *   2. ITERATIONS timed compile + free rounds  -> *_compile_us;
 *   3. one untimed compile, then ITERATIONS timed searches -> *_match_us.
 * All *_us values are averages per iteration; *_total_us = compile + match.
 *
 * The one-off compile and the final free of phase 3 are deliberately kept
 * outside the timed region so that the match figure measures searching only
 * and the total does not count compilation twice.
 *
 * bench: case to run; its pattern and text are passed to both engines.
 * Returns the measurements. *_failed is set to 1 when the engine could not
 * compile the pattern, in which case that engine's timings are meaningless.
 * *_matched holds the outcome of the last timed search (1 = match).
 */
static result_t run_benchmark(benchmark_t *bench) {
    result_t res = {0};
    double start, end;

    /* ---- lorex: warmup ---- */
    for (int i = 0; i < WARMUP; i++) {
        lorex_error_t err;
        lorex_regex_t *re = lorex_compile(bench->pattern, &err);
        if (re) {
            lorex_match_t m;
            lorex_search(re, bench->text, &m);
            lorex_free(re);
        }
    }

    /* ---- lorex: compile timing ---- */
    start = get_time_us();
    for (int i = 0; i < ITERATIONS; i++) {
        lorex_error_t err;
        lorex_regex_t *re = lorex_compile(bench->pattern, &err);
        if (!re) {
            res.lorex_failed = 1;
            break;
        }
        lorex_free(re);
    }
    end = get_time_us();
    res.lorex_compile_us = (end - start) / ITERATIONS;

    /* ---- lorex: match timing (compile and free are not timed) ---- */
    {
        lorex_error_t err;
        lorex_regex_t *re = lorex_compile(bench->pattern, &err);
        if (re) {
            start = get_time_us();
            for (int i = 0; i < ITERATIONS; i++) {
                lorex_match_t m;
                /* a non-zero result from lorex_search is treated as "matched" */
                res.lorex_matched = lorex_search(re, bench->text, &m) ? 1 : 0;
            }
            end = get_time_us();
            res.lorex_match_us = (end - start) / ITERATIONS;
            lorex_free(re);
        } else {
            /* nothing was searched: report a failure, not a ~0 us match */
            res.lorex_failed = 1;
        }
    }
    res.lorex_total_us = res.lorex_compile_us + res.lorex_match_us;

    /* ---- POSIX: warmup ---- */
    for (int i = 0; i < WARMUP; i++) {
        regex_t preg;
        if (regcomp(&preg, bench->pattern, REG_EXTENDED) == 0) {
            regmatch_t pmatch[1];
            regexec(&preg, bench->text, 1, pmatch, 0);
            regfree(&preg);
        }
    }

    /* ---- POSIX: compile timing ---- */
    start = get_time_us();
    for (int i = 0; i < ITERATIONS; i++) {
        regex_t preg;
        if (regcomp(&preg, bench->pattern, REG_EXTENDED) != 0) {
            res.posix_failed = 1;
            break;
        }
        regfree(&preg);
    }
    end = get_time_us();
    res.posix_compile_us = (end - start) / ITERATIONS;

    /* ---- POSIX: match timing (regcomp and regfree are not timed) ---- */
    {
        regex_t preg;
        if (regcomp(&preg, bench->pattern, REG_EXTENDED) == 0) {
            start = get_time_us();
            for (int i = 0; i < ITERATIONS; i++) {
                regmatch_t pmatch[1];
                res.posix_matched = (regexec(&preg, bench->text, 1, pmatch, 0) == 0) ? 1 : 0;
            }
            end = get_time_us();
            res.posix_match_us = (end - start) / ITERATIONS;
            regfree(&preg);
        } else {
            res.posix_failed = 1;
        }
    }
    res.posix_total_us = res.posix_compile_us + res.posix_match_us;

    return res;
}
int main(void) {
printf("================================================================================\n");
printf(" LOREX vs POSIX REGEX PERFORMANCE BENCHMARK\n");
printf("================================================================================\n\n");
printf("Configuration:\n");
printf(" Iterations per test: %d\n", ITERATIONS);
printf(" Warmup iterations: %d\n", WARMUP);
printf("\n");
int total_tests = 0;
int lorex_wins = 0;
int posix_wins = 0;
int ties = 0;
double total_lorex_time = 0;
double total_posix_time = 0;
int lorex_compile_wins = 0;
int posix_compile_wins = 0;
int lorex_match_wins = 0;
int posix_match_wins = 0;
printf("================================================================================\n");
printf("%-25s | %-12s | %-12s | %-12s | %-8s\n", "TEST NAME", "LOREX (us)", "POSIX (us)", "SPEEDUP", "WINNER");
printf("================================================================================\n");
for (int i = 0; benchmarks[i].name != NULL; i++) {
benchmark_t *bench = &benchmarks[i];
result_t res = run_benchmark(bench);
if (res.lorex_failed || res.posix_failed) {
printf("%-25s | %-12s | %-12s | %-12s | %-8s\n",
bench->name,
res.lorex_failed ? "FAILED" : "OK",
res.posix_failed ? "FAILED" : "OK",
"-", "-");
continue;
}
total_tests++;
total_lorex_time += res.lorex_total_us;
total_posix_time += res.posix_total_us;
double speedup = res.posix_total_us / res.lorex_total_us;
const char *winner;
if (speedup > 1.05) {
winner = "LOREX";
lorex_wins++;
} else if (speedup < 0.95) {
winner = "POSIX";
posix_wins++;
} else {
winner = "TIE";
ties++;
}
if (res.lorex_compile_us < res.posix_compile_us) lorex_compile_wins++;
else posix_compile_wins++;
if (res.lorex_match_us < res.posix_match_us) lorex_match_wins++;
else posix_match_wins++;
printf("%-25s | %10.3f | %10.3f | %10.2fx | %-8s\n",
bench->name,
res.lorex_total_us,
res.posix_total_us,
speedup,
winner);
}
printf("================================================================================\n\n");
printf("================================================================================\n");
printf(" DETAILED RESULTS\n");
printf("================================================================================\n\n");
printf("%-25s | %-20s | %-20s\n", "TEST NAME", "LOREX (compile/match)", "POSIX (compile/match)");
printf("--------------------------------------------------------------------------------\n");
for (int i = 0; benchmarks[i].name != NULL; i++) {
benchmark_t *bench = &benchmarks[i];
result_t res = run_benchmark(bench);
if (res.lorex_failed || res.posix_failed) continue;
printf("%-25s | %8.3f / %8.3f | %8.3f / %8.3f\n",
bench->name,
res.lorex_compile_us, res.lorex_match_us,
res.posix_compile_us, res.posix_match_us);
}
printf("\n================================================================================\n");
printf(" SUMMARY\n");
printf("================================================================================\n\n");
printf("Total tests: %d\n", total_tests);
printf("\n");
printf("Overall wins:\n");
printf(" LOREX wins: %d (%.1f%%)\n", lorex_wins, 100.0 * lorex_wins / total_tests);
printf(" POSIX wins: %d (%.1f%%)\n", posix_wins, 100.0 * posix_wins / total_tests);
printf(" Ties: %d (%.1f%%)\n", ties, 100.0 * ties / total_tests);
printf("\n");
printf("Compilation phase wins:\n");
printf(" LOREX faster: %d\n", lorex_compile_wins);
printf(" POSIX faster: %d\n", posix_compile_wins);
printf("\n");
printf("Matching phase wins:\n");
printf(" LOREX faster: %d\n", lorex_match_wins);
printf(" POSIX faster: %d\n", posix_match_wins);
printf("\n");
printf("Total time (all tests):\n");
printf(" LOREX: %.3f us\n", total_lorex_time);
printf(" POSIX: %.3f us\n", total_posix_time);
printf(" Overall speedup: %.2fx %s\n",
total_posix_time > total_lorex_time ? total_posix_time / total_lorex_time : total_lorex_time / total_posix_time,
total_posix_time > total_lorex_time ? "(LOREX faster)" : "(POSIX faster)");
printf("\n================================================================================\n");
printf(" CATEGORY BREAKDOWN\n");
printf("================================================================================\n\n");
typedef struct {
const char *category;
const char *prefix;
double lorex_total;
double posix_total;
int count;
} category_t;
category_t categories[] = {
{"Literal matching", "literal_", 0, 0, 0},
{"Dot metacharacter", "dot_", 0, 0, 0},
{"Anchors", "anchor_", 0, 0, 0},
{"Star quantifier", "star_", 0, 0, 0},
{"Plus quantifier", "plus_", 0, 0, 0},
{"Question quantifier", "question_", 0, 0, 0},
{"Character classes", "class_", 0, 0, 0},
{"Alternation", "alt_", 0, 0, 0},
{"Groups", "group_", 0, 0, 0},
{"Brace quantifiers", "quant_", 0, 0, 0},
{"Real-world patterns", "email_", 0, 0, 0},
{"Escape sequences", "escape_", 0, 0, 0},
{"Stress tests", "stress_", 0, 0, 0},
{"No-match tests", "nomatch_", 0, 0, 0},
{NULL, NULL, 0, 0, 0}
};
for (int i = 0; benchmarks[i].name != NULL; i++) {
benchmark_t *bench = &benchmarks[i];
result_t res = run_benchmark(bench);
if (res.lorex_failed || res.posix_failed) continue;
for (int j = 0; categories[j].category != NULL; j++) {
if (strncmp(bench->name, categories[j].prefix, strlen(categories[j].prefix)) == 0) {
categories[j].lorex_total += res.lorex_total_us;
categories[j].posix_total += res.posix_total_us;
categories[j].count++;
break;
}
}
}
printf("%-25s | %-12s | %-12s | %-12s | %-8s\n", "CATEGORY", "LOREX (us)", "POSIX (us)", "SPEEDUP", "WINNER");
printf("--------------------------------------------------------------------------------\n");
for (int i = 0; categories[i].category != NULL; i++) {
if (categories[i].count == 0) continue;
double speedup = categories[i].posix_total / categories[i].lorex_total;
const char *winner = speedup > 1.0 ? "LOREX" : "POSIX";
printf("%-25s | %10.3f | %10.3f | %10.2fx | %-8s\n",
categories[i].category,
categories[i].lorex_total,
categories[i].posix_total,
speedup > 1.0 ? speedup : 1.0 / speedup,
winner);
}
printf("\n================================================================================\n");
printf(" PATTERN DETAILS\n");
printf("================================================================================\n\n");
for (int i = 0; benchmarks[i].name != NULL; i++) {
benchmark_t *bench = &benchmarks[i];
result_t res = run_benchmark(bench);
printf("Test: %s\n", bench->name);
printf(" Pattern: %s\n", bench->pattern);
printf(" Text: %.50s%s\n", bench->text, strlen(bench->text) > 50 ? "..." : "");
printf(" Expected: %s\n", bench->expect_match ? "MATCH" : "NO MATCH");
if (res.lorex_failed) {
printf(" LOREX: FAILED TO COMPILE\n");
} else {
printf(" LOREX: %s (compile: %.3f us, match: %.3f us, total: %.3f us)\n",
res.lorex_matched ? "MATCHED" : "NO MATCH",
res.lorex_compile_us, res.lorex_match_us, res.lorex_total_us);
}
if (res.posix_failed) {
printf(" POSIX: FAILED TO COMPILE\n");
} else {
printf(" POSIX: %s (compile: %.3f us, match: %.3f us, total: %.3f us)\n",
res.posix_matched ? "MATCHED" : "NO MATCH",
res.posix_compile_us, res.posix_match_us, res.posix_total_us);
}
if (!res.lorex_failed && !res.posix_failed) {
double speedup = res.posix_total_us / res.lorex_total_us;
if (speedup > 1.0) {
printf(" Result: LOREX is %.2fx faster\n", speedup);
} else {
printf(" Result: POSIX is %.2fx faster\n", 1.0 / speedup);
}
}
printf("\n");
}
printf("================================================================================\n");
printf(" BENCHMARK COMPLETE\n");
printf("================================================================================\n");
return 0;
}