chore: update c, h, md files

2026-01-04 00:04:48 +01:00 · 2026-01-04 00:04:48 +01:00 · 3d9c4aa00b
commit 3d9c4aa00b
26 changed files with 4459 additions and 0 deletions
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@ -0,0 +1,93 @@
+# retoor <retoor@molodetz.nl>
+name: CI
+
+on:  # triggers: pushes and pull requests that target main or master
+  push:
+    branches:
+      - main
+      - master
+  pull_request:
+    branches:
+      - main
+      - master
+
+jobs:
+  # Compile-only job: builds the release binary, then the debug binary.
+  # Only gcc and make are needed here; valgrind is installed by the
+  # dedicated valgrind job that actually runs it.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc make
+
+      - name: Build release
+        run: make
+
+      - name: Build debug
+        run: make debug
+
+  test:  # builds and runs every test binary through `make test`
+    runs-on: ubuntu-latest
+    needs: build  # starts only after the build job has succeeded
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc make
+
+      - name: Run tests
+        run: make test
+
+  valgrind:  # runs test_all and test_integration under valgrind
+    runs-on: ubuntu-latest
+    needs: build  # starts only after the build job has succeeded
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc make valgrind
+
+      - name: Build test binaries  # file targets defined in the Makefile
+        run: make build/test_integration build/test_all
+
+      - name: Valgrind comprehensive tests  # --error-exitcode=1 fails the step on any reported error
+        run: |
+          valgrind --leak-check=full --show-leak-kinds=all \
+            --track-origins=yes --error-exitcode=1 \
+            ./build/test_all
+
+      - name: Valgrind integration tests  # same flags, applied to the integration binary
+        run: |
+          valgrind --leak-check=full --show-leak-kinds=all \
+            --track-origins=yes --error-exitcode=1 \
+            ./build/test_integration
+
+  # Generates gcov output via `make coverage` and publishes build/coverage/.
+  coverage:
+    runs-on: ubuntu-latest
+    needs: test
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc make gcovr
+
+      - name: Generate coverage
+        run: make coverage
+
+      - name: Upload coverage artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: build/coverage/
+          # The Makefile moves report files with `|| true`, so the directory
+          # can end up empty; fail loudly instead of uploading nothing.
+          if-no-files-found: error
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,29 @@
+# Build
+build/
+*.o
+*.a
+*.so
+*.dylib
+
+# Binary
+loreg
+
+# Coverage
+*.gcov
+*.gcda
+*.gcno
+
+# Profiling
+gmon.out
+*.prof
+
+# Editor
+*~
+*.swp
+*.swo
+.vscode/
+.idea/
+
+# OS
+.DS_Store
+Thumbs.db
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,10 @@
+# Changelog
+
+
+
+## Version 0.1.0 - 2026-01-04
+
+update c, h, md files
+
+**Changes:** 25 files, 4449 lines
+**Languages:** C (3989 lines), Markdown (181 lines), Other (186 lines), YAML (93 lines)
--- a/Makefile
+++ b/Makefile
@ -0,0 +1,157 @@
+# retoor <retoor@molodetz.nl>
+
+# Toolchain and per-flavour compiler flags. Everything is assigned with :=
+# because no value depends on a variable defined later in the file.
+CC := gcc
+CFLAGS := -Wall -Wextra -Werror -pedantic -std=c11 -O3 -march=native -flto
+CFLAGS_DEBUG := -Wall -Wextra -pedantic -std=c11 -g -O0 -DDEBUG
+CFLAGS_COV := -Wall -Wextra -pedantic -std=c11 -g -O0 --coverage -fprofile-arcs -ftest-coverage
+CFLAGS_PROF := -Wall -Wextra -pedantic -std=c11 -O2 -pg
+
+INCLUDES := -Iinclude
+LDFLAGS := -flto
+LDFLAGS_COV := --coverage
+
+# Directory layout.
+SRC_DIR := src
+INC_DIR := include
+BUILD_DIR := build
+TEST_DIR := tests
+
+# Library sources: everything except the REPL and the CLI entry point.
+LIB_SRCS := $(addprefix $(SRC_DIR)/,lexer.c ast.c parser.c nfa.c matcher.c loreg.c)
+
+# The executable is the library plus repl.c and main.c.
+SRCS := $(LIB_SRCS) $(SRC_DIR)/repl.c $(SRC_DIR)/main.c
+
+OBJS := $(SRCS:$(SRC_DIR)/%.c=$(BUILD_DIR)/%.o)
+LIB_OBJS := $(LIB_SRCS:$(SRC_DIR)/%.c=$(BUILD_DIR)/%.o)
+
+TARGET := loreg
+LIB_TARGET := libloreg.a
+
+# One test program per name; sources live in tests/, binaries in build/.
+TEST_NAMES := test_lexer test_parser test_nfa test_matcher test_all test_integration
+TEST_SRCS := $(addprefix $(TEST_DIR)/,$(addsuffix .c,$(TEST_NAMES)))
+TEST_BINS := $(addprefix $(BUILD_DIR)/,$(TEST_NAMES))
+
+# Every command-style target must be listed here; otherwise a stray file named
+# e.g. "benchmark" or "uninstall" would make the target look up to date.
+.PHONY: all clean test debug coverage profile valgrind valgrind-verbose \
+        benchmark help install uninstall
+
+# Default goal: the optimized command-line binary.
+all: $(BUILD_DIR) $(TARGET)
+
+$(BUILD_DIR):
+	mkdir -p $(BUILD_DIR)
+
+# Compile one object per source file. -MMD writes a .d file listing the
+# headers each object includes, and -MP adds a phony target per header so a
+# deleted header does not break the build. The build directory is order-only
+# so its changing mtime never forces a recompile.
+$(BUILD_DIR)/%.o: $(SRC_DIR)/%.c | $(BUILD_DIR)
+	$(CC) $(CFLAGS) $(INCLUDES) -MMD -MP -c $< -o $@
+
+# Pull in the generated header dependencies; absent on the first build.
+-include $(OBJS:.o=.d)
+
+$(TARGET): $(OBJS)
+	$(CC) $(OBJS) -o $@ $(LDFLAGS)
+
+# Static library with the regex engine only (no REPL, no main).
+$(LIB_TARGET): $(LIB_OBJS)
+	$(AR) rcs $@ $(LIB_OBJS)
+
+# Debug build: wipe previous artifacts, then rebuild with CFLAGS_DEBUG.
+# The two steps run as ordered sub-makes so that `make -j debug` cannot
+# delete the build directory while objects are being compiled into it.
+debug:
+	$(MAKE) clean
+	$(MAKE) $(TARGET) CFLAGS='$(CFLAGS_DEBUG)'
+
+# Headers are prerequisites so that editing one rebuilds the test programs.
+HEADERS := $(wildcard $(INC_DIR)/*.h)
+
+# One static pattern rule for every test program: build/test_X is compiled
+# from tests/test_X.c together with the library sources, using debug flags.
+# $< is the test source because it is the first prerequisite.
+$(TEST_BINS): $(BUILD_DIR)/test_%: $(TEST_DIR)/test_%.c $(LIB_SRCS) $(HEADERS) | $(BUILD_DIR)
+	$(CC) $(CFLAGS_DEBUG) $(INCLUDES) $< $(LIB_SRCS) -o $@
+
+# Runs every test program in a fixed order; make stops at the first failure.
+test: $(TEST_BINS)
+	@echo "running lexer tests..." && $(BUILD_DIR)/test_lexer
+	@echo ""
+	@echo "running parser tests..." && $(BUILD_DIR)/test_parser
+	@echo ""
+	@echo "running nfa tests..." && $(BUILD_DIR)/test_nfa
+	@echo ""
+	@echo "running matcher tests..." && $(BUILD_DIR)/test_matcher
+	@echo ""
+	@echo "running comprehensive tests..." && $(BUILD_DIR)/test_all
+	@echo ""
+	@echo "running integration tests..." && $(BUILD_DIR)/test_integration
+
+# Coverage run: instrumented build of test_all, then gcov over the library.
+# Cleaning and directory creation happen inside the recipe, in order, so that
+# `make -j coverage` cannot remove the build directory after it was created.
+# The recipe uses CFLAGS_COV / LDFLAGS_COV directly, so no target-specific
+# CFLAGS or LDFLAGS overrides are needed.
+# NOTE(review): gcov is run from the top directory; confirm your GCC version
+# places the .gcno/.gcda files where `gcov -b` looks for them.
+coverage:
+	$(MAKE) clean
+	mkdir -p $(BUILD_DIR)/coverage
+	$(CC) $(CFLAGS_COV) $(INCLUDES) $(TEST_DIR)/test_all.c $(LIB_SRCS) -o $(BUILD_DIR)/test_coverage $(LDFLAGS_COV)
+	$(BUILD_DIR)/test_coverage
+	gcov -b $(LIB_SRCS)
+	@echo ""
+	@echo "coverage report generated"
+	@mv *.gcov $(BUILD_DIR)/coverage/ 2>/dev/null || true
+	@mv *.gcda $(BUILD_DIR)/coverage/ 2>/dev/null || true
+	@mv *.gcno $(BUILD_DIR)/coverage/ 2>/dev/null || true
+
+# Profiling run: -pg build of test_all, execute it, then summarize gmon.out.
+# Cleaning and directory creation are ordered recipe steps so the target is
+# safe under `make -j`.
+profile:
+	$(MAKE) clean
+	mkdir -p $(BUILD_DIR)
+	$(CC) $(CFLAGS_PROF) $(INCLUDES) $(TEST_DIR)/test_all.c $(LIB_SRCS) -o $(BUILD_DIR)/test_profile
+	$(BUILD_DIR)/test_profile
+	gprof $(BUILD_DIR)/test_profile gmon.out > $(BUILD_DIR)/profile.txt
+	@echo ""
+	@echo "profile report: $(BUILD_DIR)/profile.txt"
+	@mv gmon.out $(BUILD_DIR)/ 2>/dev/null || true
+
+# Flags common to both valgrind targets.
+VALGRIND_FLAGS := --leak-check=full --show-leak-kinds=all --track-origins=yes
+
+# Fails (exit code 1) when valgrind reports any error in test_all.
+valgrind: $(BUILD_DIR)/test_all
+	valgrind $(VALGRIND_FLAGS) --error-exitcode=1 $<
+
+# Same run, but verbose and written to a log file instead of the terminal.
+valgrind-verbose: $(BUILD_DIR)/test_all
+	valgrind $(VALGRIND_FLAGS) --verbose --log-file=$(BUILD_DIR)/valgrind.log $<
+	@echo "valgrind log: $(BUILD_DIR)/valgrind.log"
+
+# Times 1000 invocations of the binary for two sample patterns.
+# `$$(seq ...)` is written with a doubled dollar sign so that make passes a
+# literal `$(seq ...)` through to the shell started by `sh -c`.
+# NOTE(review): relies on a `time` utility or shell keyword being available
+# to the recipe shell - confirm on minimal systems.
+benchmark: $(TARGET)
+	@echo "benchmarking..."
+	@echo "pattern: [a-z]+@[a-z]+\\.[a-z]+"
+	@time -p sh -c 'for i in $$(seq 1 1000); do ./$(TARGET) "[a-z]+@[a-z]+\\.[a-z]+" "test@example.com" > /dev/null; done'
+	@echo ""
+	@echo "pattern: (a|b)*abb"
+	@time -p sh -c 'for i in $$(seq 1 1000); do ./$(TARGET) "(a|b)*abb" "aabababb" > /dev/null; done'
+
+# Installation layout. The defaults reproduce the previous hard-coded
+# /usr/local/bin; override with e.g. `make install PREFIX=/usr`.
+# DESTDIR is prepended for staged (packaging) installs.
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+
+install: $(TARGET)
+	install -d $(DESTDIR)$(BINDIR)
+	install -m 755 $(TARGET) $(DESTDIR)$(BINDIR)/
+
+uninstall:
+	$(RM) $(DESTDIR)$(BINDIR)/$(TARGET)
+
+# Removes everything the build, coverage and profile targets create.
+clean:
+	$(RM) -r $(BUILD_DIR) $(TARGET) $(LIB_TARGET)
+	$(RM) *.gcov *.gcda *.gcno gmon.out
+
+# Prints a one-line summary for each user-facing target.
+help:
+	@echo "loreg makefile targets:"
+	@echo "  all         build optimized release binary"
+	@echo "  debug       build with debug symbols"
+	@echo "  test        run all tests"
+	@echo "  coverage    run tests with coverage analysis"
+	@echo "  profile     run tests with profiling"
+	@echo "  valgrind    run tests under valgrind"
+	@echo "  valgrind-verbose  run valgrind and write a verbose log file"
+	@echo "  benchmark   run simple benchmarks"
+	@echo "  install     install to /usr/local/bin"
+	@echo "  uninstall   remove from /usr/local/bin"
+	@echo "  clean       remove build artifacts"
+	@echo "  help        show this message"
--- a/README.md
+++ b/README.md
@ -0,0 +1,181 @@
+# loreg
+
+retoor <retoor@molodetz.nl>
+
+A high-performance regular expression interpreter implemented from scratch in plain C. The engine uses Thompson's NFA construction algorithm for efficient pattern matching.
+
+## CI
+
+The project includes Gitea Actions CI that runs on every push and pull request:
+- Build verification (release and debug)
+- Full test suite (569 tests)
+- Valgrind memory leak detection
+- Code coverage generation
+
+## Features
+
+- Full regex syntax support: literals, metacharacters, quantifiers, character classes, groups, alternation, anchors
+- NFA-based matching engine with Thompson construction
+- Capturing groups with match position tracking
+- Interactive REPL for testing patterns
+- Zero external dependencies
+- Comprehensive test suite with 569 tests
+- Memory-safe implementation verified with Valgrind
+
+## Building
+
+```sh
+make            # optimized release build
+make debug      # debug build with symbols
+make test       # run all tests
+make coverage   # generate coverage report
+make profile    # generate profiling report
+make valgrind   # run under valgrind
+```
+
+## Usage
+
+### Command Line
+
+```sh
+./loreg "pattern" "text"           # search for pattern in text
+./loreg -m "pattern" "text"        # full match mode
+./loreg -i                         # start REPL
+./loreg                            # start REPL (default)
+```
+
+### REPL Commands
+
+```
+:p <pattern>  compile and set pattern
+:m <text>     match text (anchored)
+:s <text>     search for pattern in text
+<text>        search (default)
+:h            help
+:q            quit
+```
+
+### C API
+
+```c
+#include <stdio.h>
+#include "loreg.h"
+
+loreg_error_t err;
+loreg_regex_t *re = loreg_compile("\\d{3}-\\d{4}", &err);
+if (!re) {
+    fprintf(stderr, "error: %s\n", loreg_error_string(err));
+    return 1;
+}
+
+loreg_match_t result;
+if (loreg_search(re, "call 555-1234 now", &result)) {
+    printf("match at [%zu-%zu]\n", result.match_start, result.match_end);
+}
+
+loreg_free(re);
+```
+
+## Supported Syntax
+
+| Pattern | Description |
+|---------|-------------|
+| `.` | any character except newline |
+| `*` | zero or more |
+| `+` | one or more |
+| `?` | zero or one |
+| `\|` | alternation |
+| `()` | grouping and capture |
+| `[]` | character class |
+| `[^]` | negated character class |
+| `[a-z]` | character range |
+| `^` | start anchor |
+| `$` | end anchor |
+| `{n}` | exactly n |
+| `{n,}` | n or more |
+| `{n,m}` | n to m |
+| `\d` | digit [0-9] |
+| `\w` | word [a-zA-Z0-9_] |
+| `\s` | whitespace |
+| `\D` | non-digit |
+| `\W` | non-word |
+| `\S` | non-whitespace |
+| `*?` `+?` `??` | non-greedy quantifiers |
+
+## Architecture
+
+```
+src/
+├── lexer.c     tokenizer for regex patterns
+├── parser.c    recursive descent parser producing AST
+├── ast.c       abstract syntax tree node types
+├── nfa.c       Thompson NFA construction
+├── matcher.c   NFA simulation with epsilon closure
+├── loreg.c     public API
+├── repl.c      interactive REPL
+└── main.c      CLI entry point
+
+include/
+├── loreg.h     public header
+├── lexer.h     lexer interface
+├── parser.h    parser interface
+├── ast.h       AST types
+├── nfa.h       NFA types
+├── matcher.h   matcher interface
+└── repl.h      REPL interface
+
+tests/
+├── test_lexer.c       lexer unit tests (10 tests)
+├── test_parser.c      parser unit tests (20 tests)
+├── test_nfa.c         NFA construction tests (14 tests)
+├── test_matcher.c     matching tests (27 tests)
+├── test_all.c         comprehensive tests (9 tests)
+└── test_integration.c integration tests (489 tests)
+```
+
+## Test Suite
+
+The test suite contains 569 tests covering:
+
+| Category | Description |
+|----------|-------------|
+| Lexer | Tokenization of patterns |
+| Parser | AST construction and error handling |
+| NFA | State machine construction |
+| Matcher | Pattern matching correctness |
+| Integration | Real-world regex patterns |
+
+Integration tests cover:
+- Literal matching and concatenation
+- Dot metacharacter and wildcards
+- Start/end anchors
+- All quantifiers (*, +, ?, {n,m})
+- Alternation and grouping
+- Character classes and ranges
+- Negated character classes
+- Escape sequences
+- Email, IP, URL, phone patterns
+- Greedy vs non-greedy matching
+- Nested groups and complex nesting
+- Edge cases and boundary conditions
+- Pathological/stress patterns
+
+Run the test suite, then verify it under Valgrind:
+```sh
+make test           # run all 569 tests
+make valgrind       # verify zero memory leaks
+```
+
+## Algorithm
+
+The implementation uses Thompson's construction to convert regex patterns to NFAs:
+
+1. **Lexer**: Tokenizes the pattern into a stream of tokens
+2. **Parser**: Builds an AST using recursive descent parsing
+3. **NFA Construction**: Converts AST to NFA using Thompson's algorithm
+4. **Matching**: Simulates the NFA with epsilon closure, so matching time grows linearly with the text length for a fixed pattern
+
+Time complexity: O(n*m) where n is pattern length and m is text length.
+
+## License
+
+MIT
--- a/include/ast.h
+++ b/include/ast.h
@ -0,0 +1,80 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_AST_H
+#define LOREG_AST_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+typedef enum {  /* node kinds; stored in ast_node.type */
+    AST_CHAR,          /* built by ast_create_char(); the character is kept in `value` */
+    AST_DOT,           /* built by ast_create_dot() */
+    AST_CONCAT,        /* operands in `left` and `right` */
+    AST_ALTER,         /* operands in `left` and `right` */
+    AST_STAR,          /* operand in `left`; greediness in `quant.greedy` */
+    AST_PLUS,          /* operand in `left`; greediness in `quant.greedy` */
+    AST_QUESTION,      /* operand in `left`; greediness in `quant.greedy` */
+    AST_GROUP,         /* operand in `left`; group number in `group_id` */
+    AST_ANCHOR_START,  /* built by ast_create_anchor_start() */
+    AST_ANCHOR_END,    /* built by ast_create_anchor_end() */
+    AST_BRACKET,       /* owns the bracket_class_t in `bracket` (released by ast_free) */
+    AST_QUANTIFIER,    /* operand in `left`; bounds in `quant.min` / `quant.max` */
+    AST_CLASS_DIGIT,   /* the AST_CLASS_* kinds are built by ast_create_class() */
+    AST_CLASS_WORD,
+    AST_CLASS_SPACE,
+    AST_CLASS_NDIGIT,
+    AST_CLASS_NWORD,
+    AST_CLASS_NSPACE
+} ast_type_t;
+
+typedef struct {  /* one range; bracket_matches() tests start <= c <= end */
+    char start;
+    char end;
+} char_range_t;
+
+typedef struct {  /* growable list of ranges forming one bracket class */
+    char_range_t *ranges;  /* heap array; NULL until the first range is added */
+    size_t count;          /* ranges in use */
+    size_t capacity;       /* ranges allocated; starts at 8, then doubles */
+    bool negated;          /* when true, bracket_matches() inverts its result */
+} bracket_class_t;
+
+typedef struct {
+    int min;      /* new nodes default to 0 */
+    int max;      /* new nodes default to -1; presumably "no upper bound" - confirm in the parser */
+    bool greedy;  /* new nodes default to true */
+} quantifier_t;
+
+typedef struct ast_node ast_node_t;
+
+struct ast_node {
+    ast_type_t type;
+    char value;                /* '\0' unless set by ast_create_char() */
+    ast_node_t *left;          /* owned child; freed recursively by ast_free() */
+    ast_node_t *right;         /* owned child; freed recursively by ast_free() */
+    int group_id;              /* -1 unless set by ast_create_group() */
+    bracket_class_t *bracket;  /* owned; NULL unless set by ast_create_bracket() */
+    quantifier_t quant;
+};
+
+ast_node_t *ast_create_char(char c);  /* every ast_create_* returns NULL when allocation fails */
+ast_node_t *ast_create_dot(void);
+ast_node_t *ast_create_concat(ast_node_t *left, ast_node_t *right);
+ast_node_t *ast_create_alter(ast_node_t *left, ast_node_t *right);
+ast_node_t *ast_create_star(ast_node_t *child, bool greedy);
+ast_node_t *ast_create_plus(ast_node_t *child, bool greedy);
+ast_node_t *ast_create_question(ast_node_t *child, bool greedy);
+ast_node_t *ast_create_group(ast_node_t *child, int group_id);
+ast_node_t *ast_create_anchor_start(void);
+ast_node_t *ast_create_anchor_end(void);
+ast_node_t *ast_create_bracket(bracket_class_t *bracket);
+ast_node_t *ast_create_quantifier(ast_node_t *child, int min, int max, bool greedy);
+ast_node_t *ast_create_class(ast_type_t type);  /* creates a node of exactly the given type */
+void ast_free(ast_node_t *node);  /* frees the node, both children and its bracket; NULL is accepted */
+
+bracket_class_t *bracket_create(void);  /* empty, non-negated class; NULL when allocation fails */
+void bracket_add_char(bracket_class_t *bracket, char c);  /* adds the one-character range c..c */
+void bracket_add_range(bracket_class_t *bracket, char start, char end);  /* silently does nothing if growing fails */
+void bracket_free(bracket_class_t *bracket);  /* NULL is accepted */
+bool bracket_matches(bracket_class_t *bracket, char c);
+
+#endif
--- a/include/lexer.h
+++ b/include/lexer.h
@ -0,0 +1,52 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_LEXER_H
+#define LOREG_LEXER_H
+
+#include <stddef.h>
+#include <stdbool.h>
+
+typedef enum {  /* token kinds returned by lexer_next() */
+    TOKEN_CHAR,         /* literal character, including escapes such as \n, \t, \r and escaped metacharacters */
+    TOKEN_DOT,
+    TOKEN_STAR,
+    TOKEN_PLUS,
+    TOKEN_QUESTION,
+    TOKEN_PIPE,
+    TOKEN_LPAREN,
+    TOKEN_RPAREN,
+    TOKEN_LBRACKET,     /* '[' seen outside a bracket expression; turns in_bracket on */
+    TOKEN_RBRACKET,     /* ']' seen inside a bracket expression; turns in_bracket off */
+    TOKEN_CARET,        /* '^' outside brackets, or '^' directly after '[' inside them */
+    TOKEN_DOLLAR,
+    TOKEN_LBRACE,
+    TOKEN_RBRACE,
+    TOKEN_BACKSLASH,    /* not returned by lexer_next() in src/lexer.c */
+    TOKEN_DASH,         /* '-' inside a bracket expression only */
+    TOKEN_CLASS_DIGIT,  /* \d */
+    TOKEN_CLASS_WORD,   /* \w */
+    TOKEN_CLASS_SPACE,  /* \s */
+    TOKEN_CLASS_NDIGIT, /* \D */
+    TOKEN_CLASS_NWORD,  /* \W */
+    TOKEN_CLASS_NSPACE, /* \S */
+    TOKEN_EOF           /* position has reached the end of the pattern */
+} token_type_t;
+
+typedef struct {
+    token_type_t type;
+    char value;       /* source character, or the translated character for \n, \t, \r */
+    size_t position;  /* index in the pattern where the token starts */
+} token_t;
+
+typedef struct {
+    const char *pattern;  /* borrowed pointer; not copied by lexer_init() */
+    size_t length;        /* strlen(pattern), computed once in lexer_init() */
+    size_t position;      /* index of the next unread character */
+    bool in_bracket;      /* true between '[' and the matching ']' */
+} lexer_t;
+
+void lexer_init(lexer_t *lexer, const char *pattern);
+token_t lexer_next(lexer_t *lexer);  /* consumes and returns the next token */
+token_t lexer_peek(lexer_t *lexer);  /* returns the next token without consuming it */
+bool lexer_eof(lexer_t *lexer);      /* true once position >= length */
+
+#endif
--- a/include/loreg.h
+++ b/include/loreg.h
@ -0,0 +1,45 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_H
+#define LOREG_H
+
+#include <stddef.h>
+#include <stdbool.h>
+
+#define LOREG_VERSION "1.0.0"  /* NOTE(review): CHANGELOG.md names this release 0.1.0 - confirm which is right */
+#define LOREG_MAX_STATES 4096  /* usage is not visible in this header */
+#define LOREG_MAX_GROUPS 32    /* size of loreg_match_t.groups */
+
+typedef enum {  /* each code has a message in loreg_error_string() */
+    LOREG_OK = 0,
+    LOREG_ERR_INVALID_PATTERN,
+    LOREG_ERR_UNBALANCED_PAREN,
+    LOREG_ERR_EMPTY_GROUP,
+    LOREG_ERR_INVALID_QUANTIFIER,
+    LOREG_ERR_INVALID_ESCAPE,
+    LOREG_ERR_OUT_OF_MEMORY,
+    LOREG_ERR_STATE_OVERFLOW
+} loreg_error_t;
+
+typedef struct {
+    size_t start;  /* printed by the CLI as the half-open range [start, end) */
+    size_t end;
+    bool matched;  /* false when the group did not take part in the match */
+} loreg_group_t;
+
+typedef struct {
+    bool matched;
+    size_t match_start;  /* printed by the CLI as the half-open range [match_start, match_end) */
+    size_t match_end;
+    loreg_group_t groups[LOREG_MAX_GROUPS];
+    size_t group_count;  /* number of leading entries of groups[] the CLI iterates over */
+} loreg_match_t;
+
+typedef struct loreg_regex loreg_regex_t;  /* opaque; defined in src/loreg.c */
+
+loreg_regex_t *loreg_compile(const char *pattern, loreg_error_t *error);  /* returns NULL and sets *error on failure */
+void loreg_free(loreg_regex_t *regex);  /* NULL is accepted */
+bool loreg_match(loreg_regex_t *regex, const char *text, loreg_match_t *result);  /* calls nfa_match() at start position 0 */
+bool loreg_search(loreg_regex_t *regex, const char *text, loreg_match_t *result);  /* calls nfa_search() */
+const char *loreg_error_string(loreg_error_t error);  /* "unknown error" for unrecognized codes */
+
+#endif
--- a/include/matcher.h
+++ b/include/matcher.h
@ -0,0 +1,26 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_MATCHER_H
+#define LOREG_MATCHER_H
+
+#include "nfa.h"
+#include "loreg.h"
+
+typedef struct {  /* set of NFA states; NOTE(review): implementation not visible here, field roles inferred from names */
+    nfa_state_t **states;
+    size_t count;
+    size_t capacity;
+    size_t *group_starts;  /* presumably one entry per capture group - confirm in matcher.c */
+    size_t *group_ends;    /* presumably one entry per capture group - confirm in matcher.c */
+    int group_count;
+} state_set_t;
+
+state_set_t *state_set_create(size_t initial_capacity, int group_count);
+void state_set_free(state_set_t *set);
+void state_set_clear(state_set_t *set);
+void state_set_add(state_set_t *set, nfa_state_t *state);
+bool state_set_contains(state_set_t *set, nfa_state_t *state);
+
+bool nfa_match(nfa_t *nfa, const char *text, size_t start_pos, loreg_match_t *result);  /* called by loreg_match() with start_pos 0 */
+bool nfa_search(nfa_t *nfa, const char *text, loreg_match_t *result);  /* called by loreg_search() */
+
+#endif
--- a/include/nfa.h
+++ b/include/nfa.h
@ -0,0 +1,69 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_NFA_H
+#define LOREG_NFA_H
+
+#include "ast.h"
+#include "loreg.h"
+#include <stdbool.h>
+#include <stddef.h>
+
+#define EPSILON '\0'             /* NOTE(review): presumably the `value` used for epsilon transitions - usage not visible here */
+#define NFA_MAX_TRANSITIONS 256  /* usage is not visible in this header */
+
+typedef struct nfa_state nfa_state_t;
+
+typedef enum {  /* kinds of transition; several mirror the ast_type_t class kinds by name */
+    TRANS_CHAR,
+    TRANS_EPSILON,
+    TRANS_DOT,
+    TRANS_BRACKET,
+    TRANS_CLASS_DIGIT,
+    TRANS_CLASS_WORD,
+    TRANS_CLASS_SPACE,
+    TRANS_CLASS_NDIGIT,
+    TRANS_CLASS_NWORD,
+    TRANS_CLASS_NSPACE,
+    TRANS_GROUP_START,
+    TRANS_GROUP_END,
+    TRANS_ANCHOR_START,
+    TRANS_ANCHOR_END
+} transition_type_t;
+
+typedef struct {
+    transition_type_t type;
+    char value;                /* see nfa_add_transition() */
+    nfa_state_t *target;
+    bracket_class_t *bracket;  /* see nfa_add_bracket_transition() */
+    int group_id;              /* see nfa_add_group_transition() */
+} transition_t;
+
+struct nfa_state {
+    int id;
+    bool accepting;
+    transition_t *transitions;  /* NOTE(review): looks like a growable array (count/capacity) - confirm in nfa.c */
+    size_t trans_count;
+    size_t trans_capacity;
+};
+
+typedef struct {  /* a start/accept state pair */
+    nfa_state_t *start;
+    nfa_state_t *accept;
+} nfa_fragment_t;
+
+typedef struct {
+    nfa_state_t **states;
+    size_t state_count;
+    size_t capacity;
+    nfa_state_t *start;
+    int group_count;
+} nfa_t;
+
+nfa_t *nfa_create(void);
+void nfa_free(nfa_t *nfa);
+nfa_state_t *nfa_add_state(nfa_t *nfa);
+void nfa_add_transition(nfa_state_t *from, nfa_state_t *to, transition_type_t type, char value);
+void nfa_add_bracket_transition(nfa_state_t *from, nfa_state_t *to, bracket_class_t *bracket);
+void nfa_add_group_transition(nfa_state_t *from, nfa_state_t *to, transition_type_t type, int group_id);
+nfa_t *nfa_from_ast(ast_node_t *ast, loreg_error_t *error);  /* loreg_compile() treats *error != LOREG_OK as failure */
+
+#endif
--- a/include/parser.h
+++ b/include/parser.h
@ -0,0 +1,20 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_PARSER_H
+#define LOREG_PARSER_H
+
+#include "ast.h"
+#include "lexer.h"
+#include "loreg.h"
+
+typedef struct {  /* parser state; NOTE(review): parser.c is not visible here, field roles inferred from names */
+    lexer_t lexer;
+    token_t current;
+    loreg_error_t error;  /* presumably the value returned by parser_get_error() - confirm */
+    int group_count;
+} parser_t;
+
+void parser_init(parser_t *parser, const char *pattern);
+ast_node_t *parser_parse(parser_t *parser);          /* loreg_compile() takes ownership of the returned tree */
+loreg_error_t parser_get_error(parser_t *parser);    /* loreg_compile() checks this right after parser_parse() */
+
+#endif
--- a/include/repl.h
+++ b/include/repl.h
@ -0,0 +1,7 @@
+/* retoor <retoor@molodetz.nl> */
+#ifndef LOREG_REPL_H
+#define LOREG_REPL_H
+
+void repl_run(void);  /* called by main() when the program is started without arguments */
+
+#endif
--- a/src/ast.c
+++ b/src/ast.c
@ -0,0 +1,169 @@
+/* retoor <retoor@molodetz.nl> */
+#include "ast.h"
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+ * Allocates a node of the given type with every other field set to its
+ * default: no children, no bracket, group_id -1, quantifier {0, -1, greedy}.
+ * Returns NULL when malloc fails.
+ */
+static ast_node_t *ast_create_node(ast_type_t type) {
+    ast_node_t *fresh = malloc(sizeof *fresh);
+    if (fresh == NULL) {
+        return NULL;
+    }
+
+    /* One compound-literal assignment initializes all fields at once. */
+    *fresh = (ast_node_t){
+        .type = type,
+        .value = '\0',
+        .left = NULL,
+        .right = NULL,
+        .group_id = -1,
+        .bracket = NULL,
+        .quant = { .min = 0, .max = -1, .greedy = true },
+    };
+    return fresh;
+}
+
+/* Creates an AST_CHAR node holding c; returns NULL on allocation failure. */
+ast_node_t *ast_create_char(char c) {
+    ast_node_t *leaf = ast_create_node(AST_CHAR);
+    if (leaf == NULL) {
+        return NULL;
+    }
+    leaf->value = c;
+    return leaf;
+}
+
+/* Creates an AST_DOT node; returns NULL on allocation failure. */
+ast_node_t *ast_create_dot(void) {
+    ast_node_t *dot = ast_create_node(AST_DOT);
+    return dot;
+}
+
+/*
+ * Creates an AST_CONCAT node with the two operands as children.
+ * On allocation failure returns NULL and leaves both operands untouched.
+ */
+ast_node_t *ast_create_concat(ast_node_t *left, ast_node_t *right) {
+    ast_node_t *seq = ast_create_node(AST_CONCAT);
+    if (seq == NULL) {
+        return NULL;
+    }
+    seq->left = left;
+    seq->right = right;
+    return seq;
+}
+
+/*
+ * Creates an AST_ALTER node with the two alternatives as children.
+ * On allocation failure returns NULL and leaves both operands untouched.
+ */
+ast_node_t *ast_create_alter(ast_node_t *left, ast_node_t *right) {
+    ast_node_t *alt = ast_create_node(AST_ALTER);
+    if (alt == NULL) {
+        return NULL;
+    }
+    alt->left = left;
+    alt->right = right;
+    return alt;
+}
+
+/* Creates an AST_STAR node over child, recording the requested greediness. */
+ast_node_t *ast_create_star(ast_node_t *child, bool greedy) {
+    ast_node_t *rep = ast_create_node(AST_STAR);
+    if (rep == NULL) {
+        return NULL;
+    }
+    rep->left = child;
+    rep->quant.greedy = greedy;
+    return rep;
+}
+
+/* Creates an AST_PLUS node over child, recording the requested greediness. */
+ast_node_t *ast_create_plus(ast_node_t *child, bool greedy) {
+    ast_node_t *rep = ast_create_node(AST_PLUS);
+    if (rep == NULL) {
+        return NULL;
+    }
+    rep->left = child;
+    rep->quant.greedy = greedy;
+    return rep;
+}
+
+/* Creates an AST_QUESTION node over child, recording the requested greediness. */
+ast_node_t *ast_create_question(ast_node_t *child, bool greedy) {
+    ast_node_t *opt = ast_create_node(AST_QUESTION);
+    if (opt == NULL) {
+        return NULL;
+    }
+    opt->left = child;
+    opt->quant.greedy = greedy;
+    return opt;
+}
+
+/* Creates an AST_GROUP node wrapping child and tagged with group_id. */
+ast_node_t *ast_create_group(ast_node_t *child, int group_id) {
+    ast_node_t *grp = ast_create_node(AST_GROUP);
+    if (grp == NULL) {
+        return NULL;
+    }
+    grp->left = child;
+    grp->group_id = group_id;
+    return grp;
+}
+
+/* Creates an AST_ANCHOR_START node; returns NULL on allocation failure. */
+ast_node_t *ast_create_anchor_start(void) {
+    ast_node_t *anchor = ast_create_node(AST_ANCHOR_START);
+    return anchor;
+}
+
+/* Creates an AST_ANCHOR_END node; returns NULL on allocation failure. */
+ast_node_t *ast_create_anchor_end(void) {
+    ast_node_t *anchor = ast_create_node(AST_ANCHOR_END);
+    return anchor;
+}
+
+/*
+ * Creates an AST_BRACKET node that takes over the given bracket class
+ * (ast_free releases it). On allocation failure returns NULL and the
+ * bracket class is left with the caller.
+ */
+ast_node_t *ast_create_bracket(bracket_class_t *bracket) {
+    ast_node_t *cls = ast_create_node(AST_BRACKET);
+    if (cls == NULL) {
+        return NULL;
+    }
+    cls->bracket = bracket;
+    return cls;
+}
+
+/* Creates an AST_QUANTIFIER node over child with the given bounds and greediness. */
+ast_node_t *ast_create_quantifier(ast_node_t *child, int min, int max, bool greedy) {
+    ast_node_t *rep = ast_create_node(AST_QUANTIFIER);
+    if (rep == NULL) {
+        return NULL;
+    }
+    rep->left = child;
+    rep->quant = (quantifier_t){ .min = min, .max = max, .greedy = greedy };
+    return rep;
+}
+
+/* Creates a node of exactly the requested type with default fields. */
+ast_node_t *ast_create_class(ast_type_t type) {
+    ast_node_t *cls = ast_create_node(type);
+    return cls;
+}
+
+/* Recursively releases a subtree, including any bracket class it owns. NULL is a no-op. */
+void ast_free(ast_node_t *node) {
+    if (node != NULL) {
+        ast_free(node->left);
+        ast_free(node->right);
+        if (node->bracket != NULL) {
+            bracket_free(node->bracket);
+        }
+        free(node);
+    }
+}
+
+/* Allocates an empty, non-negated bracket class; returns NULL when malloc fails. */
+bracket_class_t *bracket_create(void) {
+    bracket_class_t *cls = malloc(sizeof *cls);
+    if (cls != NULL) {
+        *cls = (bracket_class_t){
+            .ranges = NULL,
+            .count = 0,
+            .capacity = 0,
+            .negated = false,
+        };
+    }
+    return cls;
+}
+
+/*
+ * Enlarges the range array: 8 slots on first use, double afterwards.
+ * Returns false (leaving the class unchanged) when realloc fails.
+ */
+static bool bracket_grow(bracket_class_t *bracket) {
+    size_t wanted = 8;
+    if (bracket->capacity != 0) {
+        wanted = bracket->capacity * 2;
+    }
+
+    char_range_t *resized = realloc(bracket->ranges, wanted * sizeof(char_range_t));
+    if (resized == NULL) {
+        return false;
+    }
+
+    bracket->capacity = wanted;
+    bracket->ranges = resized;
+    return true;
+}
+
+/* Adds a single character, stored as the one-element range c..c. */
+void bracket_add_char(bracket_class_t *bracket, char c) {
+    const char only = c;
+    bracket_add_range(bracket, only, only);
+}
+
+/*
+ * Appends the range start..end, growing the array first when it is full.
+ * If growing fails the range is dropped without any error indication.
+ */
+void bracket_add_range(bracket_class_t *bracket, char start, char end) {
+    const bool full = bracket->count >= bracket->capacity;
+    if (full && !bracket_grow(bracket)) {
+        return;
+    }
+
+    char_range_t *slot = &bracket->ranges[bracket->count];
+    slot->start = start;
+    slot->end = end;
+    bracket->count += 1;
+}
+
+/* Releases the range array and the class itself. NULL is a no-op. */
+void bracket_free(bracket_class_t *bracket) {
+    if (bracket != NULL) {
+        free(bracket->ranges);
+        free(bracket);
+    }
+}
+
+/*
+ * Reports whether c belongs to the bracket class.
+ *
+ * c matches when it lies inside any stored range (both ends inclusive); the
+ * answer is inverted for a negated class. Characters are compared as
+ * unsigned char so that a range crossing the 0x7F/0x80 boundary behaves the
+ * same whether plain char is signed or unsigned on the target platform.
+ * A NULL class matches nothing.
+ */
+bool bracket_matches(bracket_class_t *bracket, char c) {
+    if (!bracket) return false;
+
+    const unsigned char uc = (unsigned char)c;
+    bool found = false;
+    for (size_t i = 0; i < bracket->count; i++) {
+        const unsigned char lo = (unsigned char)bracket->ranges[i].start;
+        const unsigned char hi = (unsigned char)bracket->ranges[i].end;
+        if (uc >= lo && uc <= hi) {
+            found = true;
+            break;
+        }
+    }
+    return bracket->negated ? !found : found;
+}
--- a/src/lexer.c
+++ b/src/lexer.c
@ -0,0 +1,125 @@
+/* retoor <retoor@molodetz.nl> */
+#include "lexer.h"
+#include <string.h>
+
+/*
+ * Prepares a lexer to scan pattern from its first character.
+ *
+ * The pattern is borrowed, not copied, so it must outlive the lexer.
+ * A NULL pattern is treated as the empty string so that strlen() is never
+ * called on NULL; the first lexer_next() then returns TOKEN_EOF.
+ */
+void lexer_init(lexer_t *lexer, const char *pattern) {
+    if (pattern == NULL) {
+        pattern = "";
+    }
+    lexer->pattern = pattern;
+    lexer->length = strlen(pattern);
+    lexer->position = 0;
+    lexer->in_bracket = false;
+}
+
+/* Bundles a token kind, its character value and its source position. */
+static token_t make_token(token_type_t type, char value, size_t pos) {
+    return (token_t){ .type = type, .value = value, .position = pos };
+}
+
+/*
+ * Converts the character that follows a backslash into a token located at pos
+ * (the index of the backslash). \d \w \s \D \W \S become class tokens,
+ * \n \t \r become the matching control character, and any other character is
+ * returned as a literal TOKEN_CHAR.
+ */
+static token_t lex_escape(char escaped, size_t pos) {
+    switch (escaped) {
+        case 'd': return make_token(TOKEN_CLASS_DIGIT, 'd', pos);
+        case 'w': return make_token(TOKEN_CLASS_WORD, 'w', pos);
+        case 's': return make_token(TOKEN_CLASS_SPACE, 's', pos);
+        case 'D': return make_token(TOKEN_CLASS_NDIGIT, 'D', pos);
+        case 'W': return make_token(TOKEN_CLASS_NWORD, 'W', pos);
+        case 'S': return make_token(TOKEN_CLASS_NSPACE, 'S', pos);
+        case 'n': return make_token(TOKEN_CHAR, '\n', pos);
+        case 't': return make_token(TOKEN_CHAR, '\t', pos);
+        case 'r': return make_token(TOKEN_CHAR, '\r', pos);
+        default:  return make_token(TOKEN_CHAR, escaped, pos);
+    }
+}
+
+/*
+ * Consumes and returns the next token of the pattern.
+ *
+ * Returns TOKEN_EOF once the end of the pattern is reached. Between '[' and
+ * ']' only '-', a '^' directly following '[', and escapes are special; every
+ * other character there is a literal. A backslash that is the last character
+ * of the pattern is returned as a literal TOKEN_CHAR.
+ */
+token_t lexer_next(lexer_t *lexer) {
+    if (lexer->position >= lexer->length) {
+        return make_token(TOKEN_EOF, '\0', lexer->position);
+    }
+
+    const size_t pos = lexer->position++;
+    const char c = lexer->pattern[pos];
+
+    /* Bracket delimiters switch the lexer in and out of bracket mode. */
+    if (c == '[' && !lexer->in_bracket) {
+        lexer->in_bracket = true;
+        return make_token(TOKEN_LBRACKET, c, pos);
+    }
+    if (c == ']' && lexer->in_bracket) {
+        lexer->in_bracket = false;
+        return make_token(TOKEN_RBRACKET, c, pos);
+    }
+
+    /* Escapes are handled identically inside and outside brackets; they need
+     * one more character to be available after the backslash. */
+    if (c == '\\' && lexer->position < lexer->length) {
+        const char escaped = lexer->pattern[lexer->position];
+        lexer->position++;
+        return lex_escape(escaped, pos);
+    }
+
+    if (lexer->in_bracket) {
+        if (c == '-') {
+            return make_token(TOKEN_DASH, c, pos);
+        }
+        /* '^' is only special when the preceding character is '['. */
+        if (c == '^' && pos > 0 && lexer->pattern[pos - 1] == '[') {
+            return make_token(TOKEN_CARET, c, pos);
+        }
+        return make_token(TOKEN_CHAR, c, pos);
+    }
+
+    token_type_t kind;
+    switch (c) {
+        case '.': kind = TOKEN_DOT; break;
+        case '*': kind = TOKEN_STAR; break;
+        case '+': kind = TOKEN_PLUS; break;
+        case '?': kind = TOKEN_QUESTION; break;
+        case '|': kind = TOKEN_PIPE; break;
+        case '(': kind = TOKEN_LPAREN; break;
+        case ')': kind = TOKEN_RPAREN; break;
+        case '^': kind = TOKEN_CARET; break;
+        case '$': kind = TOKEN_DOLLAR; break;
+        case '{': kind = TOKEN_LBRACE; break;
+        case '}': kind = TOKEN_RBRACE; break;
+        default:  kind = TOKEN_CHAR; break;
+    }
+    return make_token(kind, c, pos);
+}
+
+/*
+ * Returns the next token without consuming it: the lexer's position and
+ * bracket state are saved, lexer_next() is run, and both are restored.
+ */
+token_t lexer_peek(lexer_t *lexer) {
+    const bool bracket_before = lexer->in_bracket;
+    const size_t position_before = lexer->position;
+
+    const token_t upcoming = lexer_next(lexer);
+
+    lexer->in_bracket = bracket_before;
+    lexer->position = position_before;
+    return upcoming;
+}
+
+/* True once every character of the pattern has been consumed. */
+bool lexer_eof(lexer_t *lexer) {
+    const bool more_input = lexer->position < lexer->length;
+    return !more_input;
+}
--- a/src/loreg.c
+++ b/src/loreg.c
@ -0,0 +1,71 @@
+/* retoor <retoor@molodetz.nl> */
+#include "loreg.h"
+#include "parser.h"
+#include "nfa.h"
+#include "matcher.h"
+#include <stdlib.h>
+
+struct loreg_regex {  /* compiled pattern; both members are released by loreg_free() */
+    nfa_t *nfa;       /* built from `ast` by nfa_from_ast() */
+    ast_node_t *ast;  /* parse tree; kept for the lifetime of the regex */
+};
+
+/*
+ * Compiles pattern into a regex object.
+ *
+ * Returns a new regex that must be released with loreg_free(), or NULL on
+ * failure. When error is non-NULL it receives LOREG_OK on success and the
+ * failure code otherwise; passing NULL is allowed when the caller does not
+ * need the code. A NULL pattern is rejected with LOREG_ERR_INVALID_PATTERN
+ * instead of being handed to the parser.
+ */
+loreg_regex_t *loreg_compile(const char *pattern, loreg_error_t *error) {
+    /* Redirect a missing out-parameter to a local so the code below can
+     * always write through the pointer. */
+    loreg_error_t local_error;
+    if (!error) error = &local_error;
+    *error = LOREG_OK;
+
+    if (!pattern) {
+        *error = LOREG_ERR_INVALID_PATTERN;
+        return NULL;
+    }
+
+    loreg_regex_t *regex = malloc(sizeof(loreg_regex_t));
+    if (!regex) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return NULL;
+    }
+
+    parser_t parser;
+    parser_init(&parser, pattern);
+
+    regex->ast = parser_parse(&parser);
+    *error = parser_get_error(&parser);
+
+    if (*error != LOREG_OK) {
+        /* The parser may return a partial tree together with an error. */
+        ast_free(regex->ast);
+        free(regex);
+        return NULL;
+    }
+
+    regex->nfa = nfa_from_ast(regex->ast, error);
+    if (*error != LOREG_OK) {
+        ast_free(regex->ast);
+        free(regex);
+        return NULL;
+    }
+
+    return regex;
+}
+
+/* Releases a compiled regex: its NFA, its AST, then the object. NULL is a no-op. */
+void loreg_free(loreg_regex_t *regex) {
+    if (regex != NULL) {
+        nfa_free(regex->nfa);
+        ast_free(regex->ast);
+        free(regex);
+    }
+}
+
+/*
+ * Runs nfa_match() on text starting at position 0 and returns its result.
+ *
+ * A NULL regex (for example the result of a failed loreg_compile) or a NULL
+ * text yields false instead of dereferencing NULL; in that case
+ * result->matched is cleared when result is provided.
+ */
+bool loreg_match(loreg_regex_t *regex, const char *text, loreg_match_t *result) {
+    if (!regex || !text) {
+        if (result) result->matched = false;
+        return false;
+    }
+    return nfa_match(regex->nfa, text, 0, result);
+}
+
+/*
+ * Runs nfa_search() on text and returns its result.
+ *
+ * A NULL regex (for example the result of a failed loreg_compile) or a NULL
+ * text yields false instead of dereferencing NULL; in that case
+ * result->matched is cleared when result is provided.
+ */
+bool loreg_search(loreg_regex_t *regex, const char *text, loreg_match_t *result) {
+    if (!regex || !text) {
+        if (result) result->matched = false;
+        return false;
+    }
+    return nfa_search(regex->nfa, text, result);
+}
+
+/*
+ * Maps an error code to a static, human-readable message.
+ * Codes outside the known range yield "unknown error".
+ */
+const char *loreg_error_string(loreg_error_t error) {
+    /* Indexed by error code; designated initializers keep the table in sync
+     * with the enum regardless of declaration order. */
+    static const char *const messages[] = {
+        [LOREG_OK] = "success",
+        [LOREG_ERR_INVALID_PATTERN] = "invalid pattern",
+        [LOREG_ERR_UNBALANCED_PAREN] = "unbalanced parentheses",
+        [LOREG_ERR_EMPTY_GROUP] = "empty group",
+        [LOREG_ERR_INVALID_QUANTIFIER] = "invalid quantifier",
+        [LOREG_ERR_INVALID_ESCAPE] = "invalid escape sequence",
+        [LOREG_ERR_OUT_OF_MEMORY] = "out of memory",
+        [LOREG_ERR_STATE_OVERFLOW] = "state overflow",
+    };
+    const size_t known = sizeof messages / sizeof messages[0];
+
+    if ((int)error < 0 || (size_t)error >= known) {
+        return "unknown error";
+    }
+    return messages[error];
+}
--- a/src/main.c
+++ b/src/main.c
@ -0,0 +1,107 @@
+/* retoor <retoor@molodetz.nl> */
+#include "loreg.h"
+#include "repl.h"
+#include <stdio.h>
+#include <string.h>
+
+/* Print the command-line help to stdout; `program` is shown as the
+ * executable name in the synopsis and in every example. */
+static void print_usage(const char *program) {
+    printf("usage: %s [options] [pattern] [text]\n", program);
+
+    /* Fixed option table: no formatting needed, so write it verbatim. */
+    fputs("options:\n", stdout);
+    fputs("  -h, --help     show this help\n", stdout);
+    fputs("  -v, --version  show version\n", stdout);
+    fputs("  -m, --match    full match mode (default is search)\n", stdout);
+    fputs("  -i             start interactive REPL\n", stdout);
+    fputs("\n", stdout);
+
+    fputs("examples:\n", stdout);
+    printf("  %s                    start REPL\n", program);
+    printf("  %s -i                 start REPL\n", program);
+    printf("  %s \"a+b\" \"aaab\"       search pattern in text\n", program);
+    printf("  %s -m \"a+b\" \"aaab\"    match pattern against text\n", program);
+}
+
+/* Print the program name followed by LOREG_VERSION to stdout. */
+static void print_version(void) {
+    fprintf(stdout, "loreg %s\n", LOREG_VERSION);
+}
+
+/* Write the characters text[from..to) to stdout, one at a time. */
+static void print_span(const char *text, size_t from, size_t to) {
+    for (size_t k = from; k < to; k++) {
+        putchar(text[k]);
+    }
+}
+
+/* Print a match record: either "no match", or the matched substring with
+ * its [start-end] offsets followed by one line per capture group that is
+ * flagged as matched. */
+static void print_match(const char *text, loreg_match_t *result) {
+    if (!result->matched) {
+        printf("no match\n");
+        return;
+    }
+
+    printf("match: \"");
+    print_span(text, result->match_start, result->match_end);
+    printf("\" [%zu-%zu]\n", result->match_start, result->match_end);
+
+    for (size_t g = 0; g < result->group_count; g++) {
+        if (!result->groups[g].matched) {
+            continue;   /* groups that did not take part are not listed */
+        }
+        printf("  group %zu: \"", g);
+        print_span(text, result->groups[g].start, result->groups[g].end);
+        printf("\" [%zu-%zu]\n", result->groups[g].start, result->groups[g].end);
+    }
+}
+
+/*
+ * Command-line entry point.
+ *
+ * With no arguments, or with -i, the interactive REPL is started. Otherwise
+ * leading options are consumed and the next two arguments are taken as
+ * pattern and text; the pattern is compiled and either matched (-m) or
+ * searched (default).
+ *
+ * Exit status: 0 when a match was found (or help/version/REPL was run),
+ * 1 on no match, bad usage or a compile error.
+ */
+int main(int argc, char *argv[]) {
+    if (argc == 1) {
+        repl_run();
+        return 0;
+    }
+
+    bool match_mode = false;
+    int arg_idx = 1;
+
+    /* Options are every leading argument that begins with '-'. */
+    while (arg_idx < argc && argv[arg_idx][0] == '-') {
+        if (strcmp(argv[arg_idx], "-h") == 0 || strcmp(argv[arg_idx], "--help") == 0) {
+            print_usage(argv[0]);
+            return 0;
+        }
+        if (strcmp(argv[arg_idx], "-v") == 0 || strcmp(argv[arg_idx], "--version") == 0) {
+            print_version();
+            return 0;
+        }
+        if (strcmp(argv[arg_idx], "-m") == 0 || strcmp(argv[arg_idx], "--match") == 0) {
+            match_mode = true;
+            arg_idx++;
+            continue;
+        }
+        if (strcmp(argv[arg_idx], "-i") == 0) {
+            repl_run();
+            return 0;
+        }
+        fprintf(stderr, "unknown option: %s\n", argv[arg_idx]);
+        return 1;
+    }
+
+    if (argc - arg_idx < 2) {
+        fprintf(stderr, "error: pattern and text required\n");
+        print_usage(argv[0]);
+        return 1;
+    }
+
+    const char *pattern = argv[arg_idx];
+    const char *text = argv[arg_idx + 1];
+
+    loreg_error_t error;
+    loreg_regex_t *regex = loreg_compile(pattern, &error);
+    if (!regex) {
+        fprintf(stderr, "error: %s\n", loreg_error_string(error));
+        return 1;
+    }
+
+    /* Start from a defined "no match" record: the result is read below
+     * regardless of the matcher's return value, so it must never hold
+     * indeterminate bytes if the matcher returns without filling it in. */
+    loreg_match_t result;
+    memset(&result, 0, sizeof(result));
+
+    if (match_mode) {
+        loreg_match(regex, text, &result);
+    } else {
+        loreg_search(regex, text, &result);
+    }
+
+    print_match(text, &result);
+
+    loreg_free(regex);
+    return result.matched ? 0 : 1;
+}
--- a/src/matcher.c
+++ b/src/matcher.c
@ -0,0 +1,411 @@
+/* retoor <retoor@molodetz.nl> */
+#include "matcher.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * Allocate an empty state set.
+ *
+ * initial_capacity: number of state slots to reserve (zero-filled).
+ * group_count:      when positive, per-group start/end arrays of that size
+ *                   are allocated and filled with (size_t)-1; otherwise
+ *                   both arrays are NULL.
+ *
+ * Returns the new set, or NULL if any allocation fails.
+ */
+state_set_t *state_set_create(size_t initial_capacity, int group_count) {
+    state_set_t *set = malloc(sizeof(state_set_t));
+    if (set == NULL) {
+        return NULL;
+    }
+
+    set->states = calloc(initial_capacity, sizeof(nfa_state_t *));
+    if (set->states == NULL) {
+        free(set);
+        return NULL;
+    }
+
+    set->count = 0;
+    set->capacity = initial_capacity;
+    set->group_count = group_count;
+    set->group_starts = NULL;
+    set->group_ends = NULL;
+
+    if (group_count <= 0) {
+        return set;   /* no capture bookkeeping requested */
+    }
+
+    set->group_starts = calloc(group_count, sizeof(size_t));
+    set->group_ends = calloc(group_count, sizeof(size_t));
+    if (set->group_starts == NULL || set->group_ends == NULL) {
+        free(set->group_starts);
+        free(set->group_ends);
+        free(set->states);
+        free(set);
+        return NULL;
+    }
+
+    const size_t unset = (size_t)-1;
+    int g = 0;
+    while (g < group_count) {
+        set->group_starts[g] = unset;
+        set->group_ends[g] = unset;
+        g++;
+    }
+
+    return set;
+}
+
+/* Release a state set and the three arrays it owns. NULL is ignored. */
+void state_set_free(state_set_t *set) {
+    if (set != NULL) {
+        free(set->states);
+        free(set->group_starts);
+        free(set->group_ends);
+        free(set);
+    }
+}
+
+/* Empty the set: zero every reserved slot and reset the element count.
+ * The capacity and the group arrays are left untouched. */
+void state_set_clear(state_set_t *set) {
+    const size_t bytes = set->capacity * sizeof(nfa_state_t *);
+    set->count = 0;
+    memset(set->states, 0, bytes);
+}
+
+/*
+ * Double the slot array of `set` (or give it a small initial size when the
+ * current capacity is 0) and zero the newly added slots.
+ *
+ * Returns false, leaving the set unchanged and still valid, if the new size
+ * would overflow or the reallocation fails.
+ */
+static bool state_set_grow(state_set_t *set) {
+    const size_t old_cap = set->capacity;
+    /* A zero capacity must not be "doubled" to zero: realloc(p, 0) may free
+     * the buffer and return NULL, which would leave set->states dangling. */
+    const size_t new_cap = old_cap ? old_cap * 2 : 4;
+
+    if (new_cap <= old_cap || new_cap > (size_t)-1 / sizeof(nfa_state_t *)) {
+        return false;   /* size computation would overflow */
+    }
+
+    nfa_state_t **new_states = realloc(set->states, new_cap * sizeof(nfa_state_t *));
+    if (!new_states) return false;
+
+    memset(new_states + old_cap, 0, (new_cap - old_cap) * sizeof(nfa_state_t *));
+    set->states = new_states;
+    set->capacity = new_cap;
+    return true;
+}
+
+/* Append `state` to the set unless it is already present. If the array is
+ * full and cannot be grown, the state is not added. */
+void state_set_add(state_set_t *set, nfa_state_t *state) {
+    if (state_set_contains(set, state)) {
+        return;
+    }
+
+    const bool full = set->count >= set->capacity;
+    if (full && !state_set_grow(set)) {
+        return;
+    }
+
+    set->states[set->count] = state;
+    set->count++;
+}
+
+/* Linear scan: true when `state` is one of the first `count` entries. */
+bool state_set_contains(state_set_t *set, nfa_state_t *state) {
+    size_t idx = 0;
+    while (idx < set->count) {
+        if (state == set->states[idx]) {
+            return true;
+        }
+        idx++;
+    }
+    return false;
+}
+
+/* True for the ASCII decimal digits '0'..'9'. */
+static bool is_digit(char c) {
+    if (c < '0') return false;
+    return c <= '9';
+}
+
+/* True for ASCII letters, ASCII digits and the underscore. */
+static bool is_word(char c) {
+    if (c == '_') return true;
+    if (c >= '0' && c <= '9') return true;
+    if (c >= 'a' && c <= 'z') return true;
+    return c >= 'A' && c <= 'Z';
+}
+
+/* True for space, tab, newline, carriage return, form feed and vertical tab. */
+static bool is_space(char c) {
+    switch (c) {
+        case ' ':
+        case '\t':
+        case '\n':
+        case '\r':
+        case '\f':
+        case '\v':
+            return true;
+        default:
+            return false;
+    }
+}
+
+/*
+ * Decide whether transition `t` can be taken.
+ *
+ * c:   the input character under consideration.
+ * pos: current offset in the text (used by the anchor transitions).
+ * len: length of the text (used by the end anchor).
+ *
+ * The dot and the negated classes never accept the NUL character.
+ * Transition types not listed here never match.
+ */
+static bool transition_matches(transition_t *t, char c, size_t pos, size_t len) {
+    const bool at_nul = (c == '\0');
+    bool hit = false;
+
+    switch (t->type) {
+        case TRANS_CHAR:         hit = (t->value == c); break;
+        case TRANS_DOT:          hit = !(c == '\n' || at_nul); break;
+        case TRANS_BRACKET:      hit = bracket_matches(t->bracket, c); break;
+        case TRANS_CLASS_DIGIT:  hit = is_digit(c); break;
+        case TRANS_CLASS_WORD:   hit = is_word(c); break;
+        case TRANS_CLASS_SPACE:  hit = is_space(c); break;
+        case TRANS_CLASS_NDIGIT: hit = !is_digit(c) && !at_nul; break;
+        case TRANS_CLASS_NWORD:  hit = !is_word(c) && !at_nul; break;
+        case TRANS_CLASS_NSPACE: hit = !is_space(c) && !at_nul; break;
+        case TRANS_ANCHOR_START: hit = (pos == 0); break;
+        case TRANS_ANCHOR_END:   hit = (pos == len); break;
+        default:                 break;
+    }
+
+    return hit;
+}
+
+/* One simulation thread: an NFA state together with the capture offsets
+ * recorded on the path that reached it. The two arrays are allocated by
+ * thread_list_create() and overwritten by add_thread(). */
+typedef struct {
+    nfa_state_t *state;     /* state this thread occupies */
+    size_t *group_starts;   /* per-group start offsets; nfa_match() uses (size_t)-1 for "unset" */
+    size_t *group_ends;     /* per-group end offsets, same convention */
+} thread_t;
+
+/* Fixed-capacity list of threads; all `capacity` slots are pre-allocated by
+ * thread_list_create() and only the first `count` are in use. */
+typedef struct {
+    thread_t *threads;   /* array of `capacity` slots */
+    size_t count;        /* number of slots currently in use */
+    size_t capacity;     /* total number of slots */
+    int group_count;     /* length of each thread's group_starts/group_ends array */
+} thread_list_t;
+
+/*
+ * Allocate a thread list with `capacity` slots. When group_count is
+ * positive every slot gets its own pair of capture arrays of that length;
+ * otherwise the slot's array pointers are NULL.
+ *
+ * Returns NULL, with everything allocated so far released, if any
+ * allocation fails.
+ */
+static thread_list_t *thread_list_create(size_t capacity, int group_count) {
+    thread_list_t *list = malloc(sizeof(thread_list_t));
+    if (list == NULL) {
+        return NULL;
+    }
+
+    list->threads = malloc(capacity * sizeof(thread_t));
+    if (list->threads == NULL) {
+        free(list);
+        return NULL;
+    }
+
+    for (size_t slot = 0; slot < capacity; slot++) {
+        thread_t *th = &list->threads[slot];
+
+        if (group_count <= 0) {
+            th->group_starts = NULL;
+            th->group_ends = NULL;
+            continue;
+        }
+
+        th->group_starts = malloc(group_count * sizeof(size_t));
+        th->group_ends = malloc(group_count * sizeof(size_t));
+        if (th->group_starts != NULL && th->group_ends != NULL) {
+            continue;
+        }
+
+        /* Unwind every slot initialised so far, including this one. */
+        for (size_t done = 0; done <= slot; done++) {
+            free(list->threads[done].group_starts);
+            free(list->threads[done].group_ends);
+        }
+        free(list->threads);
+        free(list);
+        return NULL;
+    }
+
+    list->count = 0;
+    list->capacity = capacity;
+    list->group_count = group_count;
+    return list;
+}
+
+/* Release a thread list: the capture arrays of every slot, the slot array
+ * and the list itself. NULL is ignored. */
+static void thread_list_free(thread_list_t *list) {
+    if (list == NULL) {
+        return;
+    }
+
+    size_t slot = 0;
+    while (slot < list->capacity) {
+        thread_t *th = &list->threads[slot];
+        free(th->group_starts);
+        free(th->group_ends);
+        slot++;
+    }
+
+    free(list->threads);
+    free(list);
+}
+
+/* Mark the list as empty; the pre-allocated slots are kept for reuse. */
+static void thread_list_clear(thread_list_t *list) {
+    list->count = (size_t)0;
+}
+
+/* Linear scan: true when some in-use thread already sits on `state`. */
+static bool thread_list_contains_state(thread_list_t *list, nfa_state_t *state) {
+    const thread_t *cursor = list->threads;
+    const thread_t *const stop = list->threads + list->count;
+
+    for (; cursor != stop; cursor++) {
+        if (cursor->state == state) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static void add_thread(thread_list_t *list, nfa_state_t *state,
+                       size_t *group_starts, size_t *group_ends);
+
+/*
+ * Add `state` and everything reachable from it without consuming input to
+ * `list`.
+ *
+ * Epsilon transitions are followed unconditionally, anchor transitions only
+ * when they hold at `pos`, and group transitions record `pos` as the start
+ * or end of their capture group for the states reached through them.
+ *
+ * group_starts / group_ends: capture offsets of the path so far. They are
+ *     modified temporarily while a group transition is explored and
+ *     restored before returning, so the caller sees them unchanged.
+ *     add_thread() copies them, so no thread keeps a pointer to them.
+ * visited: one flag per state id; a state is expanded at most once per
+ *     closure.
+ *
+ * The state itself is added after its outgoing transitions have been
+ * explored.
+ */
+static void follow_epsilons(thread_list_t *list, nfa_state_t *state,
+                           size_t *group_starts, size_t *group_ends,
+                           size_t pos, size_t len, bool *visited) {
+    if (!state || visited[state->id]) return;
+    visited[state->id] = true;
+
+    for (size_t i = 0; i < state->trans_count; i++) {
+        transition_t *t = &state->transitions[i];
+
+        switch (t->type) {
+            case TRANS_EPSILON:
+                follow_epsilons(list, t->target, group_starts, group_ends,
+                              pos, len, visited);
+                break;
+
+            case TRANS_GROUP_START: {
+                /* Save/modify/restore instead of heap-copying both arrays:
+                 * no allocation, and no path is dropped on malloc failure. */
+                size_t saved = group_starts[t->group_id];
+                group_starts[t->group_id] = pos;
+                follow_epsilons(list, t->target, group_starts, group_ends,
+                              pos, len, visited);
+                group_starts[t->group_id] = saved;
+                break;
+            }
+
+            case TRANS_GROUP_END: {
+                size_t saved = group_ends[t->group_id];
+                group_ends[t->group_id] = pos;
+                follow_epsilons(list, t->target, group_starts, group_ends,
+                              pos, len, visited);
+                group_ends[t->group_id] = saved;
+                break;
+            }
+
+            case TRANS_ANCHOR_START:
+            case TRANS_ANCHOR_END:
+                /* Anchors consume nothing; the character argument is unused. */
+                if (transition_matches(t, '\0', pos, len)) {
+                    follow_epsilons(list, t->target, group_starts, group_ends,
+                                  pos, len, visited);
+                }
+                break;
+
+            default:
+                break;   /* consuming transitions are handled by the step loop */
+        }
+    }
+
+    add_thread(list, state, group_starts, group_ends);
+}
+
+/* Append a thread for `state` to `list`, copying the given capture offsets
+ * into the slot's own arrays. Nothing happens when `state` is NULL, is
+ * already in the list, or the list is full. */
+static void add_thread(thread_list_t *list, nfa_state_t *state,
+                      size_t *group_starts, size_t *group_ends) {
+    if (state == NULL || thread_list_contains_state(list, state)) {
+        return;
+    }
+    if (list->count >= list->capacity) {
+        return;
+    }
+
+    thread_t *slot = list->threads + list->count;
+    list->count += 1;
+    slot->state = state;
+
+    if (list->group_count > 0) {
+        const size_t bytes = list->group_count * sizeof(size_t);
+        memcpy(slot->group_starts, group_starts, bytes);
+        memcpy(slot->group_ends, group_ends, bytes);
+    }
+}
+
+/*
+ * Run the NFA over `text`, anchored at `start_pos`, and report the longest
+ * match that begins there.
+ *
+ * nfa:       automaton to simulate.
+ * text:      NUL-terminated input.
+ * start_pos: offset at which the match must begin.
+ * result:    optional; always left in a defined state. On allocation
+ *            failure or invalid arguments it describes "no match" with
+ *            zero groups. Otherwise it carries the match span and the
+ *            capture groups, with group_count clamped to LOREG_MAX_GROUPS
+ *            so it never exceeds the number of entries written.
+ *
+ * Returns true when a match was found. NULL arguments and a start_pos past
+ * the end of the text yield false.
+ */
+bool nfa_match(nfa_t *nfa, const char *text, size_t start_pos, loreg_match_t *result) {
+    thread_list_t *current = NULL;
+    thread_list_t *next = NULL;
+    bool *visited = NULL;
+    size_t *init_starts = NULL;
+    size_t *init_ends = NULL;
+    size_t *best_starts = NULL;
+    size_t *best_ends = NULL;
+    bool matched = false;
+    size_t match_end = start_pos;
+
+    /* Give the caller a defined record even on the early exits below. */
+    if (result) {
+        result->matched = false;
+        result->match_start = start_pos;
+        result->match_end = start_pos;
+        result->group_count = 0;
+    }
+
+    if (!nfa || !text) return false;
+
+    size_t len = strlen(text);
+    if (start_pos > len) return false;
+
+    size_t num_states = nfa->state_count;
+    /* Always keep at least one capture slot so the arrays are never empty. */
+    int group_count = nfa->group_count > 0 ? nfa->group_count : 1;
+    size_t group_bytes = (size_t)group_count * sizeof(size_t);
+
+    current = thread_list_create(num_states, group_count);
+    next = thread_list_create(num_states, group_count);
+    visited = calloc(num_states, sizeof(bool));
+    init_starts = calloc(group_count, sizeof(size_t));
+    init_ends = calloc(group_count, sizeof(size_t));
+    best_starts = calloc(group_count, sizeof(size_t));
+    best_ends = calloc(group_count, sizeof(size_t));
+
+    if (!current || !next || !visited ||
+        !init_starts || !init_ends || !best_starts || !best_ends) {
+        goto cleanup;
+    }
+
+    for (int i = 0; i < group_count; i++) {
+        init_starts[i] = (size_t)-1;
+        init_ends[i] = (size_t)-1;
+        best_starts[i] = (size_t)-1;
+        best_ends[i] = (size_t)-1;
+    }
+
+    /* Seed the simulation with the closure of the start state. */
+    follow_epsilons(current, nfa->start, init_starts, init_ends,
+                   start_pos, len, visited);
+
+    /* The pattern may already accept the empty string at start_pos. */
+    for (size_t i = 0; i < current->count; i++) {
+        if (current->threads[i].state->accepting) {
+            matched = true;
+            match_end = start_pos;
+            memcpy(best_starts, current->threads[i].group_starts, group_bytes);
+            memcpy(best_ends, current->threads[i].group_ends, group_bytes);
+            break;
+        }
+    }
+
+    for (size_t pos = start_pos; pos < len; pos++) {
+        char c = text[pos];
+        thread_list_clear(next);
+
+        for (size_t i = 0; i < current->count; i++) {
+            thread_t *thread = &current->threads[i];
+            nfa_state_t *state = thread->state;
+
+            for (size_t j = 0; j < state->trans_count; j++) {
+                transition_t *t = &state->transitions[j];
+
+                /* Non-consuming transitions were already expanded by
+                 * follow_epsilons(); only consuming ones step the input. */
+                if (t->type == TRANS_EPSILON ||
+                    t->type == TRANS_GROUP_START ||
+                    t->type == TRANS_GROUP_END ||
+                    t->type == TRANS_ANCHOR_START ||
+                    t->type == TRANS_ANCHOR_END) {
+                    continue;
+                }
+
+                if (transition_matches(t, c, pos, len)) {
+                    memset(visited, 0, num_states * sizeof(bool));
+                    follow_epsilons(next, t->target,
+                                   thread->group_starts, thread->group_ends,
+                                   pos + 1, len, visited);
+                }
+            }
+        }
+
+        if (next->count == 0) break;   /* every thread died */
+
+        thread_list_t *tmp = current;
+        current = next;
+        next = tmp;
+
+        /* A later accepting state overrides an earlier one: longest wins. */
+        for (size_t i = 0; i < current->count; i++) {
+            if (current->threads[i].state->accepting) {
+                matched = true;
+                match_end = pos + 1;
+                memcpy(best_starts, current->threads[i].group_starts, group_bytes);
+                memcpy(best_ends, current->threads[i].group_ends, group_bytes);
+                break;
+            }
+        }
+    }
+
+    if (result) {
+        int reported = nfa->group_count;
+        if (reported > LOREG_MAX_GROUPS) reported = LOREG_MAX_GROUPS;
+
+        result->matched = matched;
+        result->match_start = start_pos;
+        result->match_end = matched ? match_end : start_pos;
+        result->group_count = reported;
+
+        for (int i = 0; i < reported; i++) {
+            result->groups[i].start = best_starts[i];
+            result->groups[i].end = best_ends[i];
+            result->groups[i].matched = (best_starts[i] != (size_t)-1 && best_ends[i] != (size_t)-1);
+        }
+    }
+
+cleanup:
+    /* Every pointer is either NULL or owned here; both free() and
+     * thread_list_free() accept NULL. */
+    free(init_starts);
+    free(init_ends);
+    free(best_starts);
+    free(best_ends);
+    thread_list_free(current);
+    thread_list_free(next);
+    free(visited);
+
+    return matched;
+}
+
+/*
+ * Find the first offset in `text` at which the NFA matches, trying every
+ * start position from 0 up to and including the end of the text.
+ *
+ * On success `result` (when non-NULL) holds what nfa_match() produced, with
+ * match_start set to the offset found. On failure it is reset to a
+ * non-matching record with zero offsets and zero groups.
+ */
+bool nfa_search(nfa_t *nfa, const char *text, loreg_match_t *result) {
+    const size_t len = strlen(text);
+    size_t start = 0;
+
+    while (start <= len) {
+        if (nfa_match(nfa, text, start, result)) {
+            if (result != NULL) {
+                result->match_start = start;
+            }
+            return true;
+        }
+        start++;
+    }
+
+    if (result != NULL) {
+        result->matched = false;
+        result->match_start = 0;
+        result->match_end = 0;
+        result->group_count = 0;
+    }
+    return false;
+}
--- a/src/nfa.c
+++ b/src/nfa.c
@ -0,0 +1,477 @@
+/* retoor <retoor@molodetz.nl> */
+#include "nfa.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocate an empty NFA: no states, no start state, no groups.
+ * Returns NULL if the allocation fails. */
+nfa_t *nfa_create(void) {
+    nfa_t *nfa = malloc(sizeof(nfa_t));
+    if (nfa != NULL) {
+        nfa->states = NULL;
+        nfa->state_count = 0;
+        nfa->capacity = 0;
+        nfa->start = NULL;
+        nfa->group_count = 0;
+    }
+    return nfa;
+}
+
+/* Release an NFA: each state's transition array, each state, the state
+ * table and the NFA itself. NULL is ignored. */
+void nfa_free(nfa_t *nfa) {
+    if (nfa == NULL) {
+        return;
+    }
+
+    size_t idx = 0;
+    while (idx < nfa->state_count) {
+        nfa_state_t *st = nfa->states[idx];
+        free(st->transitions);
+        free(st);
+        idx++;
+    }
+
+    free(nfa->states);
+    free(nfa);
+}
+
+/* Enlarge the state table: 16 slots initially, doubling afterwards, capped
+ * at LOREG_MAX_STATES. Returns false when the cap has already been reached
+ * or the reallocation fails; the table is unchanged in that case. */
+static bool nfa_grow(nfa_t *nfa) {
+    size_t wanted;
+
+    if (nfa->capacity == 0) {
+        wanted = 16;
+    } else {
+        wanted = nfa->capacity * 2;
+    }
+
+    if (wanted > LOREG_MAX_STATES) {
+        if (nfa->capacity >= LOREG_MAX_STATES) {
+            return false;
+        }
+        wanted = LOREG_MAX_STATES;
+    }
+
+    nfa_state_t **table = realloc(nfa->states, wanted * sizeof(nfa_state_t *));
+    if (table == NULL) {
+        return false;
+    }
+
+    nfa->capacity = wanted;
+    nfa->states = table;
+    return true;
+}
+
+/* Create a new non-accepting state with no transitions, append it to the
+ * NFA and return it. Its id is its index in the state table. Returns NULL
+ * if the table cannot grow or the state cannot be allocated. */
+nfa_state_t *nfa_add_state(nfa_t *nfa) {
+    const bool table_full = nfa->state_count >= nfa->capacity;
+    if (table_full && !nfa_grow(nfa)) {
+        return NULL;
+    }
+
+    nfa_state_t *fresh = malloc(sizeof(nfa_state_t));
+    if (fresh == NULL) {
+        return NULL;
+    }
+
+    fresh->id = (int)nfa->state_count;
+    fresh->accepting = false;
+    fresh->transitions = NULL;
+    fresh->trans_count = 0;
+    fresh->trans_capacity = 0;
+
+    nfa->states[nfa->state_count] = fresh;
+    nfa->state_count++;
+    return fresh;
+}
+
+/* Enlarge a state's transition array: 4 entries initially, doubling
+ * afterwards. Returns false, leaving the state unchanged, if the
+ * reallocation fails. */
+static bool transition_grow(nfa_state_t *state) {
+    size_t wanted = 4;
+    if (state->trans_capacity != 0) {
+        wanted = state->trans_capacity * 2;
+    }
+
+    transition_t *bigger = realloc(state->transitions, wanted * sizeof(transition_t));
+    if (bigger == NULL) {
+        return false;
+    }
+
+    state->trans_capacity = wanted;
+    state->transitions = bigger;
+    return true;
+}
+
+/* Append a transition of `type` from `from` to `to`, carrying `value` as
+ * its character. The bracket pointer is NULL and the group id -1. If the
+ * transition array cannot grow, nothing is added. */
+void nfa_add_transition(nfa_state_t *from, nfa_state_t *to, transition_type_t type, char value) {
+    if (from->trans_count >= from->trans_capacity && !transition_grow(from)) {
+        return;
+    }
+
+    transition_t *slot = from->transitions + from->trans_count;
+    from->trans_count += 1;
+
+    slot->type = type;
+    slot->value = value;
+    slot->target = to;
+    slot->bracket = NULL;
+    slot->group_id = -1;
+}
+
+/* Append a TRANS_BRACKET transition from `from` to `to` that refers to
+ * `bracket` (the pointer is stored, not copied). If the transition array
+ * cannot grow, nothing is added. */
+void nfa_add_bracket_transition(nfa_state_t *from, nfa_state_t *to, bracket_class_t *bracket) {
+    if (from->trans_count >= from->trans_capacity && !transition_grow(from)) {
+        return;
+    }
+
+    transition_t *slot = from->transitions + from->trans_count;
+    from->trans_count += 1;
+
+    slot->type = TRANS_BRACKET;
+    slot->value = '\0';
+    slot->target = to;
+    slot->bracket = bracket;
+    slot->group_id = -1;
+}
+
+/* Append a transition of `type` from `from` to `to` tagged with
+ * `group_id`; it carries no character and no bracket. If the transition
+ * array cannot grow, nothing is added. */
+void nfa_add_group_transition(nfa_state_t *from, nfa_state_t *to, transition_type_t type, int group_id) {
+    if (from->trans_count >= from->trans_capacity && !transition_grow(from)) {
+        return;
+    }
+
+    transition_t *slot = from->transitions + from->trans_count;
+    from->trans_count += 1;
+
+    slot->type = type;
+    slot->value = '\0';
+    slot->target = to;
+    slot->bracket = NULL;
+    slot->group_id = group_id;
+}
+
+static nfa_fragment_t build_nfa(nfa_t *nfa, ast_node_t *ast, loreg_error_t *error);
+
+/* Fragment for a literal character: two new states joined by a TRANS_CHAR
+ * transition on `c`. On allocation failure *error is set to
+ * LOREG_ERR_OUT_OF_MEMORY and an empty fragment is returned. */
+static nfa_fragment_t build_char(nfa_t *nfa, char c, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+
+    if (entry && leave) {
+        nfa_add_transition(entry, leave, TRANS_CHAR, c);
+        frag.start = entry;
+        frag.accept = leave;
+    } else {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+    }
+    return frag;
+}
+
+/* Fragment for '.': two new states joined by a TRANS_DOT transition. On
+ * allocation failure *error is set to LOREG_ERR_OUT_OF_MEMORY and an empty
+ * fragment is returned. */
+static nfa_fragment_t build_dot(nfa_t *nfa, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+
+    if (entry && leave) {
+        nfa_add_transition(entry, leave, TRANS_DOT, '\0');
+        frag.start = entry;
+        frag.accept = leave;
+    } else {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+    }
+    return frag;
+}
+
+/* Fragment for a character-class escape: two new states joined by a
+ * transition of the given class `type`. On allocation failure *error is
+ * set to LOREG_ERR_OUT_OF_MEMORY and an empty fragment is returned. */
+static nfa_fragment_t build_class(nfa_t *nfa, transition_type_t type, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+
+    if (entry && leave) {
+        nfa_add_transition(entry, leave, type, '\0');
+        frag.start = entry;
+        frag.accept = leave;
+    } else {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+    }
+    return frag;
+}
+
+/* Fragment for a bracket expression: two new states joined by a bracket
+ * transition that refers to `bracket`. On allocation failure *error is set
+ * to LOREG_ERR_OUT_OF_MEMORY and an empty fragment is returned. */
+static nfa_fragment_t build_bracket(nfa_t *nfa, bracket_class_t *bracket, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+
+    if (entry && leave) {
+        nfa_add_bracket_transition(entry, leave, bracket);
+        frag.start = entry;
+        frag.accept = leave;
+    } else {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+    }
+    return frag;
+}
+
+/* Fragment for `left` followed by `right`: both sub-fragments are built
+ * (left first) and the left accept state is linked to the right start
+ * state by an epsilon transition. An empty fragment is returned as soon as
+ * either build reports an error. */
+static nfa_fragment_t build_concat(nfa_t *nfa, ast_node_t *left, ast_node_t *right, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    nfa_fragment_t head = build_nfa(nfa, left, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    nfa_fragment_t tail = build_nfa(nfa, right, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    nfa_add_transition(head.accept, tail.start, TRANS_EPSILON, '\0');
+
+    frag.start = head.start;
+    frag.accept = tail.accept;
+    return frag;
+}
+
+/* Fragment for `left | right`: a new start state forks by epsilon into both
+ * branches (left first) and both branch accepts join by epsilon into a new
+ * accept state. An empty fragment is returned on allocation failure or
+ * when a branch fails to build. */
+static nfa_fragment_t build_alter(nfa_t *nfa, ast_node_t *left, ast_node_t *right, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    nfa_state_t *fork = nfa_add_state(nfa);
+    nfa_state_t *join = nfa_add_state(nfa);
+    if (fork == NULL || join == NULL) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return frag;
+    }
+
+    nfa_fragment_t first = build_nfa(nfa, left, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+    nfa_fragment_t second = build_nfa(nfa, right, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    /* Order matters: the left branch is listed before the right one. */
+    nfa_add_transition(fork, first.start, TRANS_EPSILON, '\0');
+    nfa_add_transition(fork, second.start, TRANS_EPSILON, '\0');
+    nfa_add_transition(first.accept, join, TRANS_EPSILON, '\0');
+    nfa_add_transition(second.accept, join, TRANS_EPSILON, '\0');
+
+    frag.start = fork;
+    frag.accept = join;
+    return frag;
+}
+
+/* Fragment for `child*`: a new start state may enter the child or skip to
+ * a new accept state; the child's accept loops back to the child's start
+ * or leaves to the accept state. `greedy` only decides which of the two
+ * start-state alternatives is listed first (child first when greedy). */
+static nfa_fragment_t build_star(nfa_t *nfa, ast_node_t *child, bool greedy, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+    if (entry == NULL || leave == NULL) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return frag;
+    }
+
+    nfa_fragment_t body = build_nfa(nfa, child, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    nfa_state_t *preferred = greedy ? body.start : leave;
+    nfa_state_t *fallback = greedy ? leave : body.start;
+    nfa_add_transition(entry, preferred, TRANS_EPSILON, '\0');
+    nfa_add_transition(entry, fallback, TRANS_EPSILON, '\0');
+
+    nfa_add_transition(body.accept, body.start, TRANS_EPSILON, '\0');
+    nfa_add_transition(body.accept, leave, TRANS_EPSILON, '\0');
+
+    frag.start = entry;
+    frag.accept = leave;
+    return frag;
+}
+
+/* Fragment for `child+`: the child must be traversed once; from its accept
+ * state the automaton may loop back to the child's start or leave to a new
+ * accept state. `greedy` only decides which of those two is listed first
+ * (loop first when greedy). The fragment starts at the child's start. */
+static nfa_fragment_t build_plus(nfa_t *nfa, ast_node_t *child, bool greedy, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    nfa_state_t *leave = nfa_add_state(nfa);
+    if (leave == NULL) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return frag;
+    }
+
+    nfa_fragment_t body = build_nfa(nfa, child, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    nfa_state_t *preferred = greedy ? body.start : leave;
+    nfa_state_t *fallback = greedy ? leave : body.start;
+    nfa_add_transition(body.accept, preferred, TRANS_EPSILON, '\0');
+    nfa_add_transition(body.accept, fallback, TRANS_EPSILON, '\0');
+
+    frag.start = body.start;
+    frag.accept = leave;
+    return frag;
+}
+
+/* Fragment for `child?`: a new start state may enter the child or skip to
+ * a new accept state, and the child's accept leads to that accept state.
+ * `greedy` only decides which start-state alternative is listed first
+ * (child first when greedy). */
+static nfa_fragment_t build_question(nfa_t *nfa, ast_node_t *child, bool greedy, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+    if (entry == NULL || leave == NULL) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return frag;
+    }
+
+    nfa_fragment_t body = build_nfa(nfa, child, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    nfa_state_t *preferred = greedy ? body.start : leave;
+    nfa_state_t *fallback = greedy ? leave : body.start;
+    nfa_add_transition(entry, preferred, TRANS_EPSILON, '\0');
+    nfa_add_transition(entry, fallback, TRANS_EPSILON, '\0');
+
+    nfa_add_transition(body.accept, leave, TRANS_EPSILON, '\0');
+
+    frag.start = entry;
+    frag.accept = leave;
+    return frag;
+}
+
+/* Fragment for a capture group: the child is wrapped between a
+ * TRANS_GROUP_START transition into it and a TRANS_GROUP_END transition
+ * out of it, both tagged with `group_id`. The NFA's group_count is raised
+ * to group_id + 1 when it is currently smaller. */
+static nfa_fragment_t build_group(nfa_t *nfa, ast_node_t *child, int group_id, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    nfa_state_t *open = nfa_add_state(nfa);
+    nfa_state_t *close = nfa_add_state(nfa);
+    if (open == NULL || close == NULL) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return frag;
+    }
+
+    nfa_fragment_t inner = build_nfa(nfa, child, error);
+    if (*error != LOREG_OK) {
+        return frag;
+    }
+
+    nfa_add_group_transition(open, inner.start, TRANS_GROUP_START, group_id);
+    nfa_add_group_transition(inner.accept, close, TRANS_GROUP_END, group_id);
+
+    const int needed = group_id + 1;
+    if (nfa->group_count < needed) {
+        nfa->group_count = needed;
+    }
+
+    frag.start = open;
+    frag.accept = close;
+    return frag;
+}
+
+/* Fragment for an anchor: two new states joined by a transition of the
+ * given anchor `type`. On allocation failure *error is set to
+ * LOREG_ERR_OUT_OF_MEMORY and an empty fragment is returned. */
+static nfa_fragment_t build_anchor(nfa_t *nfa, transition_type_t type, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+    nfa_state_t *entry = nfa_add_state(nfa);
+    nfa_state_t *leave = nfa_add_state(nfa);
+
+    if (entry && leave) {
+        nfa_add_transition(entry, leave, type, '\0');
+        frag.start = entry;
+        frag.accept = leave;
+    } else {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+    }
+    return frag;
+}
+
+/*
+ * Fragment for a counted repetition of `child`.
+ *
+ * min:    number of mandatory copies of the child.
+ * max:    upper bound; a negative value means "no upper bound".
+ * greedy: affects only the order in which alternative epsilon transitions
+ *         are added.
+ *
+ * The child sub-automaton is rebuilt from the AST once per repetition.
+ * On any failure an empty fragment is returned with *error set by the
+ * failing step.
+ * NOTE(review): max < min is not checked here (the optional loop then
+ * simply does not run) — presumably validated by the parser; confirm.
+ */
+static nfa_fragment_t build_quantifier(nfa_t *nfa, ast_node_t *child, int min, int max, bool greedy, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    /* {0,0}: matches only the empty string, so a single state suffices. */
+    if (min == 0 && max == 0) {
+        nfa_state_t *state = nfa_add_state(nfa);
+        if (!state) {
+            *error = LOREG_ERR_OUT_OF_MEMORY;
+            return frag;
+        }
+        frag.start = state;
+        frag.accept = state;
+        return frag;
+    }
+
+    nfa_state_t *start = nfa_add_state(nfa);
+    if (!start) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return frag;
+    }
+
+    /* `current` is the tail of the chain built so far. */
+    nfa_state_t *current = start;
+
+    /* Mandatory part: `min` copies of the child chained by epsilons. */
+    for (int i = 0; i < min; i++) {
+        nfa_fragment_t rep = build_nfa(nfa, child, error);
+        if (*error != LOREG_OK) return frag;
+        nfa_add_transition(current, rep.start, TRANS_EPSILON, '\0');
+        current = rep.accept;
+    }
+
+    if (max < 0) {
+        /* Unbounded tail: a loop state that either enters one more copy of
+         * the child (which returns to the loop state) or leaves to accept. */
+        nfa_state_t *loop_start = nfa_add_state(nfa);
+        nfa_state_t *accept = nfa_add_state(nfa);
+        if (!loop_start || !accept) {
+            *error = LOREG_ERR_OUT_OF_MEMORY;
+            return frag;
+        }
+
+        nfa_add_transition(current, loop_start, TRANS_EPSILON, '\0');
+
+        nfa_fragment_t rep = build_nfa(nfa, child, error);
+        if (*error != LOREG_OK) return frag;
+
+        if (greedy) {
+            nfa_add_transition(loop_start, rep.start, TRANS_EPSILON, '\0');
+            nfa_add_transition(loop_start, accept, TRANS_EPSILON, '\0');
+        } else {
+            nfa_add_transition(loop_start, accept, TRANS_EPSILON, '\0');
+            nfa_add_transition(loop_start, rep.start, TRANS_EPSILON, '\0');
+        }
+        nfa_add_transition(rep.accept, loop_start, TRANS_EPSILON, '\0');
+
+        frag.start = start;
+        frag.accept = accept;
+    } else {
+        /* Bounded tail: up to (max - min) optional copies, each of which
+         * can be skipped by going straight to the shared accept state. */
+        nfa_state_t *accept = nfa_add_state(nfa);
+        if (!accept) {
+            *error = LOREG_ERR_OUT_OF_MEMORY;
+            return frag;
+        }
+
+        /* Stopping right after the mandatory part is always allowed. */
+        nfa_add_transition(current, accept, TRANS_EPSILON, '\0');
+
+        for (int i = min; i < max; i++) {
+            nfa_fragment_t rep = build_nfa(nfa, child, error);
+            if (*error != LOREG_OK) return frag;
+
+            if (greedy) {
+                nfa_add_transition(current, rep.start, TRANS_EPSILON, '\0');
+            } else {
+                nfa_add_transition(current, accept, TRANS_EPSILON, '\0');
+                nfa_add_transition(current, rep.start, TRANS_EPSILON, '\0');
+            }
+
+            if (greedy) {
+                nfa_add_transition(rep.accept, accept, TRANS_EPSILON, '\0');
+            }
+            current = rep.accept;
+        }
+
+        /* In the non-greedy layout the last copy's exit is linked here. */
+        if (!greedy) {
+            nfa_add_transition(current, accept, TRANS_EPSILON, '\0');
+        }
+
+        frag.start = start;
+        frag.accept = accept;
+    }
+
+    return frag;
+}
+
+/*
+ * Build the NFA fragment for an AST node by dispatching on its type.
+ *
+ * A NULL node stands for the empty pattern and yields a single state that
+ * is both start and accept. A node whose type is not handled below is
+ * reported as LOREG_ERR_INVALID_PATTERN rather than returned as an empty
+ * fragment with no error, which callers would go on to dereference.
+ *
+ * Returns the fragment; on failure *error is set and the fragment is
+ * {NULL, NULL}.
+ */
+static nfa_fragment_t build_nfa(nfa_t *nfa, ast_node_t *ast, loreg_error_t *error) {
+    nfa_fragment_t frag = {NULL, NULL};
+
+    if (!ast) {
+        nfa_state_t *state = nfa_add_state(nfa);
+        if (!state) {
+            *error = LOREG_ERR_OUT_OF_MEMORY;
+            return frag;
+        }
+        frag.start = state;
+        frag.accept = state;
+        return frag;
+    }
+
+    switch (ast->type) {
+        case AST_CHAR:
+            return build_char(nfa, ast->value, error);
+        case AST_DOT:
+            return build_dot(nfa, error);
+        case AST_CONCAT:
+            return build_concat(nfa, ast->left, ast->right, error);
+        case AST_ALTER:
+            return build_alter(nfa, ast->left, ast->right, error);
+        case AST_STAR:
+            return build_star(nfa, ast->left, ast->quant.greedy, error);
+        case AST_PLUS:
+            return build_plus(nfa, ast->left, ast->quant.greedy, error);
+        case AST_QUESTION:
+            return build_question(nfa, ast->left, ast->quant.greedy, error);
+        case AST_GROUP:
+            return build_group(nfa, ast->left, ast->group_id, error);
+        case AST_ANCHOR_START:
+            return build_anchor(nfa, TRANS_ANCHOR_START, error);
+        case AST_ANCHOR_END:
+            return build_anchor(nfa, TRANS_ANCHOR_END, error);
+        case AST_BRACKET:
+            return build_bracket(nfa, ast->bracket, error);
+        case AST_QUANTIFIER:
+            return build_quantifier(nfa, ast->left, ast->quant.min, ast->quant.max, ast->quant.greedy, error);
+        case AST_CLASS_DIGIT:
+            return build_class(nfa, TRANS_CLASS_DIGIT, error);
+        case AST_CLASS_WORD:
+            return build_class(nfa, TRANS_CLASS_WORD, error);
+        case AST_CLASS_SPACE:
+            return build_class(nfa, TRANS_CLASS_SPACE, error);
+        case AST_CLASS_NDIGIT:
+            return build_class(nfa, TRANS_CLASS_NDIGIT, error);
+        case AST_CLASS_NWORD:
+            return build_class(nfa, TRANS_CLASS_NWORD, error);
+        case AST_CLASS_NSPACE:
+            return build_class(nfa, TRANS_CLASS_NSPACE, error);
+    }
+
+    /* Only reachable for a node type the switch does not know about. */
+    *error = LOREG_ERR_INVALID_PATTERN;
+    return frag;
+}
+
+/*
+ * Build a complete NFA from a parse tree.
+ *
+ * ast:   root of the tree; NULL produces an NFA for the empty pattern.
+ * error: receives LOREG_OK or the failure reason.
+ *
+ * Returns a new NFA whose start state is the fragment's start and whose
+ * fragment accept state is marked accepting; release it with nfa_free().
+ * Returns NULL on failure, with *error set.
+ */
+nfa_t *nfa_from_ast(ast_node_t *ast, loreg_error_t *error) {
+    *error = LOREG_OK;
+    nfa_t *nfa = nfa_create();
+    if (!nfa) {
+        *error = LOREG_ERR_OUT_OF_MEMORY;
+        return NULL;
+    }
+
+    nfa_fragment_t frag = build_nfa(nfa, ast, error);
+
+    /* Never dereference an incomplete fragment, even if the builder did
+     * not flag an error for it. */
+    if (*error == LOREG_OK && (!frag.start || !frag.accept)) {
+        *error = LOREG_ERR_INVALID_PATTERN;
+    }
+
+    if (*error != LOREG_OK) {
+        nfa_free(nfa);
+        return NULL;
+    }
+
+    nfa->start = frag.start;
+    frag.accept->accepting = true;
+
+    return nfa;
+}
--- a/src/parser.c
+++ b/src/parser.c
@ -0,0 +1,309 @@
+/* retoor <retoor@molodetz.nl> */
+#include "parser.h"
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+
+static void parser_advance(parser_t *parser) { /* Replace the lookahead token with the next token from the lexer. */
+    parser->current = lexer_next(&parser->lexer);
+}
+
+/*
+ * Prepare `parser` for `pattern`: clears the stored error code and the
+ * group counter, initialises the embedded lexer, and loads the first
+ * token as lookahead.
+ */
+void parser_init(parser_t *parser, const char *pattern) {
+    parser->group_count = 0;
+    parser->error = LOREG_OK;
+
+    lexer_init(&parser->lexer, pattern);
+    parser_advance(parser);
+}
+
+loreg_error_t parser_get_error(parser_t *parser) { /* Return the error code currently stored in the parser. */
+    return parser->error;
+}
+
+static ast_node_t *parse_expr(parser_t *parser); /* terms separated by TOKEN_PIPE */
+static ast_node_t *parse_term(parser_t *parser); /* run of factors, concatenated */
+static ast_node_t *parse_factor(parser_t *parser); /* atom followed by any quantifiers */
+static ast_node_t *parse_atom(parser_t *parser); /* single element: char, dot, anchor, group, bracket, class */
+static ast_node_t *parse_bracket(parser_t *parser); /* body of a [...] expression */
+static int parse_number(parser_t *parser); /* decimal digits used inside {...} */
+
+/*
+ * expr := term ('|' term)*
+ *
+ * Builds a left-leaning chain of alternation nodes. Returns NULL when the
+ * first term is NULL, or when a later term is NULL (the tree built so far
+ * is freed first). The only error this function sets itself is
+ * LOREG_ERR_OUT_OF_MEMORY, when ast_create_alter() returns NULL.
+ */
+static ast_node_t *parse_expr(parser_t *parser) {
+    ast_node_t *tree = parse_term(parser);
+
+    if (tree == NULL) return NULL;
+    if (parser->error != LOREG_OK) return tree;
+
+    for (;;) {
+        ast_node_t *branch;
+        ast_node_t *joined;
+
+        if (parser->current.type != TOKEN_PIPE) break;
+        parser_advance(parser);
+
+        branch = parse_term(parser);
+        if (branch == NULL) {
+            ast_free(tree);
+            return NULL;
+        }
+
+        joined = ast_create_alter(tree, branch);
+        if (joined == NULL) {
+            parser->error = LOREG_ERR_OUT_OF_MEMORY;
+            return NULL;
+        }
+        tree = joined;
+    }
+
+    return tree;
+}
+
+/*
+ * term := factor*
+ *
+ * Consumes factors until the lookahead is TOKEN_EOF, TOKEN_PIPE or
+ * TOKEN_RPAREN and chains them with concatenation nodes. Returns NULL for
+ * an empty term, or when a factor is NULL (the partial chain is freed).
+ * Sets LOREG_ERR_OUT_OF_MEMORY when ast_create_concat() returns NULL.
+ */
+static ast_node_t *parse_term(parser_t *parser) {
+    ast_node_t *chain = NULL;
+
+    for (;;) {
+        token_type_t kind = parser->current.type;
+        ast_node_t *piece;
+
+        if (kind == TOKEN_EOF || kind == TOKEN_PIPE || kind == TOKEN_RPAREN) {
+            break;
+        }
+
+        piece = parse_factor(parser);
+        if (piece == NULL) {
+            ast_free(chain);
+            return NULL;
+        }
+
+        if (chain == NULL) {
+            chain = piece;
+            continue;
+        }
+
+        chain = ast_create_concat(chain, piece);
+        if (chain == NULL) {
+            parser->error = LOREG_ERR_OUT_OF_MEMORY;
+            return NULL;
+        }
+    }
+
+    return chain;
+}
+
+/*
+ * factor := atom quantifier*
+ * quantifier := ('*' | '+' | '?' | '{' min [',' [max]] '}') ['?']
+ *
+ * Wraps the atom in one node per quantifier. A '?' directly after a
+ * quantifier makes it non-greedy. For brace quantifiers the lazy marker is
+ * only recognised after the closing '}', and a bounded range whose maximum
+ * is smaller than its minimum is rejected.
+ *
+ * Returns the resulting node, or NULL with parser->error set to
+ * LOREG_ERR_INVALID_QUANTIFIER (malformed braces) or
+ * LOREG_ERR_OUT_OF_MEMORY (node creation returned NULL).
+ *
+ * NOTE(review): when an ast_create_* call returns NULL the operand is not
+ * freed here; confirm whether those constructors take ownership on failure.
+ */
+static ast_node_t *parse_factor(parser_t *parser) {
+    ast_node_t *atom = parse_atom(parser);
+    if (!atom || parser->error != LOREG_OK) return atom;
+
+    while (parser->current.type == TOKEN_STAR ||
+           parser->current.type == TOKEN_PLUS ||
+           parser->current.type == TOKEN_QUESTION ||
+           parser->current.type == TOKEN_LBRACE) {
+
+        token_type_t quant_type = parser->current.type;
+        bool greedy = true;
+        parser_advance(parser);
+
+        if (quant_type == TOKEN_LBRACE) {
+            int min = parse_number(parser);
+            int max = min;
+
+            if (parser->current.type == TOKEN_CHAR && parser->current.value == ',') {
+                parser_advance(parser);
+                if (parser->current.type == TOKEN_RBRACE) {
+                    max = -1; /* "{n,}": no upper bound */
+                } else {
+                    max = parse_number(parser);
+                }
+            }
+
+            /* Anything other than '}' here, or an inverted range such as
+             * {5,2}, is a malformed quantifier. */
+            if (parser->current.type != TOKEN_RBRACE ||
+                (max >= 0 && max < min)) {
+                parser->error = LOREG_ERR_INVALID_QUANTIFIER;
+                ast_free(atom);
+                return NULL;
+            }
+            parser_advance(parser);
+
+            if (parser->current.type == TOKEN_QUESTION) {
+                greedy = false;
+                parser_advance(parser);
+            }
+
+            atom = ast_create_quantifier(atom, min, max, greedy);
+        } else {
+            /* Lazy marker for '*', '+' and '?'. */
+            if (parser->current.type == TOKEN_QUESTION) {
+                greedy = false;
+                parser_advance(parser);
+            }
+
+            if (quant_type == TOKEN_STAR) {
+                atom = ast_create_star(atom, greedy);
+            } else if (quant_type == TOKEN_PLUS) {
+                atom = ast_create_plus(atom, greedy);
+            } else {
+                atom = ast_create_question(atom, greedy);
+            }
+        }
+
+        if (!atom) {
+            parser->error = LOREG_ERR_OUT_OF_MEMORY;
+            return NULL;
+        }
+    }
+    return atom;
+}
+
+/*
+ * Read a run of decimal-digit TOKEN_CHAR tokens and return its value.
+ *
+ * Returns 0 when the lookahead is not a digit. If the next digit would
+ * push the value past INT_MAX, parser->error is set to
+ * LOREG_ERR_INVALID_QUANTIFIER and the offending digit is left unconsumed,
+ * so the caller's check for a closing '}' fails as well.
+ */
+static int parse_number(parser_t *parser) {
+    int num = 0;
+
+    /* Compare against '0'..'9' directly: avoids passing a possibly
+     * negative char to isdigit() and is independent of the locale. */
+    while (parser->current.type == TOKEN_CHAR &&
+           parser->current.value >= '0' && parser->current.value <= '9') {
+        int digit = parser->current.value - '0';
+
+        /* Check before multiplying: signed overflow is undefined. */
+        if (num > (INT_MAX - digit) / 10) {
+            parser->error = LOREG_ERR_INVALID_QUANTIFIER;
+            return num;
+        }
+
+        num = num * 10 + digit;
+        parser_advance(parser);
+    }
+    return num;
+}
+
+/*
+ * Parse a single atom: literal char, '.', '^', '$', a parenthesised group,
+ * a bracket expression, or a class escape token.
+ *
+ * Returns the new node, or NULL. NULL without an error is returned for
+ * TOKEN_EOF, TOKEN_PIPE and TOKEN_RPAREN. Any other unexpected token sets
+ * LOREG_ERR_INVALID_PATTERN. A missing ')' sets
+ * LOREG_ERR_UNBALANCED_PAREN, unless parsing the group body already
+ * recorded an error, in which case that error is kept. A NULL node with no
+ * error recorded is reported as LOREG_ERR_OUT_OF_MEMORY.
+ */
+static ast_node_t *parse_atom(parser_t *parser) {
+    ast_node_t *node = NULL;
+
+    switch (parser->current.type) {
+        case TOKEN_CHAR:
+            node = ast_create_char(parser->current.value);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_DOT:
+            node = ast_create_dot();
+            parser_advance(parser);
+            break;
+
+        case TOKEN_CARET:
+            node = ast_create_anchor_start();
+            parser_advance(parser);
+            break;
+
+        case TOKEN_DOLLAR:
+            node = ast_create_anchor_end();
+            parser_advance(parser);
+            break;
+
+        case TOKEN_LPAREN: {
+            parser_advance(parser);
+            /* Ids are handed out in order of the opening parenthesis. */
+            int group_id = parser->group_count++;
+            ast_node_t *inner = parse_expr(parser);
+
+            /* Keep the error raised inside the group instead of
+             * overwriting it with "unbalanced paren". */
+            if (parser->error != LOREG_OK) {
+                ast_free(inner);
+                return NULL;
+            }
+            if (parser->current.type != TOKEN_RPAREN) {
+                parser->error = LOREG_ERR_UNBALANCED_PAREN;
+                ast_free(inner);
+                return NULL;
+            }
+            parser_advance(parser);
+            node = ast_create_group(inner, group_id);
+            break;
+        }
+
+        case TOKEN_LBRACKET:
+            node = parse_bracket(parser);
+            break;
+
+        case TOKEN_CLASS_DIGIT:
+            node = ast_create_class(AST_CLASS_DIGIT);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_CLASS_WORD:
+            node = ast_create_class(AST_CLASS_WORD);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_CLASS_SPACE:
+            node = ast_create_class(AST_CLASS_SPACE);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_CLASS_NDIGIT:
+            node = ast_create_class(AST_CLASS_NDIGIT);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_CLASS_NWORD:
+            node = ast_create_class(AST_CLASS_NWORD);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_CLASS_NSPACE:
+            node = ast_create_class(AST_CLASS_NSPACE);
+            parser_advance(parser);
+            break;
+
+        case TOKEN_EOF:
+        case TOKEN_PIPE:
+        case TOKEN_RPAREN:
+            /* Nothing to parse here; not an error by itself. */
+            return NULL;
+
+        default:
+            parser->error = LOREG_ERR_INVALID_PATTERN;
+            return NULL;
+    }
+
+    /* A constructor returned NULL without anyone recording why. */
+    if (!node && parser->error == LOREG_OK) {
+        parser->error = LOREG_ERR_OUT_OF_MEMORY;
+    }
+    return node;
+}
+
+/*
+ * Parse the body of a bracket expression; the lookahead is the opening
+ * '[' on entry and the token after the closing ']' on success.
+ *
+ * A leading '^' negates the set. \d, \w and \s add their member ranges.
+ * "x-y" adds a range; a '-' directly before ']' is taken literally. A
+ * range whose end sorts before its start (e.g. "[z-a]") is rejected.
+ *
+ * Returns the node from ast_create_bracket(), or NULL with parser->error
+ * set to LOREG_ERR_OUT_OF_MEMORY (bracket_create() failed) or
+ * LOREG_ERR_INVALID_PATTERN (missing ']' or reversed range).
+ */
+static ast_node_t *parse_bracket(parser_t *parser) {
+    parser_advance(parser);
+
+    bracket_class_t *bracket = bracket_create();
+    if (!bracket) {
+        parser->error = LOREG_ERR_OUT_OF_MEMORY;
+        return NULL;
+    }
+
+    if (parser->current.type == TOKEN_CARET) {
+        bracket->negated = true;
+        parser_advance(parser);
+    }
+
+    while (parser->current.type != TOKEN_RBRACKET && parser->current.type != TOKEN_EOF) {
+        bool is_class = true;
+
+        switch (parser->current.type) {
+            case TOKEN_CLASS_DIGIT:
+                bracket_add_range(bracket, '0', '9');
+                break;
+            case TOKEN_CLASS_WORD:
+                bracket_add_range(bracket, 'a', 'z');
+                bracket_add_range(bracket, 'A', 'Z');
+                bracket_add_range(bracket, '0', '9');
+                bracket_add_char(bracket, '_');
+                break;
+            case TOKEN_CLASS_SPACE:
+                bracket_add_char(bracket, ' ');
+                bracket_add_char(bracket, '\t');
+                bracket_add_char(bracket, '\n');
+                bracket_add_char(bracket, '\r');
+                bracket_add_char(bracket, '\f');
+                bracket_add_char(bracket, '\v');
+                break;
+            case TOKEN_CLASS_NDIGIT:
+            case TOKEN_CLASS_NWORD:
+            case TOKEN_CLASS_NSPACE:
+                /* NOTE(review): negated class escapes inside brackets are
+                 * consumed but add nothing to the set. */
+                break;
+            default:
+                is_class = false;
+                break;
+        }
+
+        if (is_class) {
+            parser_advance(parser);
+            continue;
+        }
+
+        char start = parser->current.value;
+        parser_advance(parser);
+
+        if (parser->current.type == TOKEN_DASH) {
+            parser_advance(parser);
+            if (parser->current.type == TOKEN_RBRACKET || parser->current.type == TOKEN_EOF) {
+                /* Trailing '-' is a literal dash, not a range. */
+                bracket_add_char(bracket, start);
+                bracket_add_char(bracket, '-');
+            } else {
+                char end = parser->current.value;
+                /* Compare as unsigned so bytes above 0x7f order correctly. */
+                if ((unsigned char)end < (unsigned char)start) {
+                    bracket_free(bracket);
+                    parser->error = LOREG_ERR_INVALID_PATTERN;
+                    return NULL;
+                }
+                bracket_add_range(bracket, start, end);
+                parser_advance(parser);
+            }
+        } else {
+            bracket_add_char(bracket, start);
+        }
+    }
+
+    if (parser->current.type != TOKEN_RBRACKET) {
+        bracket_free(bracket);
+        parser->error = LOREG_ERR_INVALID_PATTERN;
+        return NULL;
+    }
+    parser_advance(parser);
+
+    return ast_create_bracket(bracket);
+}
+
+/*
+ * Parse the whole pattern and return its AST.
+ *
+ * Returns NULL without an error for an empty pattern. If parsing stops at
+ * a ')' that no group consumed, the partial tree is freed and
+ * parser->error is set to LOREG_ERR_UNBALANCED_PAREN, instead of silently
+ * ignoring the rest of the pattern. Other failures return NULL with the
+ * error recorded by the sub-parsers.
+ */
+ast_node_t *parser_parse(parser_t *parser) {
+    if (parser->current.type == TOKEN_EOF) {
+        return NULL;
+    }
+
+    ast_node_t *ast = parse_expr(parser);
+
+    /* parse_term() stops at TOKEN_RPAREN and only the group case of
+     * parse_atom() consumes it, so one left over here has no matching '('. */
+    if (parser->error == LOREG_OK && parser->current.type == TOKEN_RPAREN) {
+        parser->error = LOREG_ERR_UNBALANCED_PAREN;
+        ast_free(ast);
+        return NULL;
+    }
+
+    return ast;
+}
--- a/src/repl.c
+++ b/src/repl.c
@ -0,0 +1,170 @@
+/* retoor <retoor@molodetz.nl> */
+#include "repl.h"
+#include "loreg.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_INPUT 4096
+
+/* Print the version line and the one-line command summary shown at startup. */
+static void print_banner(void) {
+    static const char summary[] =
+        "commands: :q quit, :h help, :p <pattern> set pattern, :m <text> match, :s <text> search\n\n";
+
+    printf("loreg v%s - regex interpreter\n", LOREG_VERSION);
+    fputs(summary, stdout);
+}
+
+/* Print the REPL command list followed by the supported regex syntax. */
+static void print_help(void) {
+    static const char *const help_lines[] = {
+        "loreg REPL commands:\n",
+        "  :q          quit\n",
+        "  :h          show this help\n",
+        "  :p <regex>  compile and set pattern\n",
+        "  :m <text>   match text against pattern (anchored)\n",
+        "  :s <text>   search for pattern in text\n",
+        "  <text>      search for pattern in text\n\n",
+        "regex syntax:\n",
+        "  .           any character\n",
+        "  *           zero or more\n",
+        "  +           one or more\n",
+        "  ?           zero or one\n",
+        "  |           alternation\n",
+        "  ()          grouping\n",
+        "  []          character class\n",
+        "  [^]         negated class\n",
+        "  ^           start anchor\n",
+        "  $           end anchor\n",
+        "  {n}         exactly n\n",
+        "  {n,}        n or more\n",
+        "  {n,m}       n to m\n",
+        "  \\d          digit\n",
+        "  \\w          word character\n",
+        "  \\s          whitespace\n",
+        "  \\D \\W \\S    negated classes\n\n",
+    };
+    size_t total = sizeof help_lines / sizeof help_lines[0];
+
+    /* None of the lines contains a conversion, so they are written as-is. */
+    for (size_t idx = 0; idx < total; idx++) {
+        fputs(help_lines[idx], stdout);
+    }
+}
+
+/* Write text[from..to) to stdout; writes nothing when the span is empty. */
+static void emit_slice(const char *text, size_t from, size_t to) {
+    if (to > from) {
+        fwrite(text + from, 1, to - from, stdout);
+    }
+}
+
+/*
+ * Print a match result for `text`: "no match", or the matched substring
+ * with its offsets followed by one line per group flagged as matched.
+ */
+static void print_match(const char *text, loreg_match_t *result) {
+    size_t g;
+
+    if (!result->matched) {
+        printf("no match\n");
+        return;
+    }
+
+    printf("match: \"");
+    emit_slice(text, result->match_start, result->match_end);
+    printf("\" [%zu-%zu]\n", result->match_start, result->match_end);
+
+    for (g = 0; g < result->group_count; g++) {
+        if (!result->groups[g].matched) {
+            continue;
+        }
+        printf("  group %zu: \"", g);
+        emit_slice(text, result->groups[g].start, result->groups[g].end);
+        printf("\" [%zu-%zu]\n", result->groups[g].start, result->groups[g].end);
+    }
+}
+
+/*
+ * Prompt with "> " and read one line from stdin into a static buffer.
+ *
+ * Returns the buffer with the trailing newline removed, or NULL when
+ * fgets() reports end of input or an error. The buffer is reused by the
+ * next call. Input longer than MAX_INPUT - 1 characters is truncated and
+ * the rest of that line is discarded, so the overflow is not returned as
+ * a separate line by the next call.
+ */
+static char *read_line(void) {
+    static char buffer[MAX_INPUT];
+    printf("> ");
+    fflush(stdout);
+
+    if (!fgets(buffer, MAX_INPUT, stdin)) {
+        return NULL;
+    }
+
+    size_t len = strlen(buffer);
+    if (len > 0 && buffer[len - 1] == '\n') {
+        buffer[len - 1] = '\0';
+    } else if (len == MAX_INPUT - 1) {
+        /* Buffer filled before the newline: drop the remainder. */
+        int ch;
+        while ((ch = getchar()) != EOF && ch != '\n') {
+            /* discard */
+        }
+    }
+
+    return buffer;
+}
+
+/* Return the first character of `s` that is not a space. */
+static const char *skip_blanks(const char *s) {
+    while (*s == ' ') s++;
+    return s;
+}
+
+/*
+ * Interactive loop: reads lines until end of input or ":q"/":quit".
+ *
+ *   :h / :help   print help
+ *   :p <regex>   free the current regex and compile a new one
+ *   :m <text>    run loreg_match() on the text
+ *   :s <text>    run loreg_search() on the text
+ *   :<other>     report an unknown command
+ *   <text>       run loreg_search() on the whole line
+ *
+ * Empty lines are ignored. The compiled regex is freed on exit.
+ */
+void repl_run(void) {
+    loreg_regex_t *active = NULL;
+    char *input;
+
+    print_banner();
+
+    for (input = read_line(); input != NULL; input = read_line()) {
+        int wants_match;
+        int wants_search;
+        const char *subject;
+        loreg_match_t outcome;
+
+        if (input[0] == '\0') continue;
+
+        if (!strcmp(input, ":q") || !strcmp(input, ":quit")) {
+            break;
+        }
+
+        if (!strcmp(input, ":h") || !strcmp(input, ":help")) {
+            print_help();
+            continue;
+        }
+
+        if (!strncmp(input, ":p ", 3)) {
+            const char *pattern = skip_blanks(input + 3);
+            loreg_error_t status;
+
+            /* The previous pattern is dropped even if the new one fails. */
+            if (active) {
+                loreg_free(active);
+                active = NULL;
+            }
+
+            active = loreg_compile(pattern, &status);
+            if (active) {
+                printf("pattern compiled: %s\n", pattern);
+            } else {
+                printf("error: %s\n", loreg_error_string(status));
+            }
+            continue;
+        }
+
+        wants_match = !strncmp(input, ":m ", 3);
+        wants_search = !strncmp(input, ":s ", 3);
+
+        if (!wants_match && !wants_search && input[0] == ':') {
+            printf("unknown command: %s\n", input);
+            continue;
+        }
+
+        if (!active) {
+            printf("error: no pattern set (use :p <pattern>)\n");
+            continue;
+        }
+
+        /* Commands carry their text after the prefix; a bare line is the text. */
+        subject = (wants_match || wants_search) ? skip_blanks(input + 3) : input;
+
+        if (wants_match) {
+            loreg_match(active, subject, &outcome);
+        } else {
+            loreg_search(active, subject, &outcome);
+        }
+        print_match(subject, &outcome);
+    }
+
+    if (active) {
+        loreg_free(active);
+    }
+
+    printf("\n");
+}
--- a/tests/test_all.c
+++ b/tests/test_all.c
@ -0,0 +1,252 @@
+/* retoor <retoor@molodetz.nl> */
+#include "../include/loreg.h"
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+static int total_passed = 0;
+static int total_failed = 0;
+
+/*
+ * ASSERT: on a false condition print the message, bump total_failed and
+ * return from the enclosing test function (statements after it, including
+ * cleanup, are skipped).
+ */
+#define ASSERT(cond, msg) do { \
+    if (!(cond)) { \
+        printf("  FAIL: %s\n", msg); \
+        total_failed++; \
+        return; \
+    } \
+} while(0)
+
+/* TEST: declares the test function test_<name>. */
+#define TEST(name) static void test_##name(void)
+
+/*
+ * RUN: invoke test_<name> and count it as passed only if total_failed did
+ * not change while it ran, so a test is never counted as both.
+ */
+#define RUN(name) do { \
+    int failed_before_ = total_failed; \
+    test_##name(); \
+    if (total_failed == failed_before_) { \
+        total_passed++; \
+    } \
+} while(0)
+
+/* A literal pattern is found as the whole text or inside it; a near miss is not. */
+TEST(basic_literals) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex = loreg_compile("hello", &status);
+
+    ASSERT(regex != NULL, "compile hello");
+    ASSERT(loreg_search(regex, "hello", &hit), "match hello");
+    ASSERT(loreg_search(regex, "say hello world", &hit), "search hello");
+    ASSERT(!loreg_search(regex, "helo", &hit), "no match helo");
+
+    loreg_free(regex);
+}
+
+/* Covers '.', the '^' start anchor and the '$' end anchor. */
+TEST(metacharacters) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    /* '.' needs exactly one character between 'a' and 'c' */
+    regex = loreg_compile("a.c", &status);
+    ASSERT(regex != NULL, "compile a.c");
+    ASSERT(loreg_search(regex, "abc", &hit), "match abc");
+    ASSERT(loreg_search(regex, "axc", &hit), "match axc");
+    ASSERT(!loreg_search(regex, "ac", &hit), "no match ac");
+    loreg_free(regex);
+
+    /* '^' pins the match to the start of the text */
+    regex = loreg_compile("^start", &status);
+    ASSERT(regex != NULL, "compile ^start");
+    ASSERT(loreg_search(regex, "start here", &hit), "match start here");
+    ASSERT(!loreg_search(regex, "not start", &hit), "no match not start");
+    loreg_free(regex);
+
+    /* '$' pins the match to the end of the text */
+    regex = loreg_compile("end$", &status);
+    ASSERT(regex != NULL, "compile end$");
+    ASSERT(loreg_search(regex, "the end", &hit), "match the end");
+    ASSERT(!loreg_search(regex, "end here", &hit), "no match end here");
+    loreg_free(regex);
+}
+
+/* Covers '*', '+', '?', '{n}' and '{n,m}'. */
+TEST(quantifiers) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    /* zero or more */
+    regex = loreg_compile("ab*c", &status);
+    ASSERT(regex != NULL, "compile ab*c");
+    ASSERT(loreg_search(regex, "ac", &hit), "match ac");
+    ASSERT(loreg_search(regex, "abc", &hit), "match abc");
+    ASSERT(loreg_search(regex, "abbbbc", &hit), "match abbbbc");
+    loreg_free(regex);
+
+    /* one or more */
+    regex = loreg_compile("ab+c", &status);
+    ASSERT(regex != NULL, "compile ab+c");
+    ASSERT(!loreg_search(regex, "ac", &hit), "no match ac");
+    ASSERT(loreg_search(regex, "abc", &hit), "match abc");
+    ASSERT(loreg_search(regex, "abbbbc", &hit), "match abbbbc");
+    loreg_free(regex);
+
+    /* zero or one */
+    regex = loreg_compile("ab?c", &status);
+    ASSERT(regex != NULL, "compile ab?c");
+    ASSERT(loreg_search(regex, "ac", &hit), "match ac");
+    ASSERT(loreg_search(regex, "abc", &hit), "match abc");
+    ASSERT(!loreg_search(regex, "abbc", &hit), "no match abbc");
+    loreg_free(regex);
+
+    /* exact count */
+    regex = loreg_compile("a{3}", &status);
+    ASSERT(regex != NULL, "compile a{3}");
+    ASSERT(loreg_search(regex, "aaa", &hit), "match aaa");
+    ASSERT(!loreg_search(regex, "aa", &hit), "no match aa");
+    loreg_free(regex);
+
+    /* bounded range */
+    regex = loreg_compile("a{2,4}", &status);
+    ASSERT(regex != NULL, "compile a{2,4}");
+    ASSERT(loreg_search(regex, "aa", &hit), "match aa");
+    ASSERT(loreg_search(regex, "aaa", &hit), "match aaa");
+    ASSERT(loreg_search(regex, "aaaa", &hit), "match aaaa");
+    ASSERT(!loreg_search(regex, "a", &hit), "no match a");
+    loreg_free(regex);
+}
+
+/* Covers bracket sets, ranges, negated sets and the \d, \w, \s escapes. */
+TEST(character_classes) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    /* explicit set */
+    regex = loreg_compile("[aeiou]", &status);
+    ASSERT(regex != NULL, "compile [aeiou]");
+    ASSERT(loreg_search(regex, "a", &hit), "match a");
+    ASSERT(loreg_search(regex, "test", &hit), "match test");
+    ASSERT(!loreg_search(regex, "xyz", &hit), "no match xyz");
+    loreg_free(regex);
+
+    /* range */
+    regex = loreg_compile("[a-z]", &status);
+    ASSERT(regex != NULL, "compile [a-z]");
+    ASSERT(loreg_search(regex, "m", &hit), "match m");
+    ASSERT(!loreg_search(regex, "5", &hit), "no match 5");
+    loreg_free(regex);
+
+    /* negated range */
+    regex = loreg_compile("[^0-9]", &status);
+    ASSERT(regex != NULL, "compile [^0-9]");
+    ASSERT(loreg_search(regex, "a", &hit), "match a");
+    ASSERT(!loreg_search(regex, "5", &hit), "no match 5");
+    loreg_free(regex);
+
+    /* digit escape */
+    regex = loreg_compile("\\d", &status);
+    ASSERT(regex != NULL, "compile \\d");
+    ASSERT(loreg_search(regex, "5", &hit), "match 5");
+    ASSERT(!loreg_search(regex, "a", &hit), "no match a");
+    loreg_free(regex);
+
+    /* word escape, repeated */
+    regex = loreg_compile("\\w+", &status);
+    ASSERT(regex != NULL, "compile \\w+");
+    ASSERT(loreg_search(regex, "hello_123", &hit), "match hello_123");
+    loreg_free(regex);
+
+    /* whitespace escape */
+    regex = loreg_compile("\\s", &status);
+    ASSERT(regex != NULL, "compile \\s");
+    ASSERT(loreg_search(regex, " ", &hit), "match space");
+    ASSERT(loreg_search(regex, "\t", &hit), "match tab");
+    ASSERT(!loreg_search(regex, "a", &hit), "no match a");
+    loreg_free(regex);
+}
+
+/* Covers a repeated group and the capture results of two groups. */
+TEST(groups) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    /* the quantifier applies to the whole group */
+    regex = loreg_compile("(ab)+", &status);
+    ASSERT(regex != NULL, "compile (ab)+");
+    ASSERT(loreg_search(regex, "ab", &hit), "match ab");
+    ASSERT(loreg_search(regex, "abab", &hit), "match abab");
+    ASSERT(!loreg_search(regex, "a", &hit), "no match a");
+    loreg_free(regex);
+
+    /* two groups: both must be reported as matched */
+    regex = loreg_compile("(\\d+)-(\\d+)", &status);
+    ASSERT(regex != NULL, "compile groups");
+    ASSERT(loreg_search(regex, "123-456", &hit), "match 123-456");
+    ASSERT(hit.group_count == 2, "2 groups");
+    ASSERT(hit.groups[0].matched, "group 0 matched");
+    ASSERT(hit.groups[1].matched, "group 1 matched");
+    loreg_free(regex);
+}
+
+/* Covers top-level alternation and alternation inside a group. */
+TEST(alternation) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    regex = loreg_compile("cat|dog", &status);
+    ASSERT(regex != NULL, "compile cat|dog");
+    ASSERT(loreg_search(regex, "cat", &hit), "match cat");
+    ASSERT(loreg_search(regex, "dog", &hit), "match dog");
+    ASSERT(!loreg_search(regex, "rat", &hit), "no match rat");
+    loreg_free(regex);
+
+    /* alternation scoped by the group, followed by a literal tail */
+    regex = loreg_compile("(red|blue) car", &status);
+    ASSERT(regex != NULL, "compile (red|blue) car");
+    ASSERT(loreg_search(regex, "red car", &hit), "match red car");
+    ASSERT(loreg_search(regex, "blue car", &hit), "match blue car");
+    ASSERT(!loreg_search(regex, "green car", &hit), "no match green car");
+    loreg_free(regex);
+}
+
+/* Backslash-escaped metacharacters are matched literally. */
+TEST(escapes) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    /* escaped dot is a literal '.' */
+    regex = loreg_compile("1\\.5", &status);
+    ASSERT(regex != NULL, "compile 1\\.5");
+    ASSERT(loreg_search(regex, "1.5", &hit), "match 1.5");
+    ASSERT(!loreg_search(regex, "1x5", &hit), "no match 1x5");
+    loreg_free(regex);
+
+    /* escaped parentheses do not open a group */
+    regex = loreg_compile("\\(test\\)", &status);
+    ASSERT(regex != NULL, "compile \\(test\\)");
+    ASSERT(loreg_search(regex, "(test)", &hit), "match (test)");
+    loreg_free(regex);
+}
+
+/* Longer patterns: an e-mail address, a dotted IPv4-style quad, and a URL. */
+TEST(real_patterns) {
+    loreg_match_t hit;
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    regex = loreg_compile("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", &status);
+    ASSERT(regex != NULL, "compile email");
+    ASSERT(loreg_search(regex, "user@example.com", &hit), "match email");
+    ASSERT(!loreg_search(regex, "invalid", &hit), "no match invalid");
+    loreg_free(regex);
+
+    regex = loreg_compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", &status);
+    ASSERT(regex != NULL, "compile ip");
+    ASSERT(loreg_search(regex, "192.168.1.1", &hit), "match ip");
+    loreg_free(regex);
+
+    regex = loreg_compile("https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", &status);
+    ASSERT(regex != NULL, "compile url");
+    ASSERT(loreg_search(regex, "http://example.com", &hit), "match http");
+    ASSERT(loreg_search(regex, "https://example.com/path", &hit), "match https");
+    loreg_free(regex);
+}
+
+/* An unclosed group must fail to compile with LOREG_ERR_UNBALANCED_PAREN. */
+TEST(error_handling) {
+    loreg_error_t status;
+    loreg_regex_t *regex;
+
+    regex = loreg_compile("(abc", &status);
+    ASSERT(regex == NULL, "unbalanced paren");
+    ASSERT(status == LOREG_ERR_UNBALANCED_PAREN, "correct error");
+}
+
+/*
+ * Run every test group, print the pass/fail counters and the CPU time
+ * measured with clock(), and exit with 1 if any assertion failed.
+ */
+int main(void) {
+    clock_t begun;
+    clock_t finished;
+
+    fputs("loreg comprehensive tests\n", stdout);
+    fputs("========================\n\n", stdout);
+
+    begun = clock();
+
+    RUN(basic_literals);
+    RUN(metacharacters);
+    RUN(quantifiers);
+    RUN(character_classes);
+    RUN(groups);
+    RUN(alternation);
+    RUN(escapes);
+    RUN(real_patterns);
+    RUN(error_handling);
+
+    finished = clock();
+
+    fputs("\n========================\n", stdout);
+    printf("passed: %d, failed: %d\n", total_passed, total_failed);
+    printf("time: %.3f seconds\n", (double)(finished - begun) / CLOCKS_PER_SEC);
+
+    if (total_failed > 0) {
+        return 1;
+    }
+    return 0;
+}
--- a/tests/test_integration.c
+++ b/tests/test_integration.c
@ -0,0 +1,650 @@
+/* retoor <retoor@molodetz.nl> */
+#include "../include/loreg.h"
+#include <stdio.h>
+#include <string.h>
+
+static int passed = 0; /* checks whose search result equalled the expectation */
+static int failed = 0; /* checks that failed to compile or gave the wrong result */
+
+#define MATCH(pat, txt) test_match(pat, txt, 1, __LINE__) /* expect the search to succeed */
+#define NO_MATCH(pat, txt) test_match(pat, txt, 0, __LINE__) /* expect the search to fail */
+
+/*
+ * Compile `pattern`, search `text`, and compare the outcome with `expect`
+ * (1 = should match, 0 = should not). Bumps `passed` or `failed`; failures
+ * are printed with `line`, the source line of the calling macro. A compile
+ * error counts as a failure.
+ */
+static void test_match(const char *pattern, const char *text, int expect, int line) {
+    loreg_error_t status;
+    loreg_match_t scratch;
+    loreg_regex_t *compiled = loreg_compile(pattern, &status);
+    int found;
+
+    if (compiled == NULL) {
+        printf("FAIL line %d: compile error for '%s': %s\n", line, pattern, loreg_error_string(status));
+        failed++;
+        return;
+    }
+
+    found = loreg_search(compiled, text, &scratch) ? 1 : 0;
+
+    if (found == expect) {
+        passed++;
+    } else {
+        printf("FAIL line %d: '%s' vs '%s' expected %s\n", line, pattern, text, expect ? "match" : "no match");
+        failed++;
+    }
+
+    loreg_free(compiled);
+}
+
+static void test_literals(void) { /* Literal patterns searched anywhere in the text, including the empty pattern. */
+    printf("  literals...\n");
+    MATCH("a", "a");
+    MATCH("a", "ba");
+    MATCH("a", "ab");
+    MATCH("abc", "abc");
+    MATCH("abc", "xabcy");
+    MATCH("hello", "hello world");
+    MATCH("world", "hello world");
+    MATCH("lo wo", "hello world");
+    NO_MATCH("abc", "ab");
+    NO_MATCH("abc", "abd");
+    NO_MATCH("xyz", "abc");
+    NO_MATCH("hello", "helo");
+    MATCH("", "anything"); /* the empty pattern is expected to compile and match */
+    MATCH("", "");
+    MATCH("a", "aaa");
+    MATCH("aa", "aaa");
+    MATCH("aaa", "aaa");
+    NO_MATCH("aaaa", "aaa");
+}
+
+static void test_dot(void) { /* '.' must consume exactly one character per occurrence. */
+    printf("  dot metacharacter...\n");
+    MATCH(".", "a");
+    MATCH(".", "x");
+    MATCH(".", "5");
+    MATCH(".", " ");
+    MATCH("..", "ab");
+    MATCH("...", "abc");
+    MATCH("a.c", "abc");
+    MATCH("a.c", "aXc");
+    MATCH("a.c", "a9c");
+    MATCH("a.c", "a c");
+    NO_MATCH("a.c", "ac");
+    NO_MATCH("a.c", "abbc");
+    MATCH("....", "test");
+    MATCH(".", "!");
+    MATCH(".", "@");
+    MATCH("a..b", "aXYb");
+    MATCH("a...b", "a123b");
+    NO_MATCH("a..b", "aXb");
+}
+
+static void test_anchors(void) { /* '^' and '$' alone, combined, and on the empty string. */
+    printf("  anchors...\n");
+    MATCH("^a", "a");
+    MATCH("^a", "abc");
+    NO_MATCH("^a", "ba");
+    NO_MATCH("^a", " a");
+    MATCH("a$", "a");
+    MATCH("a$", "ba");
+    NO_MATCH("a$", "ab");
+    NO_MATCH("a$", "a ");
+    MATCH("^abc$", "abc");
+    NO_MATCH("^abc$", "xabc");
+    NO_MATCH("^abc$", "abcx");
+    NO_MATCH("^abc$", " abc");
+    NO_MATCH("^abc$", "abc ");
+    MATCH("^$", "");
+    NO_MATCH("^$", "a");
+    MATCH("^hello$", "hello");
+    MATCH("^hello world$", "hello world");
+    NO_MATCH("^hello world$", "hello world!");
+    MATCH("^a.*z$", "abcdefghijklmnopqrstuvwxyz");
+    MATCH("^.", "x");
+    MATCH(".$", "x");
+}
+
+static void test_star(void) { /* '*' accepts zero or more repetitions, so it also matches empty text. */
+    printf("  star quantifier...\n");
+    MATCH("a*", "");
+    MATCH("a*", "a");
+    MATCH("a*", "aa");
+    MATCH("a*", "aaa");
+    MATCH("a*", "aaaaaaaaaa");
+    MATCH("a*", "b"); /* zero repetitions still count as a match */
+    MATCH("a*b", "b");
+    MATCH("a*b", "ab");
+    MATCH("a*b", "aab");
+    MATCH("a*b", "aaaaaab");
+    NO_MATCH("a*b", "a");
+    MATCH("ba*", "b");
+    MATCH("ba*", "ba");
+    MATCH("ba*", "baaa");
+    MATCH(".*", "");
+    MATCH(".*", "anything at all");
+    MATCH("a.*b", "ab");
+    MATCH("a.*b", "aXb");
+    MATCH("a.*b", "aXXXXXb");
+    MATCH("a.*b", "a b");
+    MATCH("x*y*z*", "");
+    MATCH("x*y*z*", "xyz");
+    MATCH("x*y*z*", "xxxyyyzzz");
+    MATCH("ab*c", "ac");
+    MATCH("ab*c", "abc");
+    MATCH("ab*c", "abbbbc");
+}
+
+static void test_plus(void) { /* '+' requires at least one repetition. */
+    printf("  plus quantifier...\n");
+    NO_MATCH("a+", "");
+    MATCH("a+", "a");
+    MATCH("a+", "aa");
+    MATCH("a+", "aaa");
+    MATCH("a+", "aaaaaaaaaa");
+    MATCH("a+", "ba");
+    MATCH("a+b", "ab");
+    MATCH("a+b", "aab");
+    MATCH("a+b", "aaaaaab");
+    NO_MATCH("a+b", "b");
+    NO_MATCH("a+b", "a");
+    MATCH("ba+", "ba");
+    MATCH("ba+", "baaa");
+    NO_MATCH("ba+", "b");
+    MATCH(".+", "a");
+    MATCH(".+", "anything");
+    NO_MATCH(".+", "");
+    MATCH("a.+b", "aXb");
+    MATCH("a.+b", "aXXXXXb");
+    NO_MATCH("a.+b", "ab");
+    MATCH("ab+c", "abc");
+    MATCH("ab+c", "abbbbc");
+    NO_MATCH("ab+c", "ac");
+}
+
+static void test_question(void) { /* '?' makes the preceding element optional. */
+    printf("  question quantifier...\n");
+    MATCH("a?", "");
+    MATCH("a?", "a");
+    MATCH("a?", "aa");
+    MATCH("a?b", "b");
+    MATCH("a?b", "ab");
+    MATCH("a?b", "aab");
+    MATCH("colou?r", "color");
+    MATCH("colou?r", "colour");
+    NO_MATCH("colou?r", "colouur");
+    MATCH("ab?c", "ac");
+    MATCH("ab?c", "abc");
+    NO_MATCH("ab?c", "abbc");
+    MATCH("https?://", "http://");
+    MATCH("https?://", "https://");
+    MATCH(".?", "");
+    MATCH(".?", "x");
+}
+
+static void test_alternation(void) { /* '|' with two or more branches, including branches that are prefixes of each other. */
+    printf("  alternation...\n");
+    MATCH("a|b", "a");
+    MATCH("a|b", "b");
+    NO_MATCH("a|b", "c");
+    MATCH("cat|dog", "cat");
+    MATCH("cat|dog", "dog");
+    NO_MATCH("cat|dog", "rat");
+    MATCH("cat|dog", "my cat");
+    MATCH("cat|dog", "my dog");
+    MATCH("a|b|c", "a");
+    MATCH("a|b|c", "b");
+    MATCH("a|b|c", "c");
+    NO_MATCH("a|b|c", "d");
+    MATCH("ab|cd", "ab");
+    MATCH("ab|cd", "cd");
+    NO_MATCH("ab|cd", "ac");
+    MATCH("abc|def|ghi", "abc");
+    MATCH("abc|def|ghi", "def");
+    MATCH("abc|def|ghi", "ghi");
+    MATCH("a|ab|abc", "abc");
+    MATCH("abc|ab|a", "abc");
+    MATCH("red|green|blue", "the red car");
+    MATCH("red|green|blue", "green light");
+    MATCH("red|green|blue", "blue sky");
+}
+
+static void test_groups(void) { /* Groups: plain, quantified, nested, and containing alternation. */
+    printf("  groups...\n");
+    MATCH("(a)", "a");
+    MATCH("(ab)", "ab");
+    MATCH("(abc)", "abc");
+    MATCH("(a)(b)", "ab");
+    MATCH("(a)(b)(c)", "abc");
+    MATCH("(ab)+", "ab");
+    MATCH("(ab)+", "abab");
+    MATCH("(ab)+", "ababab");
+    NO_MATCH("(ab)+", "a");
+    NO_MATCH("(ab)+", "ba");
+    MATCH("(ab)*", "");
+    MATCH("(ab)*", "ab");
+    MATCH("(ab)*", "abab");
+    MATCH("(ab)?", "");
+    MATCH("(ab)?", "ab");
+    MATCH("(a|b)+", "a");
+    MATCH("(a|b)+", "b");
+    MATCH("(a|b)+", "ab");
+    MATCH("(a|b)+", "ba");
+    MATCH("(a|b)+", "aabb");
+    MATCH("(a|b)+", "abba");
+    MATCH("((a))", "a");
+    MATCH("((ab))", "ab");
+    MATCH("(a(b)c)", "abc");
+    MATCH("(a(b(c)))", "abc");
+    MATCH("((a)(b))", "ab");
+    MATCH("(red|blue) car", "red car");
+    MATCH("(red|blue) car", "blue car");
+    NO_MATCH("(red|blue) car", "green car");
+}
+
+static void test_bracket_simple(void) { /* Bracket sets listing individual characters, with and without quantifiers. */
+    printf("  bracket expressions (simple)...\n");
+    MATCH("[a]", "a");
+    NO_MATCH("[a]", "b");
+    MATCH("[ab]", "a");
+    MATCH("[ab]", "b");
+    NO_MATCH("[ab]", "c");
+    MATCH("[abc]", "a");
+    MATCH("[abc]", "b");
+    MATCH("[abc]", "c");
+    NO_MATCH("[abc]", "d");
+    MATCH("[aeiou]", "a");
+    MATCH("[aeiou]", "e");
+    MATCH("[aeiou]", "i");
+    MATCH("[aeiou]", "o");
+    MATCH("[aeiou]", "u");
+    NO_MATCH("[aeiou]", "b");
+    MATCH("[abc]+", "aaa");
+    MATCH("[abc]+", "abc");
+    MATCH("[abc]+", "cba");
+    MATCH("[abc]+", "abcabc");
+    MATCH("[xyz]*", "");
+    MATCH("[xyz]*", "xyz");
+}
+
+static void test_bracket_ranges(void) { /* Bracket ranges, single and combined, checked at both endpoints. */
+    printf("  bracket expressions (ranges)...\n");
+    MATCH("[a-z]", "a");
+    MATCH("[a-z]", "m");
+    MATCH("[a-z]", "z");
+    NO_MATCH("[a-z]", "A");
+    NO_MATCH("[a-z]", "0");
+    MATCH("[A-Z]", "A");
+    MATCH("[A-Z]", "M");
+    MATCH("[A-Z]", "Z");
+    NO_MATCH("[A-Z]", "a");
+    MATCH("[0-9]", "0");
+    MATCH("[0-9]", "5");
+    MATCH("[0-9]", "9");
+    NO_MATCH("[0-9]", "a");
+    MATCH("[a-zA-Z]", "a");
+    MATCH("[a-zA-Z]", "Z");
+    NO_MATCH("[a-zA-Z]", "5");
+    MATCH("[a-zA-Z0-9]", "a");
+    MATCH("[a-zA-Z0-9]", "Z");
+    MATCH("[a-zA-Z0-9]", "5");
+    NO_MATCH("[a-zA-Z0-9]", "!");
+    MATCH("[a-z]+", "hello");
+    MATCH("[A-Z]+", "HELLO");
+    MATCH("[0-9]+", "12345");
+    MATCH("[a-z0-9]+", "abc123");
+}
+
+static void test_bracket_negated(void) { /* '[^...]' matches only characters outside the listed set or range. */
+    printf("  bracket expressions (negated)...\n");
+    NO_MATCH("[^a]", "a");
+    MATCH("[^a]", "b");
+    MATCH("[^a]", "x");
+    NO_MATCH("[^abc]", "a");
+    NO_MATCH("[^abc]", "b");
+    NO_MATCH("[^abc]", "c");
+    MATCH("[^abc]", "d");
+    MATCH("[^abc]", "x");
+    NO_MATCH("[^a-z]", "a");
+    NO_MATCH("[^a-z]", "m");
+    NO_MATCH("[^a-z]", "z");
+    MATCH("[^a-z]", "A");
+    MATCH("[^a-z]", "5");
+    MATCH("[^a-z]", "!");
+    NO_MATCH("[^0-9]", "5");
+    MATCH("[^0-9]", "a");
+    MATCH("[^0-9]+", "hello");
+    NO_MATCH("[^aeiou]+", "aaa");
+    MATCH("[^aeiou]+", "xyz");
+}
+
+static void test_character_classes(void) { /* The \d \w \s escapes and their negations \D \W \S. */
+    printf("  character classes...\n");
+    MATCH("\\d", "0");
+    MATCH("\\d", "5");
+    MATCH("\\d", "9");
+    NO_MATCH("\\d", "a");
+    NO_MATCH("\\d", " ");
+    MATCH("\\d+", "123");
+    MATCH("\\d+", "0");
+    MATCH("\\d+", "9876543210");
+    NO_MATCH("\\d+", "");
+    NO_MATCH("\\d+", "abc");
+    MATCH("\\D", "a");
+    MATCH("\\D", " ");
+    MATCH("\\D", "!");
+    NO_MATCH("\\D", "5");
+    MATCH("\\w", "a");
+    MATCH("\\w", "Z");
+    MATCH("\\w", "0");
+    MATCH("\\w", "_"); /* underscore counts as a word character */
+    NO_MATCH("\\w", " ");
+    NO_MATCH("\\w", "!");
+    MATCH("\\w+", "hello");
+    MATCH("\\w+", "Hello123");
+    MATCH("\\w+", "var_name");
+    MATCH("\\W", " ");
+    MATCH("\\W", "!");
+    MATCH("\\W", "@");
+    NO_MATCH("\\W", "a");
+    NO_MATCH("\\W", "_");
+    MATCH("\\s", " ");
+    MATCH("\\s", "\t");
+    MATCH("\\s", "\n");
+    NO_MATCH("\\s", "a");
+    NO_MATCH("\\s", "5");
+    MATCH("\\s+", "   ");
+    MATCH("\\s+", " \t\n");
+    MATCH("\\S", "a");
+    MATCH("\\S", "5");
+    MATCH("\\S", "!");
+    NO_MATCH("\\S", " ");
+    NO_MATCH("\\S", "\t");
+}
+
+/*
+ * Brace quantifiers {n}, {n,m}, {n,} and {0,m}, applied to a literal, a
+ * bracket expression and a group.
+ * NOTE(review): texts longer than the upper bound (e.g. "aaaa" for a{3})
+ * are expected to match, so MATCH presumably accepts a match anywhere in
+ * the text rather than requiring the whole text to match - confirm against
+ * the macro definition earlier in this file.
+ */
+static void test_quantifier_braces(void) {
+    printf("  brace quantifiers...\n");
+    MATCH("a{3}", "aaa");
+    MATCH("a{3}", "aaaa");
+    NO_MATCH("a{3}", "aa");
+    MATCH("a{1}", "a");
+    MATCH("a{1}", "aa");
+    NO_MATCH("a{1}", "");
+    MATCH("a{0}", "");
+    MATCH("a{0}", "b");
+    MATCH("a{2,4}", "aa");
+    MATCH("a{2,4}", "aaa");
+    MATCH("a{2,4}", "aaaa");
+    MATCH("a{2,4}", "aaaaa");
+    NO_MATCH("a{2,4}", "a");
+    MATCH("a{2,}", "aa");
+    MATCH("a{2,}", "aaa");
+    MATCH("a{2,}", "aaaaaaaaaa");
+    NO_MATCH("a{2,}", "a");
+    MATCH("a{0,2}", "");
+    MATCH("a{0,2}", "a");
+    MATCH("a{0,2}", "aa");
+    MATCH("a{0,2}", "aaa");
+    MATCH("[0-9]{3}", "123");
+    MATCH("[0-9]{3}", "000");
+    NO_MATCH("[0-9]{3}", "12");
+    MATCH("(ab){2}", "abab");
+    MATCH("(ab){2}", "ababab");
+    NO_MATCH("(ab){2}", "ab");
+}
+
+/*
+ * Backslash-escaped metacharacters are expected to behave as literals,
+ * both on their own and inside longer patterns. Every pattern here is a C
+ * string literal, so "\\." is the regex consisting of a backslash and a dot.
+ */
+static void test_escape_sequences(void) {
+    printf("  escape sequences...\n");
+    MATCH("\\.", ".");
+    NO_MATCH("\\.", "a");
+    MATCH("\\*", "*");
+    NO_MATCH("\\*", "a");
+    MATCH("\\+", "+");
+    MATCH("\\?", "?");
+    MATCH("\\|", "|");
+    MATCH("\\(", "(");
+    MATCH("\\)", ")");
+    MATCH("\\[", "[");
+    MATCH("\\]", "]");
+    MATCH("\\{", "{");
+    MATCH("\\}", "}");
+    MATCH("\\^", "^");
+    MATCH("\\$", "$");
+    /* Escaped backslash in the pattern against a single backslash in the text. */
+    MATCH("\\\\", "\\");
+    MATCH("a\\.b", "a.b");
+    NO_MATCH("a\\.b", "aXb");
+    MATCH("\\d\\.\\d", "1.5");
+    MATCH("c\\+\\+", "c++");
+    MATCH("\\(test\\)", "(test)");
+    MATCH("\\[0\\]", "[0]");
+}
+
+/*
+ * Composite patterns mixing brackets, shorthand classes, groups and brace
+ * quantifiers: e-mail addresses, dotted quads of 1-3 digits, http/https
+ * URLs, phone numbers, an ID of two capitals plus six digits, dates and
+ * times with optional seconds.
+ */
+static void test_complex_patterns(void) {
+    printf("  complex patterns...\n");
+    MATCH("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "user@example.com");
+    MATCH("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "test.user@mail.example.org");
+    NO_MATCH("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "invalid");
+    NO_MATCH("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}", "@example.com");
+    MATCH("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "192.168.1.1");
+    MATCH("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "10.0.0.1");
+    MATCH("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "255.255.255.255");
+    NO_MATCH("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}", "1.2.3");
+    MATCH("https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "http://example.com");
+    MATCH("https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "https://example.com");
+    MATCH("https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "http://example.com/path");
+    MATCH("https?://[a-zA-Z0-9.-]+(/[a-zA-Z0-9./-]*)?", "https://example.com/path/to/page");
+    MATCH("\\d{3}-\\d{3}-\\d{4}", "123-456-7890");
+    MATCH("\\d{3}-\\d{3}-\\d{4}", "555-123-4567");
+    NO_MATCH("\\d{3}-\\d{3}-\\d{4}", "12-345-6789");
+    NO_MATCH("\\d{3}-\\d{3}-\\d{4}", "1234567890");
+    MATCH("\\(\\d{3}\\) \\d{3}-\\d{4}", "(123) 456-7890");
+    MATCH("[A-Z]{2}\\d{6}", "AB123456");
+    NO_MATCH("[A-Z]{2}\\d{6}", "A1234567");
+    MATCH("\\d{4}-\\d{2}-\\d{2}", "2024-01-15");
+    MATCH("\\d{2}/\\d{2}/\\d{4}", "01/15/2024");
+    MATCH("\\d{1,2}:\\d{2}(:\\d{2})?", "12:30");
+    MATCH("\\d{1,2}:\\d{2}(:\\d{2})?", "12:30:45");
+    MATCH("\\d{1,2}:\\d{2}(:\\d{2})?", "9:05");
+}
+
+/*
+ * Word-like patterns: \w+ runs, C-style identifiers and two words separated
+ * by whitespace. Despite the function name, no \b boundary assertion is
+ * used in these patterns; the printed label is "word patterns".
+ */
+static void test_word_boundaries(void) {
+    printf("  word patterns...\n");
+    MATCH("\\w+", "hello");
+    MATCH("\\w+", "hello123");
+    MATCH("\\w+", "test_var");
+    MATCH("[a-zA-Z_][a-zA-Z0-9_]*", "variable");
+    MATCH("[a-zA-Z_][a-zA-Z0-9_]*", "_private");
+    MATCH("[a-zA-Z_][a-zA-Z0-9_]*", "var123");
+    /* Anchored at both ends: the only NO_MATCH identifier case, on a text that starts with digits. */
+    NO_MATCH("^[a-zA-Z_][a-zA-Z0-9_]*$", "123var");
+    MATCH("\\w+\\s+\\w+", "hello world");
+    MATCH("\\w+\\s+\\w+", "foo bar");
+    NO_MATCH("\\w+\\s+\\w+", "hello");
+}
+
+/*
+ * Greedy quantifiers next to their lazy forms (+?, *?, ??, {n,m}?). The
+ * calls pass no expected match extent, so each greedy/lazy pair is only
+ * expected to agree on whether a match exists.
+ */
+static void test_greedy_vs_nongreedy(void) {
+    printf("  greedy vs non-greedy...\n");
+    MATCH("a+", "aaa");
+    MATCH("a+?", "aaa");
+    MATCH("a*", "aaa");
+    MATCH("a*?", "aaa");
+    MATCH("a?", "a");
+    MATCH("a??", "a");
+    MATCH("a{2,4}", "aaaa");
+    MATCH("a{2,4}?", "aaaa");
+    MATCH(".*x", "abcx");
+    MATCH(".*?x", "abcx");
+}
+
+/*
+ * Empty pattern, empty text and patterns that can match the empty string,
+ * including quantifiers nested over possibly-empty groups such as (a*)*,
+ * plus bare anchors on empty input.
+ */
+static void test_empty_and_edge_cases(void) {
+    printf("  empty and edge cases...\n");
+    MATCH("", "");
+    MATCH("", "abc");
+    MATCH("a*", "");
+    MATCH("a?", "");
+    MATCH("(a*)*", "");
+    MATCH("(a*)+", "");
+    MATCH("(a+)*", "");
+    MATCH("(a|b)*", "");
+    MATCH("[a-z]*", "");
+    NO_MATCH("a+", "");
+    NO_MATCH(".+", "");
+    NO_MATCH("[a-z]+", "");
+    MATCH("^", "");
+    MATCH("$", "");
+    MATCH("^$", "");
+    NO_MATCH("^$", "a");
+    MATCH("a*b*c*", "");
+    MATCH("a*b*c*", "abc");
+    MATCH("a*b*c*", "aabbcc");
+    MATCH("a*b*c*", "c");
+    MATCH("a*b*c*", "b");
+}
+
+/*
+ * Texts that contain control characters or regex metacharacters. The
+ * cases expect '.' to match a tab but not a newline, and escaped
+ * metacharacters to be found inside longer texts.
+ */
+static void test_special_characters_in_text(void) {
+    printf("  special characters in text...\n");
+    MATCH("a", "a\nb");
+    MATCH("b", "a\nb");
+    MATCH("a.b", "a\tb");
+    NO_MATCH("a.b", "a\nb");
+    MATCH("\\.", "3.14");
+    MATCH("\\+", "1+2");
+    MATCH("\\*", "2*3");
+    MATCH("\\?", "why?");
+    MATCH("\\(\\)", "func()");
+    MATCH("\\[\\]", "array[]");
+    MATCH("\\{\\}", "object{}");
+    MATCH("\\^", "x^2");
+    MATCH("\\$", "$100");
+    MATCH("\\|", "a|b");
+}
+
+/*
+ * Sequences of quantified atoms (a+b+, a*b+, a+b*, a*b*) and quantified
+ * groups, including nested '+' as in ((a+)+)+.
+ */
+static void test_repetition_combinations(void) {
+    printf("  repetition combinations...\n");
+    MATCH("a+b+", "ab");
+    MATCH("a+b+", "aabb");
+    MATCH("a+b+", "aaabbb");
+    NO_MATCH("a+b+", "a");
+    NO_MATCH("a+b+", "b");
+    MATCH("a*b+", "b");
+    MATCH("a*b+", "ab");
+    MATCH("a*b+", "aab");
+    MATCH("a+b*", "a");
+    MATCH("a+b*", "ab");
+    MATCH("a+b*", "abb");
+    MATCH("a*b*", "");
+    MATCH("a*b*", "a");
+    MATCH("a*b*", "b");
+    MATCH("a*b*", "ab");
+    MATCH("(ab)+c+", "abc");
+    MATCH("(ab)+c+", "ababcc");
+    MATCH("(a+b)+", "ab");
+    MATCH("(a+b)+", "aabaaab");
+    MATCH("((a+)+)+", "a");
+    MATCH("((a+)+)+", "aaa");
+}
+
+/*
+ * Alternation with many branches, alternation inside adjacent groups,
+ * alternation followed by an optional suffix, and branches that are
+ * prefixes of one another listed in both orders.
+ */
+static void test_alternation_combinations(void) {
+    printf("  alternation combinations...\n");
+    MATCH("a|b|c|d|e", "a");
+    MATCH("a|b|c|d|e", "e");
+    NO_MATCH("a|b|c|d|e", "f");
+    MATCH("(a|b)(c|d)", "ac");
+    MATCH("(a|b)(c|d)", "ad");
+    MATCH("(a|b)(c|d)", "bc");
+    MATCH("(a|b)(c|d)", "bd");
+    NO_MATCH("(a|b)(c|d)", "ab");
+    MATCH("(cat|dog)s?", "cat");
+    MATCH("(cat|dog)s?", "cats");
+    MATCH("(cat|dog)s?", "dog");
+    MATCH("(cat|dog)s?", "dogs");
+    MATCH("(red|green|blue)\\s+(car|truck)", "red car");
+    MATCH("(red|green|blue)\\s+(car|truck)", "green truck");
+    MATCH("(a|aa|aaa)", "aaa");
+    MATCH("(aaa|aa|a)", "aaa");
+}
+
+/*
+ * Groups nested up to three levels deep, combined with alternation and
+ * quantifiers on the inner groups.
+ */
+static void test_nested_groups(void) {
+    printf("  nested groups...\n");
+    MATCH("((a))", "a");
+    MATCH("(((a)))", "a");
+    MATCH("((a)(b))", "ab");
+    MATCH("((a(b))c)", "abc");
+    MATCH("(a(b(c)))", "abc");
+    MATCH("((a|b)(c|d))", "ac");
+    MATCH("(a(b|c)d)", "abd");
+    MATCH("(a(b|c)d)", "acd");
+    MATCH("((ab)+)", "abab");
+    MATCH("(a(bc)*d)", "ad");
+    MATCH("(a(bc)*d)", "abcd");
+    MATCH("(a(bc)*d)", "abcbcd");
+    MATCH("((a+)(b+))", "aabb");
+    MATCH("(((a|b)+)c)", "ababc");
+}
+
+/*
+ * Everyday patterns: alphabetic words, signed integers and decimals, hex
+ * and binary digit strings, capitalised words, double- and single-quoted
+ * strings, and '#' followed by three or six hex digits.
+ */
+static void test_real_world_patterns(void) {
+    printf("  real world patterns...\n");
+    MATCH("[a-zA-Z]+", "Hello");
+    MATCH("[a-zA-Z]+", "WORLD");
+    MATCH("[a-zA-Z]+", "test");
+    MATCH("-?\\d+", "123");
+    MATCH("-?\\d+", "-456");
+    MATCH("-?\\d+", "0");
+    MATCH("-?\\d+\\.?\\d*", "3.14");
+    MATCH("-?\\d+\\.?\\d*", "-2.5");
+    MATCH("-?\\d+\\.?\\d*", "42");
+    MATCH("[a-fA-F0-9]+", "deadbeef");
+    MATCH("[a-fA-F0-9]+", "CAFEBABE");
+    MATCH("[a-fA-F0-9]+", "123abc");
+    MATCH("[01]+", "101010");
+    MATCH("[01]+", "11110000");
+    MATCH("[A-Z][a-z]+", "Hello");
+    MATCH("[A-Z][a-z]+", "World");
+    NO_MATCH("[A-Z][a-z]+", "hello");
+    MATCH("\"[^\"]*\"", "\"hello\"");
+    MATCH("\"[^\"]*\"", "\"hello world\"");
+    MATCH("\"[^\"]*\"", "\"\"");
+    MATCH("'[^']*'", "'test'");
+    MATCH("#[a-fA-F0-9]{6}", "#ff0000");
+    MATCH("#[a-fA-F0-9]{6}", "#00FF00");
+    MATCH("#[a-fA-F0-9]{3}", "#f00");
+}
+
+/*
+ * Patterns with heavily nested or repeated optional parts (printed as
+ * "stress test patterns").
+ * NOTE(review): these shapes are the usual triggers for exponential
+ * backtracking; presumably the suite uses them to confirm the matcher still
+ * finishes - no timing is asserted here.
+ */
+static void test_pathological_patterns(void) {
+    printf("  stress test patterns...\n");
+    MATCH("a?a?a?aaa", "aaa");
+    MATCH("(a+)+", "aaaa");
+    MATCH("(a*)*", "aaaa");
+    MATCH("(a|a)+", "aaaa");
+    MATCH("((a*)*)*", "aaaa");
+    MATCH("a*a*a*a*a*b", "aaaaab");
+    MATCH(".*.*.*.*.*", "test");
+    MATCH("(a?){5}a{5}", "aaaaa");
+}
+
+/*
+ * Entry point of the integration suite: prints a banner, runs every test
+ * group in order, then prints the passed/failed counters and their sum.
+ * The exit status is 1 when at least one check failed and 0 otherwise.
+ */
+int main(void) {
+    /* Test groups, listed in the order they are executed. */
+    static void (*const groups[])(void) = {
+        test_literals,
+        test_dot,
+        test_anchors,
+        test_star,
+        test_plus,
+        test_question,
+        test_alternation,
+        test_groups,
+        test_bracket_simple,
+        test_bracket_ranges,
+        test_bracket_negated,
+        test_character_classes,
+        test_quantifier_braces,
+        test_escape_sequences,
+        test_complex_patterns,
+        test_word_boundaries,
+        test_greedy_vs_nongreedy,
+        test_empty_and_edge_cases,
+        test_special_characters_in_text,
+        test_repetition_combinations,
+        test_alternation_combinations,
+        test_nested_groups,
+        test_real_world_patterns,
+        test_pathological_patterns
+    };
+    const size_t group_total = sizeof(groups) / sizeof(groups[0]);
+    size_t idx;
+
+    printf("loreg integration tests\n");
+    printf("=======================\n\n");
+
+    for (idx = 0; idx < group_total; idx++) {
+        groups[idx]();
+    }
+
+    printf("\n=======================\n");
+    printf("integration: %d passed, %d failed\n", passed, failed);
+    printf("total tests: %d\n", passed + failed);
+
+    if (failed > 0) {
+        return 1;
+    }
+    return 0;
+}
--- a/tests/test_lexer.c
+++ b/tests/test_lexer.c
@ -0,0 +1,195 @@
+/* retoor <retoor@molodetz.nl> */
+#include "../include/lexer.h"
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+static int tests_passed = 0;
+static int tests_failed = 0;
+
+/* Declares the test function test_<name>; the body follows the macro. */
+#define TEST(name) static void test_##name(void)
+
+/*
+ * Runs test_<name> and records the outcome. ASSERT signals failure by
+ * incrementing tests_failed and returning from the test, so the test is
+ * counted as passed (and "ok" printed) only when tests_failed is unchanged
+ * after the call. This keeps a failing test from also being reported as ok.
+ */
+#define RUN_TEST(name) do { \
+    int failed_before_ = tests_failed; \
+    printf("  %s... ", #name); \
+    test_##name(); \
+    if (tests_failed == failed_before_) { \
+        printf("ok\n"); \
+        tests_passed++; \
+    } \
+} while(0)
+
+/*
+ * Checks cond inside a TEST body. On failure it prints the source line and
+ * the condition text, increments tests_failed and returns from the
+ * enclosing test function, so statements after a failed ASSERT (including
+ * cleanup) do not run.
+ */
+#define ASSERT(cond) do { \
+    if (!(cond)) { \
+        printf("FAILED at line %d: %s\n", __LINE__, #cond); \
+        tests_failed++; \
+        return; \
+    } \
+} while(0)
+
+/* Plain characters come out as TOKEN_CHAR tokens carrying the character, followed by TOKEN_EOF. */
+TEST(simple_chars) {
+    lexer_t lexer;
+    lexer_init(&lexer, "abc");
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'b');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'c');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_EOF);
+}
+
+/* Each unescaped metacharacter maps to its own token type, in input order, then TOKEN_EOF. */
+TEST(meta_chars) {
+    lexer_t lexer;
+    lexer_init(&lexer, ".*+?|()^$");
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_DOT);
+    ASSERT(lexer_next(&lexer).type == TOKEN_STAR);
+    ASSERT(lexer_next(&lexer).type == TOKEN_PLUS);
+    ASSERT(lexer_next(&lexer).type == TOKEN_QUESTION);
+    ASSERT(lexer_next(&lexer).type == TOKEN_PIPE);
+    ASSERT(lexer_next(&lexer).type == TOKEN_LPAREN);
+    ASSERT(lexer_next(&lexer).type == TOKEN_RPAREN);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CARET);
+    ASSERT(lexer_next(&lexer).type == TOKEN_DOLLAR);
+    ASSERT(lexer_next(&lexer).type == TOKEN_EOF);
+}
+
+/* A backslash before '*', '+' or '.' yields TOKEN_CHAR with the literal character. */
+TEST(escaped_chars) {
+    lexer_t lexer;
+    lexer_init(&lexer, "\\*\\+\\.");
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '*');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '+');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '.');
+}
+
+/* The shorthand escapes \d \w \s \D \W \S produce the corresponding TOKEN_CLASS_* types. */
+TEST(character_classes) {
+    lexer_t lexer;
+    lexer_init(&lexer, "\\d\\w\\s\\D\\W\\S");
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_DIGIT);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_WORD);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_SPACE);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_NDIGIT);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_NWORD);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_NSPACE);
+}
+
+/* "[abc]" is tokenised as TOKEN_LBRACKET, three TOKEN_CHARs and TOKEN_RBRACKET. */
+TEST(bracket_expression) {
+    lexer_t lexer;
+    lexer_init(&lexer, "[abc]");
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_LBRACKET);
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'b');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'c');
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_RBRACKET);
+}
+
+/* In "[a-z]" the '-' between the two characters is reported as TOKEN_DASH. */
+TEST(bracket_range) {
+    lexer_t lexer;
+    lexer_init(&lexer, "[a-z]");
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_LBRACKET);
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_DASH);
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'z');
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_RBRACKET);
+}
+
+/* In "[^a]" the '^' right after '[' is reported as TOKEN_CARET, the same type used for the anchor. */
+TEST(negated_bracket) {
+    lexer_t lexer;
+    lexer_init(&lexer, "[^a]");
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_LBRACKET);
+    ASSERT(lexer_next(&lexer).type == TOKEN_CARET);
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_RBRACKET);
+}
+
+/*
+ * "a{3}": the braces become TOKEN_LBRACE / TOKEN_RBRACE and the count digit
+ * arrives as an ordinary TOKEN_CHAR, i.e. the lexer does not parse the number.
+ */
+TEST(quantifier_braces) {
+    lexer_t lexer;
+    lexer_init(&lexer, "a{3}");
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_LBRACE);
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '3');
+
+    ASSERT(lexer_next(&lexer).type == TOKEN_RBRACE);
+}
+
+/*
+ * lexer_peek returns the upcoming token without consuming it: two peeks in
+ * a row agree, lexer_next then returns that same token, and the following
+ * peek sees the next one.
+ */
+TEST(peek) {
+    lexer_t lexer;
+    lexer_init(&lexer, "ab");
+
+    token_t t = lexer_peek(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    t = lexer_peek(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
+
+    t = lexer_peek(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == 'b');
+}
+
+/* The pattern escapes \n, \t and \r are decoded to the newline, tab and carriage-return characters. */
+TEST(escape_sequences) {
+    lexer_t lexer;
+    lexer_init(&lexer, "\\n\\t\\r");
+
+    token_t t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '\n');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '\t');
+
+    t = lexer_next(&lexer);
+    ASSERT(t.type == TOKEN_CHAR && t.value == '\r');
+}
+
+/*
+ * Runs every lexer test through RUN_TEST, prints the passed/failed
+ * counters and returns 1 if any assertion failed, 0 otherwise.
+ */
+int main(void) {
+    printf("lexer tests:\n");
+
+    RUN_TEST(simple_chars);
+    RUN_TEST(meta_chars);
+    RUN_TEST(escaped_chars);
+    RUN_TEST(character_classes);
+    RUN_TEST(bracket_expression);
+    RUN_TEST(bracket_range);
+    RUN_TEST(negated_bracket);
+    RUN_TEST(quantifier_braces);
+    RUN_TEST(peek);
+    RUN_TEST(escape_sequences);
+
+    printf("\nlexer: %d passed, %d failed\n", tests_passed, tests_failed);
+    return tests_failed > 0 ? 1 : 0;
+}
--- a/tests/test_matcher.c
+++ b/tests/test_matcher.c
@ -0,0 +1,294 @@
+/* retoor <retoor@molodetz.nl> */
+#include "../include/loreg.h"
+#include <stdio.h>
+#include <string.h>
+
+static int tests_passed = 0;
+static int tests_failed = 0;
+
+/* Declares the test function test_<name>; the body follows the macro. */
+#define TEST(name) static void test_##name(void)
+
+/*
+ * Runs test_<name> and records the outcome. ASSERT signals failure by
+ * incrementing tests_failed and returning from the test, so the test is
+ * counted as passed (and "ok" printed) only when tests_failed is unchanged
+ * after the call. This keeps a failing test from also being reported as ok.
+ */
+#define RUN_TEST(name) do { \
+    int failed_before_ = tests_failed; \
+    printf("  %s... ", #name); \
+    test_##name(); \
+    if (tests_failed == failed_before_) { \
+        printf("ok\n"); \
+        tests_passed++; \
+    } \
+} while(0)
+
+/*
+ * Checks cond inside a TEST body. On failure it prints the source line and
+ * the condition text, increments tests_failed and returns from the
+ * enclosing test function, so statements after a failed ASSERT (including
+ * cleanup) do not run.
+ */
+#define ASSERT(cond) do { \
+    if (!(cond)) { \
+        printf("FAILED at line %d: %s\n", __LINE__, #cond); \
+        tests_failed++; \
+        return; \
+    } \
+} while(0)
+
+/*
+ * Compiles pattern and requires loreg_search to find it in text.
+ * The compiled regex is released before the verdict is acted on, because
+ * a failing check returns from the test and would otherwise skip
+ * loreg_free. The failure message shows both the pattern and the text.
+ */
+#define ASSERT_MATCH(pattern, text) do { \
+    const char *pattern_ = (pattern); \
+    const char *text_ = (text); \
+    loreg_error_t err; \
+    loreg_regex_t *re = loreg_compile(pattern_, &err); \
+    ASSERT(re != NULL); \
+    loreg_match_t result; \
+    int found_ = (loreg_search(re, text_, &result) == true); \
+    loreg_free(re); \
+    if (!found_) { \
+        printf("FAILED at line %d: expected \"%s\" to match \"%s\"\n", \
+               __LINE__, pattern_, text_); \
+        tests_failed++; \
+        return; \
+    } \
+} while(0)
+
+/*
+ * Compiles pattern and requires loreg_search NOT to find it in text.
+ * As in ASSERT_MATCH, the regex is freed before the test can return early.
+ */
+#define ASSERT_NO_MATCH(pattern, text) do { \
+    const char *pattern_ = (pattern); \
+    const char *text_ = (text); \
+    loreg_error_t err; \
+    loreg_regex_t *re = loreg_compile(pattern_, &err); \
+    ASSERT(re != NULL); \
+    loreg_match_t result; \
+    int rejected_ = (loreg_search(re, text_, &result) == false); \
+    loreg_free(re); \
+    if (!rejected_) { \
+        printf("FAILED at line %d: expected \"%s\" not to match \"%s\"\n", \
+               __LINE__, pattern_, text_); \
+        tests_failed++; \
+        return; \
+    } \
+} while(0)
+
+/* A single literal is found alone and in the middle of a longer text ("bab"), and is absent from "bcd". */
+TEST(simple_char) {
+    ASSERT_MATCH("a", "a");
+    ASSERT_MATCH("a", "bab");
+    ASSERT_NO_MATCH("a", "bcd");
+}
+
+/* Concatenation is order-sensitive: "ab" is found in "xaby" but not in "ba". */
+TEST(concat) {
+    ASSERT_MATCH("ab", "ab");
+    ASSERT_MATCH("ab", "xaby");
+    ASSERT_NO_MATCH("ab", "ba");
+}
+
+/* Either branch of '|' may match, for single characters and for whole words. */
+TEST(alternation) {
+    ASSERT_MATCH("a|b", "a");
+    ASSERT_MATCH("a|b", "b");
+    ASSERT_MATCH("cat|dog", "cat");
+    ASSERT_MATCH("cat|dog", "dog");
+    ASSERT_NO_MATCH("cat|dog", "rat");
+}
+
+/* '*' accepts zero or more repetitions, including on empty text. */
+TEST(star) {
+    ASSERT_MATCH("a*", "");
+    ASSERT_MATCH("a*", "a");
+    ASSERT_MATCH("a*", "aaa");
+    ASSERT_MATCH("a*b", "b");
+    ASSERT_MATCH("a*b", "ab");
+    ASSERT_MATCH("a*b", "aaab");
+}
+
+/* '+' needs at least one repetition, so empty text does not match. */
+TEST(plus) {
+    ASSERT_NO_MATCH("a+", "");
+    ASSERT_MATCH("a+", "a");
+    ASSERT_MATCH("a+", "aaa");
+    ASSERT_MATCH("a+b", "ab");
+    ASSERT_MATCH("a+b", "aaab");
+}
+
+/* '?' makes the preceding atom optional. */
+TEST(question) {
+    ASSERT_MATCH("a?", "");
+    ASSERT_MATCH("a?", "a");
+    ASSERT_MATCH("a?b", "b");
+    ASSERT_MATCH("a?b", "ab");
+}
+
+/* '.' consumes exactly one character: "a.b" needs something between 'a' and 'b'. */
+TEST(dot) {
+    ASSERT_MATCH(".", "a");
+    ASSERT_MATCH(".", "x");
+    ASSERT_MATCH("a.b", "aab");
+    ASSERT_MATCH("a.b", "axb");
+    ASSERT_NO_MATCH("a.b", "ab");
+}
+
+/* A bracket set matches each listed character and nothing else. */
+TEST(bracket_simple) {
+    ASSERT_MATCH("[abc]", "a");
+    ASSERT_MATCH("[abc]", "b");
+    ASSERT_MATCH("[abc]", "c");
+    ASSERT_NO_MATCH("[abc]", "d");
+}
+
+/* [a-z] includes both endpoints and excludes upper-case letters and digits. */
+TEST(bracket_range) {
+    ASSERT_MATCH("[a-z]", "a");
+    ASSERT_MATCH("[a-z]", "m");
+    ASSERT_MATCH("[a-z]", "z");
+    ASSERT_NO_MATCH("[a-z]", "A");
+    ASSERT_NO_MATCH("[a-z]", "0");
+}
+
+/* [^abc] rejects the listed characters and accepts others. */
+TEST(bracket_negated) {
+    ASSERT_NO_MATCH("[^abc]", "a");
+    ASSERT_NO_MATCH("[^abc]", "b");
+    ASSERT_MATCH("[^abc]", "d");
+    ASSERT_MATCH("[^abc]", "x");
+}
+
+/* Parenthesised groups match as a unit and can carry a quantifier. */
+TEST(group) {
+    ASSERT_MATCH("(ab)", "ab");
+    ASSERT_MATCH("(ab)+", "abab");
+    ASSERT_MATCH("(a|b)+", "abba");
+}
+
+/* '^' pins the match to the start of the text, '$' to the end, and both together to the whole text. */
+TEST(anchors) {
+    ASSERT_MATCH("^a", "a");
+    ASSERT_MATCH("^a", "abc");
+    ASSERT_NO_MATCH("^a", "ba");
+
+    ASSERT_MATCH("a$", "a");
+    ASSERT_MATCH("a$", "ba");
+    ASSERT_NO_MATCH("a$", "ab");
+
+    ASSERT_MATCH("^abc$", "abc");
+    ASSERT_NO_MATCH("^abc$", "xabc");
+    ASSERT_NO_MATCH("^abc$", "abcx");
+}
+
+/* a{3} needs three a's; a longer run still contains a match because the search is unanchored. */
+TEST(quantifier_exact) {
+    ASSERT_MATCH("a{3}", "aaa");
+    ASSERT_MATCH("a{3}", "aaaa");
+    ASSERT_NO_MATCH("a{3}", "aa");
+}
+
+/* a{2,4} accepts two to four repetitions and rejects a single one. */
+TEST(quantifier_range) {
+    ASSERT_MATCH("a{2,4}", "aa");
+    ASSERT_MATCH("a{2,4}", "aaa");
+    ASSERT_MATCH("a{2,4}", "aaaa");
+    ASSERT_NO_MATCH("a{2,4}", "a");
+}
+
+/* a{2,} accepts two or more repetitions. */
+TEST(quantifier_open) {
+    ASSERT_MATCH("a{2,}", "aa");
+    ASSERT_MATCH("a{2,}", "aaaaa");
+    ASSERT_NO_MATCH("a{2,}", "a");
+}
+
+/* \d matches decimal digits only. */
+TEST(class_digit) {
+    ASSERT_MATCH("\\d", "0");
+    ASSERT_MATCH("\\d", "9");
+    ASSERT_MATCH("\\d+", "123");
+    ASSERT_NO_MATCH("\\d", "a");
+}
+
+/* \w matches letters, digits and '_', but not space or '-'. */
+TEST(class_word) {
+    ASSERT_MATCH("\\w", "a");
+    ASSERT_MATCH("\\w", "Z");
+    ASSERT_MATCH("\\w", "0");
+    ASSERT_MATCH("\\w", "_");
+    ASSERT_NO_MATCH("\\w", " ");
+    ASSERT_NO_MATCH("\\w", "-");
+}
+
+/* \s matches space, tab and newline. */
+TEST(class_space) {
+    ASSERT_MATCH("\\s", " ");
+    ASSERT_MATCH("\\s", "\t");
+    ASSERT_MATCH("\\s", "\n");
+    ASSERT_NO_MATCH("\\s", "a");
+}
+
+/* \D, \W and \S are the complements of \d, \w and \s. */
+TEST(class_negated) {
+    ASSERT_NO_MATCH("\\D", "0");
+    ASSERT_MATCH("\\D", "a");
+    ASSERT_NO_MATCH("\\W", "a");
+    ASSERT_MATCH("\\W", " ");
+    ASSERT_NO_MATCH("\\S", " ");
+    ASSERT_MATCH("\\S", "a");
+}
+
+/* Escaped metacharacters match themselves literally; an escaped dot no longer matches other characters. */
+TEST(escape_sequences) {
+    ASSERT_MATCH("\\.", ".");
+    ASSERT_NO_MATCH("\\.", "a");
+    ASSERT_MATCH("\\*", "*");
+    ASSERT_MATCH("\\+", "+");
+    ASSERT_MATCH("\\?", "?");
+}
+
+/* Simplified lower-case e-mail shape: letters, '@', letters, '.', letters. */
+TEST(complex_email) {
+    ASSERT_MATCH("[a-z]+@[a-z]+\\.[a-z]+", "test@example.com");
+    ASSERT_NO_MATCH("[a-z]+@[a-z]+\\.[a-z]+", "invalid");
+}
+
+/* 3-3-4 digit phone shape; the negative case is one digit short in the last block. */
+TEST(complex_phone) {
+    ASSERT_MATCH("\\d{3}-\\d{3}-\\d{4}", "123-456-7890");
+    ASSERT_NO_MATCH("\\d{3}-\\d{3}-\\d{4}", "123-456-789");
+}
+
+/* The optional 's' lets one pattern accept both the http and https schemes. */
+TEST(complex_url) {
+    ASSERT_MATCH("https?://[a-z]+\\.[a-z]+", "http://example.com");
+    ASSERT_MATCH("https?://[a-z]+\\.[a-z]+", "https://example.com");
+}
+
+/*
+ * Two capture groups: after a successful search, group_count is 2 and both
+ * group entries are flagged as matched.
+ * NOTE(review): ASSERT returns on failure, so re is not freed when a check
+ * after the compile fails.
+ */
+TEST(group_capture) {
+    loreg_error_t err;
+    loreg_regex_t *re = loreg_compile("(\\d+)-(\\d+)", &err);
+    ASSERT(re != NULL);
+
+    loreg_match_t result;
+    ASSERT(loreg_search(re, "123-456", &result));
+    ASSERT(result.group_count == 2);
+    ASSERT(result.groups[0].matched);
+    ASSERT(result.groups[1].matched);
+
+    loreg_free(re);
+}
+
+/* "((a)(b))" reports three groups: the outer one and the two inner ones. */
+TEST(nested_groups) {
+    loreg_error_t err;
+    loreg_regex_t *re = loreg_compile("((a)(b))", &err);
+    ASSERT(re != NULL);
+
+    loreg_match_t result;
+    ASSERT(loreg_search(re, "ab", &result));
+    ASSERT(result.group_count == 3);
+
+    loreg_free(re);
+}
+
+/*
+ * The empty pattern compiles and succeeds on non-empty text. This test
+ * calls loreg_match, whereas the other tests in this file use loreg_search.
+ */
+TEST(empty_pattern) {
+    loreg_error_t err;
+    loreg_regex_t *re = loreg_compile("", &err);
+    ASSERT(re != NULL);
+
+    loreg_match_t result;
+    ASSERT(loreg_match(re, "anything", &result));
+
+    loreg_free(re);
+}
+
+/*
+ * loreg_search reports where the match lies: for "test" inside
+ * "xxxtestyyy", match_start is 3 and match_end is 7, i.e. the end offset is
+ * one past the last matched character.
+ */
+TEST(match_position) {
+    loreg_error_t err;
+    loreg_regex_t *re = loreg_compile("test", &err);
+    ASSERT(re != NULL);
+
+    loreg_match_t result;
+    ASSERT(loreg_search(re, "xxxtestyyy", &result));
+    ASSERT(result.match_start == 3);
+    ASSERT(result.match_end == 7);
+
+    loreg_free(re);
+}
+
+/*
+ * Runs every matcher test through RUN_TEST, prints the passed/failed
+ * counters and returns 1 if any assertion failed, 0 otherwise.
+ */
+int main(void) {
+    printf("matcher tests:\n");
+
+    RUN_TEST(simple_char);
+    RUN_TEST(concat);
+    RUN_TEST(alternation);
+    RUN_TEST(star);
+    RUN_TEST(plus);
+    RUN_TEST(question);
+    RUN_TEST(dot);
+    RUN_TEST(bracket_simple);
+    RUN_TEST(bracket_range);
+    RUN_TEST(bracket_negated);
+    RUN_TEST(group);
+    RUN_TEST(anchors);
+    RUN_TEST(quantifier_exact);
+    RUN_TEST(quantifier_range);
+    RUN_TEST(quantifier_open);
+    RUN_TEST(class_digit);
+    RUN_TEST(class_word);
+    RUN_TEST(class_space);
+    RUN_TEST(class_negated);
+    RUN_TEST(escape_sequences);
+    RUN_TEST(complex_email);
+    RUN_TEST(complex_phone);
+    RUN_TEST(complex_url);
+    RUN_TEST(group_capture);
+    RUN_TEST(nested_groups);
+    RUN_TEST(empty_pattern);
+    RUN_TEST(match_position);
+
+    printf("\nmatcher: %d passed, %d failed\n", tests_passed, tests_failed);
+    return tests_failed > 0 ? 1 : 0;
+}
--- a/tests/test_nfa.c
+++ b/tests/test_nfa.c
@ -0,0 +1,159 @@
+/* retoor <retoor@molodetz.nl> */
+#include "../include/nfa.h"
+#include "../include/parser.h"
+#include <stdio.h>
+#include <assert.h>
+
+static int tests_passed = 0;
+static int tests_failed = 0;
+
+/* Declares the test function test_<name>; the body follows the macro. */
+#define TEST(name) static void test_##name(void)
+
+/*
+ * Runs test_<name> and records the outcome. ASSERT signals failure by
+ * incrementing tests_failed and returning from the test, so the test is
+ * counted as passed (and "ok" printed) only when tests_failed is unchanged
+ * after the call. This keeps a failing test from also being reported as ok.
+ */
+#define RUN_TEST(name) do { \
+    int failed_before_ = tests_failed; \
+    printf("  %s... ", #name); \
+    test_##name(); \
+    if (tests_failed == failed_before_) { \
+        printf("ok\n"); \
+        tests_passed++; \
+    } \
+} while(0)
+
+/*
+ * Checks cond inside a TEST body. On failure it prints the source line and
+ * the condition text, increments tests_failed and returns from the
+ * enclosing test function, so statements after a failed ASSERT (including
+ * cleanup) do not run.
+ */
+#define ASSERT(cond) do { \
+    if (!(cond)) { \
+        printf("FAILED at line %d: %s\n", __LINE__, #cond); \
+        tests_failed++; \
+        return; \
+    } \
+} while(0)
+
+/*
+ * Parses pattern and builds an NFA from the resulting AST.
+ * Returns NULL when the parser yields no AST or reports an error; otherwise
+ * returns whatever nfa_from_ast produced. The AST is handed to ast_free on
+ * every path, including when it is NULL. The error code written by
+ * nfa_from_ast is not examined.
+ */
+static nfa_t *compile_pattern(const char *pattern) {
+    parser_t parser;
+    loreg_error_t error;
+    nfa_t *built = NULL;
+
+    parser_init(&parser, pattern);
+    ast_node_t *tree = parser_parse(&parser);
+
+    /* Only a clean parse is turned into an NFA. */
+    if (tree && parser_get_error(&parser) == LOREG_OK) {
+        built = nfa_from_ast(tree, &error);
+    }
+
+    ast_free(tree);
+    return built;
+}
+
+/* Smoke test for "a": an NFA is built, has a start state and at least two states. */
+TEST(single_char) {
+    nfa_t *nfa = compile_pattern("a");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    ASSERT(nfa->state_count >= 2);
+    nfa_free(nfa);
+}
+
+/* Smoke test: the concatenation "ab" builds an NFA with a start state. */
+TEST(concat) {
+    nfa_t *nfa = compile_pattern("ab");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: the alternation "a|b" builds an NFA with a start state. */
+TEST(alternation) {
+    nfa_t *nfa = compile_pattern("a|b");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: "a*" builds an NFA with a start state. */
+TEST(star) {
+    nfa_t *nfa = compile_pattern("a*");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: "a+" builds an NFA with a start state. */
+TEST(plus) {
+    nfa_t *nfa = compile_pattern("a+");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: "a?" builds an NFA with a start state. */
+TEST(question) {
+    nfa_t *nfa = compile_pattern("a?");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* A single parenthesised group is reflected as group_count == 1 on the NFA. */
+TEST(group) {
+    nfa_t *nfa = compile_pattern("(ab)");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->group_count == 1);
+    nfa_free(nfa);
+}
+
+/* Nested groups are all counted: "((a)(b))" gives group_count == 3. */
+TEST(nested_groups) {
+    nfa_t *nfa = compile_pattern("((a)(b))");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->group_count == 3);
+    nfa_free(nfa);
+}
+
+/* Smoke test: the bracket expression "[abc]" builds an NFA with a start state. */
+TEST(bracket) {
+    nfa_t *nfa = compile_pattern("[abc]");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: the bounded quantifier "a{2,4}" builds an NFA with a start state. */
+TEST(quantifier) {
+    nfa_t *nfa = compile_pattern("a{2,4}");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* An anchored e-mail-like pattern with three groups compiles and reports group_count == 3. */
+TEST(complex_pattern) {
+    nfa_t *nfa = compile_pattern("^([a-z]+)@([a-z]+)\\.([a-z]{2,})$");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->group_count == 3);
+    nfa_free(nfa);
+}
+
+/* Smoke test: "a.b" builds an NFA with a start state. */
+TEST(dot) {
+    nfa_t *nfa = compile_pattern("a.b");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: the anchored pattern "^abc$" builds an NFA with a start state. */
+TEST(anchors) {
+    nfa_t *nfa = compile_pattern("^abc$");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/* Smoke test: the shorthand classes \d, \w and \s in sequence build an NFA with a start state. */
+TEST(character_classes) {
+    nfa_t *nfa = compile_pattern("\\d\\w\\s");
+    ASSERT(nfa != NULL);
+    ASSERT(nfa->start != NULL);
+    nfa_free(nfa);
+}
+
+/*
+ * Runs every NFA test through RUN_TEST, prints the passed/failed counters
+ * and returns 1 if any assertion failed, 0 otherwise.
+ */
+int main(void) {
+    printf("nfa tests:\n");
+
+    RUN_TEST(single_char);
+    RUN_TEST(concat);
+    RUN_TEST(alternation);
+    RUN_TEST(star);
+    RUN_TEST(plus);
+    RUN_TEST(question);
+    RUN_TEST(group);
+    RUN_TEST(nested_groups);
+    RUN_TEST(bracket);
+    RUN_TEST(quantifier);
+    RUN_TEST(complex_pattern);
+    RUN_TEST(dot);
+    RUN_TEST(anchors);
+    RUN_TEST(character_classes);
+
+    printf("\nnfa: %d passed, %d failed\n", tests_passed, tests_failed);
+    return tests_failed > 0 ? 1 : 0;
+}
--- a/tests/test_parser.c
+++ b/tests/test_parser.c
@ -0,0 +1,301 @@
+/* retoor <retoor@molodetz.nl> */
+#include "../include/parser.h"
+#include <stdio.h>
+#include <assert.h>
+
+static int tests_passed = 0;
+static int tests_failed = 0;
+
+/* Declares the test function test_<name>; the body follows the macro. */
+#define TEST(name) static void test_##name(void)
+
+/*
+ * Runs test_<name> and records the outcome. ASSERT signals failure by
+ * incrementing tests_failed and returning from the test, so the test is
+ * counted as passed (and "ok" printed) only when tests_failed is unchanged
+ * after the call. This keeps a failing test from also being reported as ok.
+ */
+#define RUN_TEST(name) do { \
+    int failed_before_ = tests_failed; \
+    printf("  %s... ", #name); \
+    test_##name(); \
+    if (tests_failed == failed_before_) { \
+        printf("ok\n"); \
+        tests_passed++; \
+    } \
+} while(0)
+
+/*
+ * Checks cond inside a TEST body. On failure it prints the source line and
+ * the condition text, increments tests_failed and returns from the
+ * enclosing test function, so statements after a failed ASSERT (including
+ * cleanup) do not run.
+ */
+#define ASSERT(cond) do { \
+    if (!(cond)) { \
+        printf("FAILED at line %d: %s\n", __LINE__, #cond); \
+        tests_failed++; \
+        return; \
+    } \
+} while(0)
+
+/* A one-character pattern parses to a single AST_CHAR node holding that character. */
+TEST(single_char) {
+    parser_t parser;
+    parser_init(&parser, "a");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_CHAR);
+    ASSERT(ast->value == 'a');
+
+    ast_free(ast);
+}
+
+/*
+ * Two adjacent characters parse to AST_CONCAT with the characters as left
+ * and right children, in pattern order.
+ * NOTE(review): ASSERT returns on failure, so ast is not freed when a check
+ * fails; left/right are dereferenced without a NULL check.
+ */
+TEST(concat) {
+    parser_t parser;
+    parser_init(&parser, "ab");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_CONCAT);
+    ASSERT(ast->left->type == AST_CHAR);
+    ASSERT(ast->left->value == 'a');
+    ASSERT(ast->right->type == AST_CHAR);
+    ASSERT(ast->right->value == 'b');
+
+    ast_free(ast);
+}
+
+/* "a|b" parses to AST_ALTER with the two branches as left and right children. */
+TEST(alternation) {
+    parser_t parser;
+    parser_init(&parser, "a|b");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_ALTER);
+    ASSERT(ast->left->type == AST_CHAR);
+    ASSERT(ast->left->value == 'a');
+    ASSERT(ast->right->type == AST_CHAR);
+    ASSERT(ast->right->value == 'b');
+
+    ast_free(ast);
+}
+
+/* "a*" parses to AST_STAR whose left child is the repeated AST_CHAR. */
+TEST(star) {
+    parser_t parser;
+    parser_init(&parser, "a*");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_STAR);
+    ASSERT(ast->left->type == AST_CHAR);
+    ASSERT(ast->left->value == 'a');
+
+    ast_free(ast);
+}
+
+/* "a+" parses to AST_PLUS whose left child is the repeated AST_CHAR. */
+TEST(plus) {
+    parser_t parser;
+    parser_init(&parser, "a+");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_PLUS);
+    ASSERT(ast->left->type == AST_CHAR);
+    ASSERT(ast->left->value == 'a');
+
+    ast_free(ast);
+}
+
+/* "a?" parses to AST_QUESTION whose left child is the optional AST_CHAR. */
+TEST(question) {
+    parser_t parser;
+    parser_init(&parser, "a?");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_QUESTION);
+    ASSERT(ast->left->type == AST_CHAR);
+    ASSERT(ast->left->value == 'a');
+
+    ast_free(ast);
+}
+
+/*
+ * "(ab)" parses to AST_GROUP wrapping the AST_CONCAT of its contents; the
+ * first group in a pattern gets group_id 0.
+ */
+TEST(group) {
+    parser_t parser;
+    parser_init(&parser, "(ab)");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_GROUP);
+    ASSERT(ast->group_id == 0);
+    ASSERT(ast->left->type == AST_CONCAT);
+
+    ast_free(ast);
+}
+
+/* "." parses to a single AST_DOT node. */
+TEST(dot) {
+    parser_t parser;
+    parser_init(&parser, ".");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_DOT);
+
+    ast_free(ast);
+}
+
+/* "^a$" parses to an AST_CONCAT root; the anchor nodes themselves are not inspected. */
+TEST(anchors) {
+    parser_t parser;
+    parser_init(&parser, "^a$");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_CONCAT);
+
+    ast_free(ast);
+}
+
+/* "[abc]" parses to AST_BRACKET with a bracket payload whose count is 3. */
+TEST(bracket_simple) {
+    parser_t parser;
+    parser_init(&parser, "[abc]");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_BRACKET);
+    ASSERT(ast->bracket != NULL);
+    ASSERT(ast->bracket->count == 3);
+
+    ast_free(ast);
+}
+
+/* "[a-z]" parses to AST_BRACKET holding one range entry running from 'a' to 'z'. */
+TEST(bracket_range) {
+    parser_t parser;
+    parser_init(&parser, "[a-z]");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_BRACKET);
+    ASSERT(ast->bracket != NULL);
+    ASSERT(ast->bracket->count == 1);
+    ASSERT(ast->bracket->ranges[0].start == 'a');
+    ASSERT(ast->bracket->ranges[0].end == 'z');
+
+    ast_free(ast);
+}
+
+/*
+ * "[^a]" parses to AST_BRACKET with the negated flag set.
+ * NOTE(review): unlike the other bracket tests, ast->bracket is
+ * dereferenced here without first asserting it is non-NULL.
+ */
+TEST(bracket_negated) {
+    parser_t parser;
+    parser_init(&parser, "[^a]");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_BRACKET);
+    ASSERT(ast->bracket->negated == true);
+
+    ast_free(ast);
+}
+
+/* "a{3}" parses to AST_QUANTIFIER with min and max both 3. */
+TEST(quantifier_exact) {
+    parser_t parser;
+    parser_init(&parser, "a{3}");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_QUANTIFIER);
+    ASSERT(ast->quant.min == 3);
+    ASSERT(ast->quant.max == 3);
+
+    ast_free(ast);
+}
+
+/* "a{2,5}" parses to AST_QUANTIFIER with min 2 and max 5. */
+TEST(quantifier_range) {
+    parser_t parser;
+    parser_init(&parser, "a{2,5}");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_QUANTIFIER);
+    ASSERT(ast->quant.min == 2);
+    ASSERT(ast->quant.max == 5);
+
+    ast_free(ast);
+}
+
+/*
+ * "a{2,}" parses to AST_QUANTIFIER with min 2 and max -1; -1 appears to be
+ * the parser's marker for "no upper bound" - confirm against parser.h.
+ */
+TEST(quantifier_open) {
+    parser_t parser;
+    parser_init(&parser, "a{2,}");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_QUANTIFIER);
+    ASSERT(ast->quant.min == 2);
+    ASSERT(ast->quant.max == -1);
+
+    ast_free(ast);
+}
+
+/* The \d escape parses to an AST_CLASS_DIGIT node. */
+TEST(character_class_digit) {
+    parser_t parser;
+    parser_init(&parser, "\\d");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_CLASS_DIGIT);
+
+    ast_free(ast);
+}
+
+/* The \w escape parses to an AST_CLASS_WORD node. */
+TEST(character_class_word) {
+    parser_t parser;
+    parser_init(&parser, "\\w");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_CLASS_WORD);
+
+    ast_free(ast);
+}
+
+/*
+ * An anchored e-mail-like pattern with three groups parses without error;
+ * only the AST pointer and the parser error code are checked, not the tree
+ * shape.
+ */
+TEST(complex_pattern) {
+    parser_t parser;
+    parser_init(&parser, "^([a-z]+)@([a-z]+)\\.([a-z]{2,})$");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(parser_get_error(&parser) == LOREG_OK);
+
+    ast_free(ast);
+}
+
+/*
+ * For the unterminated group "(abc" either outcome is accepted: a NULL AST,
+ * or an AST together with LOREG_ERR_UNBALANCED_PAREN.
+ * NOTE(review): ast may be NULL when ast_free is called - presumably
+ * ast_free tolerates NULL; confirm.
+ */
+TEST(unbalanced_paren) {
+    parser_t parser;
+    parser_init(&parser, "(abc");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast == NULL || parser_get_error(&parser) == LOREG_ERR_UNBALANCED_PAREN);
+
+    ast_free(ast);
+}
+
+/* "a*?" still parses to AST_STAR, with quant.greedy cleared by the trailing '?'. */
+TEST(non_greedy) {
+    parser_t parser;
+    parser_init(&parser, "a*?");
+    ast_node_t *ast = parser_parse(&parser);
+
+    ASSERT(ast != NULL);
+    ASSERT(ast->type == AST_STAR);
+    ASSERT(ast->quant.greedy == false);
+
+    ast_free(ast);
+}
+
+/*
+ * Runs every parser test through RUN_TEST, prints the passed/failed
+ * counters and returns 1 if any assertion failed, 0 otherwise.
+ */
+int main(void) {
+    printf("parser tests:\n");
+
+    RUN_TEST(single_char);
+    RUN_TEST(concat);
+    RUN_TEST(alternation);
+    RUN_TEST(star);
+    RUN_TEST(plus);
+    RUN_TEST(question);
+    RUN_TEST(group);
+    RUN_TEST(dot);
+    RUN_TEST(anchors);
+    RUN_TEST(bracket_simple);
+    RUN_TEST(bracket_range);
+    RUN_TEST(bracket_negated);
+    RUN_TEST(quantifier_exact);
+    RUN_TEST(quantifier_range);
+    RUN_TEST(quantifier_open);
+    RUN_TEST(character_class_digit);
+    RUN_TEST(character_class_word);
+    RUN_TEST(complex_pattern);
+    RUN_TEST(unbalanced_paren);
+    RUN_TEST(non_greedy);
+
+    printf("\nparser: %d passed, %d failed\n", tests_passed, tests_failed);
+    return tests_failed > 0 ? 1 : 0;
+}