commit 6a65176b159ecb250c36e5afd779b62900ef6533 Author: retoor Date: Tue Jan 14 19:05:52 2025 +0100 New repository. diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..6f46b94 --- /dev/null +++ b/.clang-format @@ -0,0 +1,192 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false 
+BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +QualifierAlignment: Leave +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +PackConstructorInitializers: BinPack +BasedOnStyle: '' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: false +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 
+PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Right +PPIndentWidth: -1 +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + BeforeNonEmptyParentheses: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... 
+ diff --git a/.github/workflows/make-rrex2-single-platform.yml b/.github/workflows/make-rrex2-single-platform.yml new file mode 100644 index 0000000..8c7b384 --- /dev/null +++ b/.github/workflows/make-rrex2-single-platform.yml @@ -0,0 +1,28 @@ +name: Build and run rrex2 + + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + BUILD_TYPE: Release + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Build + working-directory: ${{github.workspace}} + run: make build + + - name: Test + working-directory: ${{github.workspace}} + run: make run + diff --git a/.github/workflows/make-rrex3-single-platform.yml b/.github/workflows/make-rrex3-single-platform.yml new file mode 100644 index 0000000..08c6f93 --- /dev/null +++ b/.github/workflows/make-rrex3-single-platform.yml @@ -0,0 +1,28 @@ +name: Build and run rrex3 + + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + BUILD_TYPE: Release + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Build + working-directory: ${{github.workspace}} + run: make build_rrex3 + + - name: Test + working-directory: ${{github.workspace}} + run: make run_rrex3 + diff --git a/.github/workflows/make-rrex4-single-platform.yml b/.github/workflows/make-rrex4-single-platform.yml new file mode 100644 index 0000000..bf84df2 --- /dev/null +++ b/.github/workflows/make-rrex4-single-platform.yml @@ -0,0 +1,28 @@ +name: Build and run rrex4 + + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + BUILD_TYPE: Release + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Build + working-directory: ${{github.workspace}} + run: make rrex4 + + - name: Test + working-directory: ${{github.workspace}} + run: make debug_rrex4 + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2f397a9 --- /dev/null +++ b/.gitignore 
@@ -0,0 +1,14 @@ +.vscode +.bzr +*.o +rrex2 +rrex2full.c +rrex2full +rrex4 +rrex.coverage +rrex3.coverage +rrex4.coverage +*.cast +*.tty +regex.py +rrex3alle.c \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d1fb5a9 --- /dev/null +++ b/Makefile @@ -0,0 +1,113 @@ +all: format_all build run + +update_rlib: + cp ../rlib/rlib.c ./rlib.h + +format_all: + clang-format *.c *.h -i + +build: + gcc rrex2.c -o rrex2 -O2 -Wall -Wextra -static + +run: + ./rrex2 + +test: + $(MAKE) build + ./rrex2 test + +cli: build + ./rrex2 cli + +one-file: + rmerge rrex2.c > rrex2full.c + clang-format rrex2full.c -i + gcc rrex2full.c -o rrexfull.o -O3 -static -Wall -Wextra + @echo "g++ rrex2full.c -o rrex2full.o -O2" + +compiler: + gcc compiler.c -o compiler.o -O3 + ./compiler.o + +coverage: + @rm -f *.gcda 2>/dev/null + @rm -f *.gcno 2>/dev/null + @rm -f rrex.coverage.info 2>/dev/null + gcc -pg -fprofile-arcs -ftest-coverage -g -o rrex_coverage.o rrex2.c + ./rrex_coverage.o test + lcov --capture --directory . --output-file rrex.coverage.info + genhtml rrex.coverage.info --output-directory rrex.coverage + @rm -f *.gcda 2>/dev/null + @rm -f *.gcno 2>/dev/null + @rm -f rrex.coverage.info 2>/dev/null + @rm -f rrex_coverage.o + @rm -f gmon.out + google-chrome rrex.coverage/index.html + +build_and_run_rrex3: build_rrex3 run_rrex3 + +build_rrex3: + gcc rrex3.c -o rrex3 -Wall -Wextra -Ofast + -@rmerge rrex3.c > rrex3all.c + -@gcc -E rrex3.c -o rrex3alle.c -Wall -Wextra -Ofast + +run_rrex3: + ./rrex3 + +r4: rrex4 run_rrex4 + +rrex4: rrex4.c rrex4.h + gcc rrex4.c -o rrex4 -Wall -Wextra -Ofast + +run_rrex4: + ./rrex4 + +debug_rrex4: + ./rrex4 --debug + +coverage_rrex4: + @rm -f *.gcda 2>/dev/null + @rm -f *.gcno 2>/dev/null + @rm -f rrex4.coverage.info 2>/dev/null + gcc -pg -fprofile-arcs -ftest-coverage -g -o rrex4_coverage.o rrex4.c + ./rrex4_coverage.o test --debug + lcov --capture --directory . 
--output-file rrex4.coverage.info + genhtml rrex4.coverage.info --output-directory rrex4.coverage + @rm -f *.gcda 2>/dev/null + @rm -f *.gcno 2>/dev/null + @rm -f rrex4.coverage.info 2>/dev/null + @rm -f rrex4_coverage.o + @rm -f gmon.out + google-chrome rrex4.coverage/index.html + + + + +build_and_run_re: build_re run_re + +build_re: + gcc re.c -o re -Wall -Wextra -O2 + +run_re: + ./re "/home/projects/retoor/rlib" " (.*) "; + +coverage_rrex3: + @rm -f *.gcda 2>/dev/null + @rm -f *.gcno 2>/dev/null + @rm -f rrex3.coverage.info 2>/dev/null + gcc -pg -fprofile-arcs -ftest-coverage -g -o rrex3_coverage.o rrex3.c + ./rrex3_coverage.o test + lcov --capture --directory . --output-file rrex3.coverage.info + genhtml rrex3.coverage.info --output-directory rrex3.coverage + @rm -f *.gcda 2>/dev/null + @rm -f *.gcno 2>/dev/null + @rm -f rrex3.coverage.info 2>/dev/null + @rm -f rrex3_coverage.o + @rm -f gmon.out + google-chrome rrex3.coverage/index.html + +publish: + brz add + brz commit + brz push lp:rrex2 + diff --git a/README.md b/README.md new file mode 100644 index 0000000..4689adf --- /dev/null +++ b/README.md @@ -0,0 +1,54 @@ +# RREX + +## Regular expression interpreter / validator + +This regular expression validator was written with the goal of being faster than the glibc regular expression validator, and it succeeds. In 23/25 tests it scores a better result than the original glibc validator. My bytecode compiler is way faster than the one provided by glibc, and my executor is often faster as well. For single validation, my validator is always a better choice. + +## Benchmark and test +Benchmark vs. glibc regex. (Animated gif, takes some time to load) + +![Gif of build process](build.gif) + +## Todo / issues + - Fix segmentation fault if expr ends with \\d? + - rassert(!rrex("123", "[123]+b")); doesn't work + - `abc` is reported as a valid match for `abc[gg]d`. It shouldn't be. + +## Make + +### `all:` +Runs the following tasks sequentially: +- **`one-file:`** Merges, formats, and compiles a single C file. 
+- **`format_all:`** Formats all `.c` and `.h` files using `clang-format`. +- **`build:`** Compiles the main program (`rrex2.c`) with optimization and static linking. +- **`run:`** Executes the compiled program (`rrex2`). + +### `format_all:` +Formats all `.c` and `.h` files in the directory using `clang-format`. + +### `build:` +Compiles `rrex2.c` into an executable named `rrex2` with optimization (`-O2`), all warnings enabled (`-Wall`), and extra warnings enabled (`-Wextra`). The executable is statically linked. + +### `run:` +Runs the compiled program (`rrex2`). + +### `test:` +Rebuilds the project by running the `build` target and then runs the program with `test` as an argument. + +### `cli:` +Runs the `build` target and then executes the program in CLI mode. + +### `compiler:` +Compiles `compiler.c` into an optimized executable `compiler.o` and then runs it. + +### `backup:` +Creates a compressed archive (`rrex.rzip`) of all `.c`, `.h`, Makefile, and markdown files in the directory. + +### `coverage:` +Generates code coverage information: +- Removes existing coverage data files. +- Compiles `rrex2.c` with profiling and test coverage flags. +- Executes the compiled coverage binary with `test` as an argument. +- Captures coverage data with `lcov` and generates an HTML report using `genhtml`. +- Opens the coverage report in Google Chrome. +- Cleans up intermediate coverage files and the binary. 
diff --git a/build.gif b/build.gif new file mode 100644 index 0000000..1f14f7d Binary files /dev/null and b/build.gif differ diff --git a/compiler.c b/compiler.c new file mode 100644 index 0000000..a309956 --- /dev/null +++ b/compiler.c @@ -0,0 +1,31 @@ +#include "compiler.h" +#include "rlib.h" +#include +#include +#include +#include + +void rrex_compiler_repl() { + rclear(); + printf("Type expression to convert bytecode to human readable format.\n"); + while (true) { + rprintb("> "); + char line[8096]; + rreadline(line, sizeof(line), true); + if (!line) + continue; + char bdata[sizeof(line) * 2]; + rprint("\\t"); + rrex_compile(line, bdata); + rprinty("< "); + print_bc(bdata); + rprint("\n"); + } +} + +int main() { + rrex_compiler_tests(); + printf("%s\n", "Executed all compiler tests at boot of this application."); + rrex_compiler_repl(); + return 0; +} \ No newline at end of file diff --git a/compiler.h b/compiler.h new file mode 100644 index 0000000..7c9374c --- /dev/null +++ b/compiler.h @@ -0,0 +1,314 @@ +#include "rlib.h" +#include "rrex.h" +#include + +typedef struct rrex_compiler_t { + int previous_method; + char *previous_method_start; + char *bdata; + char *rdata; +} rrex_compiler_t; + +void compile_one(rrex_compiler_t *compiler, char **content, char **compiled, + int *indexp); +void rrex_compile(char *content, char *compiled); +int test_compiler(); +int convert_bt(size_t i); +char *format_bc(char *code); +void print_bc(char *code); +int test_compile(char *s, char *r); + +void rexx_init_compiler(rrex_compiler_t *c, char *rdata, char *bdata) { + memset(c, 0, sizeof(rrex_compiler_t)); + c->rdata = rdata; + c->bdata = bdata; + c->previous_method = 0; + c->previous_method_start = rdata; +} + +typedef enum reg_new_t { + RN_LITERAL = 1, + RN_DRANGE = 2, + RN_ARANGE = 3, + RN_IGNORE = 4, + RN_REPEAT = 5, + RN_FUNCTION = 6, + RN_DOT, + RN_ROOF, + RN_CHOICE_START, + RN_CHOICE_END, + RN_WHITESPACE, + RN_SLASH_CD, + RN_SLASH_CW, + RN_PLUS, + RN_DOLLAR, + 
RN_ASTERISK,
+    RN_GROUP_START,
+    RN_GROUP_END,
+    RN_PIPE,
+    RN_QUESTION,
+    RN_DIGIT,
+    RN_ALPHA
+} reg_new_t;
+
+/*
+ * Maps a bytecode opcode (1-based reg_new_t value) to the single character
+ * used in the human readable dump. Any value outside the opcode table,
+ * including 0, is returned unchanged.
+ */
+int convert_bt(size_t i) {
+    static const char chars[] = "lRRirf.^[]wDW+$*()|?da";
+    /* Opcodes start at 1; rejecting 0 avoids reading chars[-1]. */
+    if (i >= 1 && i <= sizeof(chars) - 1)
+        return chars[i - 1];
+    return i;
+}
+
+/*
+ * Renders NUL-terminated bytecode as text in a static buffer (overwritten by
+ * every call, so not reentrant). A byte following RN_LITERAL is printed as
+ * "(c)", a byte following RN_REPEAT as a decimal number, and every other
+ * byte through convert_bt(). Output is truncated if the buffer fills up.
+ */
+char *format_bc(char *code) {
+    static char result[50000];
+    size_t used = 0;
+    size_t length = strlen(code); /* hoisted out of the loop condition */
+    result[0] = 0;
+    for (size_t i = 0; i < length; i++) {
+        char value = code[i];
+        /* The first byte has no predecessor; never read code[-1]. */
+        char previous = i ? code[i - 1] : 0;
+        char chunk[10];
+        int written;
+        if (previous == RN_LITERAL)
+            written = snprintf(chunk, sizeof(chunk), "(%c)", value);
+        else if (previous == RN_REPEAT)
+            written = snprintf(chunk, sizeof(chunk), "%d", value);
+        else
+            written = snprintf(chunk, sizeof(chunk), "%c", convert_bt(value));
+        /* Stop instead of overflowing the static buffer. */
+        if (written < 0 || used + (size_t)written >= sizeof(result))
+            break;
+        memcpy(result + used, chunk, (size_t)written + 1);
+        used += (size_t)written;
+    }
+    return result;
+}
+
+/* Prints the human readable form of `code` to stdout. */
+void print_bc(char *code) {
+    char *human_readable = format_bc(code);
+    printf("%s", human_readable);
+}
+
+void compile_one(rrex_compiler_t *compiler, char **content, char **compiled,
+                 int *indexp) {
+    char *r = *content;
+    char *c = *compiled;
+    int index = *indexp;
+    if (*r == '*') {
+        compiler->previous_method_start = r;
+
+        c[index] = RN_ASTERISK;
+        index++;
+        r++;
+    } else if (*r == '\\') {
+        r++;
+        if (*r == 'd') {
+            compiler->previous_method_start = r - 1;
+
+            c[index] = RN_DIGIT;
+            index++;
+            r++;
+        } else if (*r == 'w') {
+            compiler->previous_method_start = r - 1;
+
+            c[index] = RN_ALPHA;
+            index++;
+            r++;
+        } else if (*r == 's') {
+            compiler->previous_method_start = r - 1;
+
+            c[index] = RN_WHITESPACE;
+            index++;
+            r++;
+        } else if (*r == 'D') {
+            compiler->previous_method_start = r - 1;
+
+            c[index] = RN_SLASH_CD;
+            index++;
+            r++;
+        } else if (*r == 'W') {
+            compiler->previous_method_start = r - 1;
+
+            c[index] = RN_SLASH_CW;
+            index++;
+            r++;
+        } else {
+            compiler->previous_method_start = r - 1;
+            c[index] = *r;
+            index++;
+            r++;
+        }
+    } else if (*r == '$') {
+        compiler->previous_method_start = r;
+        c[index] = 
RN_DOLLAR; + index++; + r++; + } else if (*r == '(') { + char *choice_start = r; + r++; + c[index] = RN_GROUP_START; + index++; + while (*r != ')') { + compile_one(compiler, &r, &c, &index); + } + compiler->previous_method_start = choice_start; + c[index] = RN_GROUP_END; + index++; + r++; + } else if (*r == '|') { + compiler->previous_method_start = r; + c[index] = RN_PIPE; + index++; + r++; + } else if (*r == '?') { + r++; + if (index) { + char buff_r[1024] = {0}; + char *br = buff_r; + char *br_start = br; + char *first_position = compiler->previous_method_start; + char *rindex = first_position; + while (rindex != r - 1) { + *br = *rindex; + br++; + *br = 0; + rindex++; + } + br = br_start; + char buff_b[1024] = {0}; + char *bc = buff_b; + char *bc_start = buff_b; + int indexb = 0; + compile_one(compiler, &br, &bc, &indexb); + bc = bc_start; + index -= strlen(bc); + c[index] = RN_QUESTION; + index++; + while (*bc) { + c[index] = *bc; + index++; + bc++; + } + compiler->previous_method_start = r - 1; + } + } else if (isalpharange(r) || isdigitrange(r)) { + compiler->previous_method_start = r; + + c[index] = isalpha(*r) ? 
RN_ARANGE : RN_DRANGE; + index++; + c[index] = *r; + index++; + r += 2; + c[index] = *r; + index++; + r++; + } else if (*r == '.') { + compiler->previous_method_start = r; + + c[index] = RN_DOT; + index++; + r++; + } else if (*r == '^') { + compiler->previous_method_start = r; + + c[index] = RN_ROOF; + index++; + r++; + } else if (*r == '[') { + char *choice_start = r; + r++; + c[index] = RN_CHOICE_START; + index++; + while (*r != ']') { + compile_one(compiler, &r, &c, &index); + } + compiler->previous_method_start = choice_start; + c[index] = RN_CHOICE_END; + index++; + r++; + } else if (*r == '+') { + compiler->previous_method_start = r; + + r++; + c[index] = RN_PLUS; + index++; + } else if (*r == '{') { + + r++; + char *to_repeat = compiler->previous_method_start; // r - 2; + compiler->previous_method_start = r; + char *to_repeat_end = r - 2; + if (isgrouping(to_repeat)) { + char begin_chr = groupcreverse(*(r - 2)); + while (*to_repeat != begin_chr) + to_repeat--; + to_repeat--; + } else { + to_repeat--; + } + int times = *r - '0'; + r++; + while (isdigit(*r)) { + times *= 10; + times += *r - '0'; + r++; + } + for (int i = 0; i < times - 1; i++) { + char *repeat_index = to_repeat + 1; + while (repeat_index <= to_repeat_end) { + compile_one(compiler, &repeat_index, &c, &index); + } + } + r++; + } else { + compiler->previous_method_start = r; + c[index] = *r; + index++; + r++; + } + c[index] = 0; + *indexp = index; + *content = r; + *compiled = c; +} + +void rrex_compile(char *content, char *compiled) { + rrex_compiler_t compiler; + rexx_init_compiler(&compiler, content, compiled); + + char *r = content; + int index = 0; + while (*r) { + compile_one(&compiler, &r, &compiled, &index); + } + + compiled[index] = 0; +} + +int test_compile(char *s, char *r) { + char compiled[50000]; + memset(compiled, 0, sizeof(compiled)); + rrex_compile(s, compiled); + char *human_format = format_bc(compiled); + + bool result = !strcmp(r, human_format); + rassert(result); + return 
result; +} + +void rrex_compiler_tests() { + rtest_banner("rrex compiler"); + + test_compile("\\W\\w\\d\\D", "WadD"); + test_compile("0-9", "R09"); + test_compile("a-z", "Raz"); + test_compile("0-9a-z", "R09Raz"); + test_compile("0-9A-Z", "R09RAZ"); + test_compile("^12^3", "^12^3"); + test_compile("3{1}", "3"); + test_compile("3{2}", "33"); + test_compile("[123]{1}", "[123]"); + test_compile("[123]{2}", "[123][123]"); + test_compile("[123]{3}$", "[123][123][123]$"); + test_compile("(123){3}$", "(123)(123)(123)$"); +} \ No newline at end of file diff --git a/re b/re new file mode 100755 index 0000000..af08d2d Binary files /dev/null and b/re differ diff --git a/re.c b/re.c new file mode 100644 index 0000000..1676db4 --- /dev/null +++ b/re.c @@ -0,0 +1,53 @@ +#define RREX3_DEBUG 0 +#include "../rlib/rlib.h" +#include "rrex3.h" + +rrex3_t *rrex; +char *expr; + +void file_found(char *path) { + if (rfile_size(path) < 1024) { + + if (risdir(path)) + return; + if (!rstrendswith(path, ".c") && !rstrendswith(path, ".h")) { + return; + } + if (rstrendswith(path, "rlib.h")) { + return; + } + size_t size = rfile_size(path); + char file_data[size + 1 + 1024 * 1024]; + rfile_readb(path, file_data, size); + file_data[size] = 0; + rrex->str = file_data; + char *ptr = file_data; + while (rrex3(rrex, ptr, expr)) { + ptr = rrex->str; + printf("%s\n", rrex->str); + printf(">%s<\n", rrex->matches[0]); + printf("J,"); + break; + // printf("%s\n",rrex->matches[0]); + } + printf("\n"); + } +} + +int main(int argc, char *argv[]) { + rrex = rrex3_new(); + if (argc != 3) { + printf("Usage: \n"); + return 1; + } + + char fixed[strlen(argv[2]) + 20]; + fixed[0] = 0; + strcpy(fixed, "int (.*)[; ]?"); + // sprintf(fixed, "%s", argv[2]); + expr = fixed; + ; + rforfile(argv[1], file_found); + + return 0; +} \ No newline at end of file diff --git a/rlib.h b/rlib.h new file mode 100644 index 0000000..4af31b4 --- /dev/null +++ b/rlib.h @@ -0,0 +1,4843 @@ +// RETOOR - Sep 24 2024 +// MIT License +// 
=========== + +// Copyright (c) 2024 Retoor + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+#ifndef RLIB_H
+#define RLIB_H
+// BEGIN OF RLIB
+#ifndef RSTRING_LIST_H
+#define RSTRING_LIST_H
+#include
+#include
+#include
+
+/* Growable list of heap-allocated string copies owned by the list. */
+typedef struct rstring_list_t {
+    unsigned int size;  /* number of allocated slots in `strings` */
+    unsigned int count; /* number of slots currently in use */
+    char **strings;
+} rstring_list_t;
+
+/* Allocates an empty, zeroed list. Returns NULL when out of memory. */
+rstring_list_t *rstring_list_new() {
+    return (rstring_list_t *)calloc(1, sizeof(rstring_list_t));
+}
+
+/* Releases every stored string, the slot array and the list itself.
+ * Passing NULL is a no-op. */
+void rstring_list_free(rstring_list_t *rsl) {
+    if (!rsl)
+        return;
+    /* Only the first `count` slots hold valid pointers. */
+    for (unsigned int i = 0; i < rsl->count; i++) {
+        free(rsl->strings[i]);
+    }
+    free(rsl->strings); /* previously leaked */
+    free(rsl);
+}
+
+/* Appends a copy of `str`. On allocation failure the list is left
+ * unchanged (the function has no way to report the error). */
+void rstring_list_add(rstring_list_t *rsl, char *str) {
+    if (rsl->count == rsl->size) {
+        /* Keep the old array reachable if realloc fails. */
+        char **grown =
+            (char **)realloc(rsl->strings, sizeof(char *) * (rsl->size + 1));
+        if (!grown)
+            return;
+        rsl->strings = grown;
+        rsl->size++;
+    }
+    char *copy = (char *)malloc(strlen(str) + 1);
+    if (!copy)
+        return;
+    strcpy(copy, str);
+    rsl->strings[rsl->count] = copy;
+    rsl->count++;
+}
+
+/* Returns true when a stored string compares equal to `str`. */
+bool rstring_list_contains(rstring_list_t *rsl, char *str) {
+    for (unsigned int i = 0; i < rsl->count; i++) {
+        if (!strcmp(rsl->strings[i], str))
+            return true;
+    }
+    return false;
+}
+
+#endif
+#ifndef RAUTOCOMPLETE_H
+#define RAUTOCOMPLETE_H
+#define R4_DEBUG
+#ifndef RREX4_H
+#define RREX4_H
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define R4_DEBUG_a
+
+#ifdef R4_DEBUG
+static int _r4_debug = 1;
+#else
+static int _r4_debug = 0;
+#endif
+
+/*
+ * Shortens a validator function name for debug output by dropping its first
+ * 11 characters (the length of "r4_validate", the prefix of the validators
+ * in this file) and one following underscore. Returns a static buffer, or
+ * " -" when nothing remains.
+ */
+static char *_format_function_name(const char *name) {
+    static char result[100];
+    const size_t prefix_length = 11;
+    result[0] = 0;
+
+    const char *new_name = name;
+    /* Never step past the terminator of a short name. */
+    if (strlen(name) >= prefix_length)
+        new_name += prefix_length;
+    if (new_name[0] == '_')
+        new_name += 1;
+    if (strlen(new_name) == 0) {
+        return " -";
+    }
+    /* Bounded copy: long names are truncated instead of overflowing. */
+    snprintf(result, sizeof(result), "%s", new_name);
+    return result;
+}
+
+#define DEBUG_VALIDATE_FUNCTION                                                \
+    if (_r4_debug || r4->debug)                                                \
+        printf("DEBUG: %s %s <%s> \"%s\"\n", _format_function_name(__func__),  \
+               r4->valid ? 
"valid" : "INVALID", r4->expr, r4->str); + +struct r4_t; + +void r4_enable_debug() { _r4_debug = true; } +void r4_disable_debug() { _r4_debug = false; } + +typedef bool (*r4_function)(struct r4_t *); + +typedef struct r4_t { + bool debug; + bool valid; + bool in_block; + bool in_range; + unsigned int backtracking; + unsigned int loop_count; + unsigned int in_group; + unsigned int match_count; + unsigned int validation_count; + unsigned int start; + unsigned int end; + unsigned int length; + bool (*functions[254])(struct r4_t *); + bool (*slash_functions[254])(struct r4_t *); + char *_str; + char *_expr; + char *match; + char *str; + char *expr; + char *str_previous; + char *expr_previous; + char **matches; +} r4_t; + +static bool v4_initiated = false; +typedef bool (*v4_function_map)(r4_t *); +v4_function_map v4_function_map_global[256]; +v4_function_map v4_function_map_slash[256]; +v4_function_map v4_function_map_block[256]; + +static void r4_free_matches(r4_t *r) { + if (!r) + return; + if (r->match) { + free(r->match); + r->match = NULL; + } + if (!r->match_count) { + return; + } + for (unsigned i = 0; i < r->match_count; i++) { + free(r->matches[i]); + } + free(r->matches); + r->match_count = 0; + r->matches = NULL; +} + +static void r4_free(r4_t *r) { + if (!r) + return; + r4_free_matches(r); + free(r); +} + +static bool r4_backtrack(r4_t *r4); +static bool r4_validate(r4_t *r4); +static void r4_match_add(r4_t *r4, char *extracted); + +static bool r4_validate_literal(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (!r4->valid) + return false; + if (*r4->str != *r4->expr) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + if (r4->in_range || r4->in_block) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_question_mark(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->valid = true; + r4->expr++; + return r4_validate(r4); +} + +static bool r4_validate_plus(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (r4->valid == 
false) { + return r4_validate(r4); + } + char *expr_left = r4->expr_previous; + char *expr_right = r4->expr; + char *str = r4->str; + char *return_expr = NULL; + if (*expr_right == ')') { + return_expr = expr_right; + expr_right++; + } + r4->in_block = true; + r4->expr = expr_left; + while (r4->valid) { + if (*expr_right) { + r4->expr = expr_right; + r4->in_block = false; + if (r4_backtrack(r4)) { + + if (return_expr) { + r4->str = str; + r4->expr = return_expr; + } + return r4_validate(r4); + } else { + r4->in_block = true; + } + } + r4->valid = true; + r4->expr = expr_left; + r4->str = str; + r4_validate(r4); + str = r4->str; + } + r4->in_block = false; + r4->valid = true; + r4->expr = return_expr ? return_expr : expr_right; + return r4_validate(r4); +} + +static bool r4_validate_dollar(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + return *r4->str == 0; +} + +static bool r4_validate_roof(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (r4->str != r4->_str) { + return false; + } + r4->expr++; + return r4_validate(r4); +} + +static bool r4_validate_dot(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (*r4->str == 0) { + return false; + } + r4->expr++; + r4->valid = *r4->str != '\n'; + r4->str++; + + if (r4->in_range || r4->in_block) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_validate_asterisk(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (r4->valid == false) { + r4->valid = true; + return r4->valid; + // return r4_validate(r4); + } + char *expr_left = r4->expr_previous; + char *expr_right = r4->expr; + char *str = r4->str; + char *return_expr = NULL; + if (*expr_right == ')') { + return_expr = expr_right; + expr_right++; + } + r4->in_block = true; + r4->expr = expr_left; + while (r4->valid) { + if (*expr_right) { + r4->expr = expr_right; + r4->in_block = false; + if (r4_backtrack(r4)) { + + if (return_expr) { + r4->str = str; + r4->expr = return_expr; + } + return r4_validate(r4); + } else { + r4->in_block = true; + } + } + 
r4->valid = true; + r4->expr = expr_left; + r4->str = str; + r4_validate(r4); + str = r4->str; + } + r4->in_block = false; + r4->valid = true; + r4->expr = return_expr ? return_expr : expr_right; + return r4_validate(r4); +} + +static bool r4_validate_pipe(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (r4->valid == true) { + return true; + } else { + r4->valid = true; + } + return r4_validate(r4); +} + +static bool r4_validate_digit(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (!isdigit(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + if (r4->in_block) { + return r4->valid; + } + if (r4->in_range) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_not_digit(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (isdigit(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + + if (r4->in_block) { + return r4->valid; + } + + if (r4->in_range) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_word(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (!isalpha(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + + if (r4->in_block) { + return r4->valid; + } + + if (r4->in_range) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_not_word(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (isalpha(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + + if (r4->in_block) { + return r4->valid; + } + + if (r4->in_range) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_isrange(char *s) { + if (!isalnum(*s)) { + return false; + } + if (*(s + 1) != '-') { + return false; + } + return isalnum(*(s + 2)); +} + +static bool r4_validate_block_open(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (r4->valid == false) { + return false; + } + char *expr_self = r4->expr; + r4->expr++; + bool reversed = *r4->expr == '^'; + if (reversed) { + r4->expr++; + } + + bool valid_once = false; + r4->in_block = 
true; + while (*r4->expr != ']') { + r4->valid = true; + if (r4_isrange(r4->expr)) { + char s = *r4->expr; + char e = *(r4->expr + 2); + r4->expr += 2; + if (s > e) { + char tempc = s; + s = e; + e = tempc; + } + if (*r4->str >= s && *r4->str <= e) { + if (!reversed) { + r4->str++; + } + valid_once = true; + break; + } else { + r4->expr++; + } + } else if (r4_validate(r4)) { + valid_once = true; + if (reversed) + r4->str--; + break; + } + } + char *expr_end = strchr(r4->expr, ']'); + + r4->expr = expr_end ? expr_end : r4->expr; + r4->in_block = false; + r4->valid = expr_end && (!reversed ? valid_once : !valid_once); + r4->expr++; + r4->expr_previous = expr_self; + + if (r4->in_range) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_validate_whitespace(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->valid = strchr("\r\t \n", *r4->str) != NULL; + r4->expr++; + if (r4->valid) { + r4->str++; + } + if (r4->in_range || r4->in_block) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_not_whitespace(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->valid = strchr("\r\t \n", *r4->str) == NULL; + r4->expr++; + if (r4->valid) { + r4->str++; + } + if (r4->in_range || r4->in_block) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_validate_range(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION; + if (r4->valid == false) { + r4->expr++; + return false; + } + char *previous = r4->expr_previous; + r4->in_range = true; + r4->expr++; + unsigned int start = 0; + while (isdigit(*r4->expr)) { + start = 10 * start; + start += *r4->expr - '0'; + r4->expr++; + } + if (start != 0) + start--; + + unsigned int end = 0; + bool variable_end_range = false; + if (*r4->expr == ',') { + r4->expr++; + if (!isdigit(*r4->expr)) { + variable_end_range = true; + } + } + while (isdigit(*r4->expr)) { + end = end * 10; + end += *r4->expr - '0'; + r4->expr++; + } + r4->expr++; + + bool valid = true; + char *expr_right = r4->expr; + for (unsigned int i 
= 0; i < start; i++) { + r4->expr = previous; + valid = r4_validate(r4); + if (!*r4->str) + break; + if (!valid) { + break; + } + } + r4->expr = expr_right; + r4->in_range = false; + if (!r4->valid) + return false; + return r4_validate(r4); + + for (unsigned int i = start; i < end; i++) { + r4->expr = previous; + valid = r4_validate(r4); + if (!valid) { + break; + } + } + + while (variable_end_range) { + r4->in_range = false; + valid = r4_validate(r4); + r4->in_range = true; + if (valid) { + break; + } + r4->in_range = true; + valid = r4_validate(r4); + r4->in_range = false; + if (!valid) { + break; + } + } + r4->valid = valid; + + return r4_validate(r4); +} + +static bool r4_validate_group_close(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + return r4->valid; +} + +static bool r4_validate_group_open(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + char *expr_previous = r4->expr_previous; + r4->expr++; + bool save_match = r4->in_group == 0; + r4->in_group++; + char *str_extract_start = r4->str; + bool valid = r4_validate(r4); + + if (!valid || *r4->expr != ')') { + // this is a valid case if not everything between () matches + r4->in_group--; + if (save_match == false) { + r4->valid = true; + } + + // Not direct return? 
Not sure + return r4_validate(r4); + } + if (save_match) { + char *str_extract_end = r4->str; + unsigned int extracted_length = str_extract_end - str_extract_start; + // strlen(str_extract_start) - strlen(str_extract_end); + char *str_extracted = + (char *)calloc(sizeof(char), extracted_length + 1); + strncpy(str_extracted, str_extract_start, extracted_length); + r4_match_add(r4, str_extracted); + } + assert(*r4->expr == ')'); + r4->expr++; + r4->in_group--; + r4->expr_previous = expr_previous; + return r4_validate(r4); +} + +static bool r4_validate_slash(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + // The handling code for handling slashes is implemented in r4_validate + char *expr_previous = r4->expr_previous; + r4->expr++; + r4_function f = v4_function_map_slash[(int)*r4->expr]; + r4->expr_previous = expr_previous; + return f(r4); +} + +static void r4_match_add(r4_t *r4, char *extracted) { + r4->matches = + (char **)realloc(r4->matches, (r4->match_count + 1) * sizeof(char *)); + r4->matches[r4->match_count] = extracted; + r4->match_count++; +} + +static bool r4_validate_word_boundary_start(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (!r4->valid) { + return r4->valid; + } + r4->valid = + isalpha(*r4->str) && (r4->str == r4->_str || !isalpha(*(r4->str - 1))); + if (r4->in_range || r4->in_block) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_word_boundary_end(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (!r4->valid) { + return r4->valid; + } + r4->valid = + isalpha(*r4->str) && (*(r4->str + 1) == 0 || !isalpha(*(r4->str + 1))); + if (r4->in_range || r4->in_block) { + return r4->valid; + } + return r4_validate(r4); +} + +static void v4_init_function_maps() { + if (v4_initiated) + return; + v4_initiated = true; + for (__uint8_t i = 0; i < 255; i++) { + v4_function_map_global[i] = r4_validate_literal; + v4_function_map_slash[i] = r4_validate_literal; + v4_function_map_block[i] = r4_validate_literal; + } + 
v4_function_map_global['*'] = r4_validate_asterisk; + v4_function_map_global['?'] = r4_validate_question_mark; + v4_function_map_global['+'] = r4_validate_plus; + v4_function_map_global['$'] = r4_validate_dollar; + v4_function_map_global['^'] = r4_validate_roof; + v4_function_map_global['.'] = r4_validate_dot; + v4_function_map_global['|'] = r4_validate_pipe; + v4_function_map_global['\\'] = r4_validate_slash; + v4_function_map_global['['] = r4_validate_block_open; + v4_function_map_global['{'] = r4_validate_range; + v4_function_map_global['('] = r4_validate_group_open; + v4_function_map_global[')'] = r4_validate_group_close; + v4_function_map_slash['b'] = r4_validate_word_boundary_start; + v4_function_map_slash['B'] = r4_validate_word_boundary_end; + v4_function_map_slash['d'] = r4_validate_digit; + v4_function_map_slash['w'] = r4_validate_word; + v4_function_map_slash['D'] = r4_validate_not_digit; + v4_function_map_slash['W'] = r4_validate_not_word; + v4_function_map_slash['s'] = r4_validate_whitespace; + v4_function_map_slash['S'] = r4_validate_not_whitespace; + v4_function_map_block['\\'] = r4_validate_slash; + + v4_function_map_block['{'] = r4_validate_range; +} + +void r4_init(r4_t *r4) { + v4_init_function_maps(); + if (r4 == NULL) + return; + r4->debug = _r4_debug; + r4->valid = true; + r4->validation_count = 0; + r4->match_count = 0; + r4->start = 0; + r4->end = 0; + r4->length = 0; + r4->matches = NULL; +} + +static bool r4_looks_behind(char c) { return strchr("?*+{", c) != NULL; } + +r4_t *r4_new() { + r4_t *r4 = (r4_t *)malloc(sizeof(r4_t)); + + r4_init(r4); + + return r4; +} + +static bool r4_pipe_next(r4_t *r4) { + char *expr = r4->expr; + while (*expr) { + if (*expr == '|') { + r4->expr = expr + 1; + r4->valid = true; + return true; + } + expr++; + } + return false; +} + +static bool r4_backtrack(r4_t *r4) { + if (_r4_debug) + printf("\033[36mDEBUG: backtrack start (%d)\n", r4->backtracking); + r4->backtracking++; + char *str = r4->str; + char *expr 
= r4->expr; + bool result = r4_validate(r4); + r4->backtracking--; + if (result == false) { + r4->expr = expr; + r4->str = str; + } + if (_r4_debug) + printf("DEBUG: backtrack end (%d) result: %d %s\n", r4->backtracking, + result, r4->backtracking == 0 ? "\033[0m" : ""); + return result; +} + +static bool r4_validate(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->validation_count++; + char c_val = *r4->expr; + if (c_val == 0) { + return r4->valid; + } + if (!r4_looks_behind(c_val)) { + r4->expr_previous = r4->expr; + } else if (r4->expr == r4->_expr) { + // Regex may not start with a look behind ufnction + return false; + } + + if (!r4->valid && !r4_looks_behind(*r4->expr)) { + if (!r4_pipe_next(r4)) { + return false; + } + } + r4_function f; + f = v4_function_map_global[(int)c_val]; + + r4->valid = f(r4); + return r4->valid; +} + +char *r4_get_match(r4_t *r) { + char *match = (char *)malloc(r->length + 1); + strncpy(match, r->_str + r->start, r->length); + match[r->length] = 0; + return match; +} + +static bool r4_search(r4_t *r) { + bool valid = true; + char *str_next = r->str; + while (*r->str) { + if (!(valid = r4_validate(r))) { + // Move next until we find a match + if (!r->backtracking) { + r->start++; + } + str_next++; + r->str = str_next; + r->expr = r->_expr; + r->valid = true; + } else { + /// HIGH DOUBT + if (!r->backtracking) { + // r->start = 0; + } + break; + } + } + r->valid = valid; + if (r->valid) { + r->end = strlen(r->_str) - strlen(r->str); + r->length = r->end - r->start; + r->match = r4_get_match(r); + } + return r->valid; +} + +r4_t *r4(const char *str, const char *expr) { + r4_t *r = r4_new(); + r->_str = (char *)str; + r->_expr = (char *)expr; + r->match = NULL; + r->str = r->_str; + r->expr = r->_expr; + r->str_previous = r->_str; + r->expr_previous = r->expr; + r->in_block = false; + r->in_group = 0; + r->loop_count = 0; + r->backtracking = 0; + r->in_range = false; + r4_search(r); + return r; +} + +r4_t *r4_next(r4_t *r, char *expr) { + if 
(expr) { + r->_expr = expr; + } + r->backtracking = 0; + r->expr = r->_expr; + r4_free_matches(r); + r4_search(r); + return r; +} + +bool r4_match(char *str, char *expr) { + r4_t *r = r4(str, expr); + bool result = r->valid; + r4_free(r); + return result; +} +#endif +#define rautocomplete_new rstring_list_new +#define rautocomplete_free rstring_list_free +#define rautocomplete_add rstring_list_add +#define rautocomplete_find rstring_list_find +#define rautocomplete_t rstring_list_t +#define rautocomplete_contains rstring_list_contains + +char *r4_escape(char *content) { + size_t size = strlen(content) * 2 + 1; + char *escaped = (char *)calloc(size, sizeof(char)); + char *espr = escaped; + char *to_escape = "?*+()[]{}^$\\"; + *espr = '('; + espr++; + while (*content) { + if (strchr(to_escape, *content)) { + *espr = '\\'; + espr++; + } + *espr = *content; + espr++; + content++; + } + *espr = '.'; + espr++; + *espr = '+'; + espr++; + *espr = ')'; + espr++; + *espr = 0; + return escaped; +} + +char *rautocomplete_find(rstring_list_t *list, char *expr) { + if (!list->count) + return NULL; + if (!expr || !strlen(expr)) + return NULL; + + char *escaped = r4_escape(expr); + + for (unsigned int i = list->count - 1; i >= 0; i--) { + if (i == -1) + break; + char *match; + r4_t *r = r4(list->strings[i], escaped); + if (r->valid && r->match_count == 1) { + match = strdup(r->matches[0]); + } + r4_free(r); + if (match) { + + free(escaped); + return match; + } + } + free(escaped); + return NULL; +} +#endif +#ifndef RPRINT_H +#define RPRINT_H + +#ifndef RLIB_TIME +#define RLIB_TIME + +#include +#include +#include +#include +#include +#include + +#ifndef CLOCK_MONOTONIC +#define CLOCK_MONOTONIC 1 +#endif + +typedef unsigned long long msecs_t; +typedef uint64_t nsecs_t; + +nsecs_t nsecs() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec; +} + +msecs_t rnsecs_to_msecs(nsecs_t nsecs) { return nsecs / 1000 / 
1000; } + +nsecs_t rmsecs_to_nsecs(msecs_t msecs) { return msecs * 1000 * 1000; } + +msecs_t usecs() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (long long)(tv.tv_sec) * 1000000 + (long long)(tv.tv_usec); +} + +msecs_t msecs() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (long long)(tv.tv_sec) * 1000 + (tv.tv_usec / 1000); +} +char *msecs_strs(msecs_t ms) { + static char str[22]; + str[0] = 0; + sprintf(str, "%f", ms * 0.001); + for (int i = strlen(str); i > 0; i--) { + if (str[i] > '0') + break; + str[i] = 0; + } + return str; +} +char *msecs_strms(msecs_t ms) { + static char str[22]; + str[0] = 0; + sprintf(str, "%lld", ms); + return str; +} +char *msecs_str(long long ms) { + static char result[30]; + result[0] = 0; + if (ms > 999) { + char *s = msecs_strs(ms); + sprintf(result, "%ss", s); + } else { + char *s = msecs_strms(ms); + sprintf(result, "%sMs", s); + } + return result; +} + +void nsleep(nsecs_t nanoseconds) { + long seconds = 0; + int factor = 0; + while (nanoseconds > 1000000000) { + factor++; + nanoseconds = nanoseconds / 10; + } + if (factor) { + seconds = 1; + factor--; + while (factor) { + seconds = seconds * 10; + factor--; + } + } + + struct timespec req = {seconds, nanoseconds}; + struct timespec rem; + + if (nanosleep(&req, &rem) == -1) { + if (errno == EINTR) { + printf("Sleep was interrupted. Remaining time: %ld.%09ld seconds\n", + rem.tv_sec, rem.tv_nsec); + } else { + perror("nanosleep"); + } + } else { + // printf("Slept for %ld.%09ld seconds\n", req.tv_sec, req.tv_nsec); + } +} + +void ssleep(double s) { + long nanoseconds = (long)(1000000000 * s); + + long seconds = 0; + + struct timespec req = {seconds, nanoseconds}; + struct timespec rem; + + if (nanosleep(&req, &rem) == -1) { + if (errno == EINTR) { + printf("Sleep was interrupted. 
Remaining time: %ld.%09ld seconds\n", + rem.tv_sec, rem.tv_nsec); + } else { + perror("nanosleep"); + } + } else { + // printf("Slept for %ld.%09ld seconds\n", req.tv_sec, req.tv_nsec); + } +} +void msleep(long miliseonds) { + long nanoseconds = miliseonds * 1000000; + nsleep(nanoseconds); +} + +char *format_time(int64_t nanoseconds) { + static char output[1024]; + size_t output_size = sizeof(output); + output[0] = 0; + if (nanoseconds < 1000) { + // Less than 1 microsecond + snprintf(output, output_size, "%ldns", nanoseconds); + } else if (nanoseconds < 1000000) { + // Less than 1 millisecond + double us = nanoseconds / 1000.0; + snprintf(output, output_size, "%.2fµs", us); + } else if (nanoseconds < 1000000000) { + // Less than 1 second + double ms = nanoseconds / 1000000.0; + snprintf(output, output_size, "%.2fms", ms); + } else { + // 1 second or more + double s = nanoseconds / 1000000000.0; + snprintf(output, output_size, "%.2fs", s); + } + return output; +} + +#endif +#include +#include +#include +#include +#include + +long rpline_number = 0; +nsecs_t rprtime = 0; + +int8_t _env_rdisable_colors = -1; +bool _rprint_enable_colors = true; + +bool rprint_is_color_enabled() { + if (_env_rdisable_colors == -1) { + _env_rdisable_colors = getenv("RDISABLE_COLORS") != NULL; + } + if (_env_rdisable_colors) { + _rprint_enable_colors = false; + } + return _rprint_enable_colors; +} + +void rprint_disable_colors() { _rprint_enable_colors = false; } +void rprint_enable_colors() { _rprint_enable_colors = true; } +void rprint_toggle_colors() { _rprint_enable_colors = !_rprint_enable_colors; } + +void rclear() { printf("\033[2J"); } + +void rprintpf(FILE *f, const char *prefix, const char *format, va_list args) { + char *pprefix = (char *)prefix; + char *pformat = (char *)format; + bool reset_color = false; + bool press_any_key = false; + char new_format[4096]; + bool enable_color = rprint_is_color_enabled(); + memset(new_format, 0, 4096); + int new_format_length = 0; + char 
temp[1000]; + memset(temp, 0, 1000); + if (enable_color && pprefix[0]) { + strcat(new_format, pprefix); + new_format_length += strlen(pprefix); + reset_color = true; + } + while (true) { + if (pformat[0] == '\\' && pformat[1] == 'i') { + strcat(new_format, "\e[3m"); + new_format_length += strlen("\e[3m"); + reset_color = true; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'u') { + strcat(new_format, "\e[4m"); + new_format_length += strlen("\e[4m"); + reset_color = true; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'b') { + strcat(new_format, "\e[1m"); + new_format_length += strlen("\e[1m"); + reset_color = true; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'C') { + press_any_key = true; + rpline_number++; + pformat++; + pformat++; + reset_color = false; + } else if (pformat[0] == '\\' && pformat[1] == 'k') { + press_any_key = true; + rpline_number++; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'c') { + rpline_number++; + strcat(new_format, "\e[2J\e[H"); + new_format_length += strlen("\e[2J\e[H"); + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'L') { + rpline_number++; + temp[0] = 0; + sprintf(temp, "%ld", rpline_number); + strcat(new_format, temp); + new_format_length += strlen(temp); + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'l') { + rpline_number++; + temp[0] = 0; + sprintf(temp, "%.5ld", rpline_number); + strcat(new_format, temp); + new_format_length += strlen(temp); + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'T') { + nsecs_t nsecs_now = nsecs(); + nsecs_t end = rprtime ? 
nsecs_now - rprtime : 0; + temp[0] = 0; + sprintf(temp, "%s", format_time(end)); + strcat(new_format, temp); + new_format_length += strlen(temp); + rprtime = nsecs_now; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 't') { + rprtime = nsecs(); + pformat++; + pformat++; + } else { + new_format[new_format_length] = *pformat; + new_format_length++; + if (!*pformat) + break; + + // printf("%c",*pformat); + pformat++; + } + } + if (reset_color) { + strcat(new_format, "\e[0m"); + new_format_length += strlen("\e[0m"); + } + + new_format[new_format_length] = 0; + vfprintf(f, new_format, args); + + fflush(stdout); + if (press_any_key) { + nsecs_t s = nsecs(); + fgetc(stdin); + rprtime += nsecs() - s; + } +} + +void rprintp(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "", format, args); + va_end(args); +} + +void rprintf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "", format, args); + va_end(args); +} +void rprint(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "", format, args); + va_end(args); +} +#define printf rprint + +// Print line +void rprintlf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\\l", format, args); + va_end(args); +} +void rprintl(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\\l", format, args); + va_end(args); +} + +// Black +void rprintkf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[30m", format, args); + va_end(args); +} +void rprintk(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[30m", format, args); + va_end(args); +} + +// Red +void rprintrf(FILE *f, const char *format, ...) 
{ + va_list args; + va_start(args, format); + rprintpf(f, "\e[31m", format, args); + va_end(args); +} +void rprintr(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[31m", format, args); + va_end(args); +} + +// Green +void rprintgf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[32m", format, args); + va_end(args); +} +void rprintg(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[32m", format, args); + va_end(args); +} + +// Yellow +void rprintyf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[33m", format, args); + va_end(args); +} +void rprinty(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[33m", format, args); + va_end(args); +} + +// Blue +void rprintbf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[34m", format, args); + va_end(args); +} + +void rprintb(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[34m", format, args); + va_end(args); +} + +// Magenta +void rprintmf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[35m", format, args); + va_end(args); +} +void rprintm(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[35m", format, args); + va_end(args); +} + +// Cyan +void rprintcf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[36m", format, args); + va_end(args); +} +void rprintc(const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[36m", format, args); + va_end(args); +} + +// White +void rprintwf(FILE *f, const char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[37m", format, args); + va_end(args); +} +void rprintw(const char *format, ...) 
{ + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[37m", format, args); + va_end(args); +} +#endif +#ifndef RMATH_H +#define RMATH_H +#include + +#ifndef ceil +double ceil(double x) { + if (x == (double)(long long)x) { + return x; + } else if (x > 0.0) { + return (double)(long long)x + 1.0; + } else { + return (double)(long long)x; + } +} +#endif + +#ifndef floor +double floor(double x) { + if (x >= 0.0) { + return (double)(long long)x; + } else { + double result = (double)(long long)x; + return (result == x) ? result : result - 1.0; + } +} +#endif + +#ifndef modf +double modf(double x, double *iptr) { + double int_part = (x >= 0.0) ? floor(x) : ceil(x); + *iptr = int_part; + return x - int_part; +} +#endif +#endif +#ifndef RMALLOC_H +#define RMALLOC_H +#include +#include +#include + +unsigned long long rmalloc_count = 0; +unsigned long long rmalloc_alloc_count = 0; +unsigned long long int rmalloc_free_count = 0; + +void *rmalloc(size_t size) { + rmalloc_count++; + rmalloc_alloc_count++; + return malloc(size); +} +void *rrealloc(void *obj, size_t size) { + if (obj == NULL) { + rmalloc_count++; + rmalloc_alloc_count++; + } + return realloc(obj, size); +} +void *rfree(void *obj) { + rmalloc_count--; + rmalloc_free_count++; + free(obj); + return NULL; +} + +#define malloc rmalloc +#define realloc rrealloc +#define free rfree + +char *rmalloc_stats() { + static char res[100] = {0}; + sprintf(res, "Memory usage: %lld allocated, %lld freed, %lld in use.", + rmalloc_alloc_count, rmalloc_free_count, rmalloc_count); + return res; +} + +char *rstrdup(char *str) { + + char *res = (char *)strdup(str); + rmalloc_alloc_count++; + rmalloc_count++; + return res; +} + +#endif + +#ifndef RTEST_H +#define RTEST_H +#include +#include +#include +#define debug(fmt, ...) 
printf("%s:%d: " fmt, __FILE__, __LINE__, __VA_ARGS__); + +char *rcurrent_banner; +int rassert_count = 0; +unsigned short rtest_is_first = 1; +unsigned int rtest_fail_count = 0; + +int rtest_end(char *content) { + // Returns application exit code. 0 == success + printf("%s", content); + printf("\n@assertions: %d\n", rassert_count); + printf("@memory: %s\n", rmalloc_stats()); + + if (rmalloc_count != 0) { + printf("MEMORY ERROR\n"); + return rtest_fail_count > 0; + } + return rtest_fail_count > 0; +} + +void rtest_test_banner(char *content, char *file) { + if (rtest_is_first == 1) { + char delimiter[] = "."; + char *d = delimiter; + char f[2048]; + strcpy(f, file); + printf("%s tests", strtok(f, d)); + rtest_is_first = 0; + setvbuf(stdout, NULL, _IONBF, 0); + } + printf("\n - %s ", content); +} + +bool rtest_test_true_silent(char *expr, int res, int line) { + rassert_count++; + if (res) { + return true; + } + rprintrf(stderr, "\nERROR on line %d: %s", line, expr); + rtest_fail_count++; + return false; +} + +bool rtest_test_true(char *expr, int res, int line) { + rassert_count++; + if (res) { + fprintf(stdout, "."); + return true; + } + rprintrf(stderr, "\nERROR on line %d: %s", line, expr); + rtest_fail_count++; + return false; +} +bool rtest_test_false_silent(char *expr, int res, int line) { + return rtest_test_true_silent(expr, !res, line); +} +bool rtest_test_false(char *expr, int res, int line) { + return rtest_test_true(expr, !res, line); +} +void rtest_test_skip(char *expr, int line) { + rprintgf(stderr, "\n @skip(%s) on line %d\n", expr, line); +} +bool rtest_test_assert(char *expr, int res, int line) { + if (rtest_test_true(expr, res, line)) { + return true; + } + rtest_end(""); + exit(40); +} + +#define rtest_banner(content) \ + rcurrent_banner = content; \ + rtest_test_banner(content, __FILE__); +#define rtest_true(expr) rtest_test_true(#expr, expr, __LINE__); +#define rtest_assert(expr) \ + { \ + int __valid = expr ? 
1 : 0; \ + rtest_test_true(#expr, __valid, __LINE__); \ + }; \ + ; + +#define rassert(expr) \ + { \ + int __valid = expr ? 1 : 0; \ + rtest_test_true(#expr, __valid, __LINE__); \ + }; \ + ; +#define rtest_asserts(expr) \ + { \ + int __valid = expr ? 1 : 0; \ + rtest_test_true_silent(#expr, __valid, __LINE__); \ + }; +#define rasserts(expr) \ + { \ + int __valid = expr ? 1 : 0; \ + rtest_test_true_silent(#expr, __valid, __LINE__); \ + }; +#define rtest_false(expr) \ + rprintf(" [%s]\t%s\t\n", expr == 0 ? "OK" : "NOK", #expr); \ + assert_count++; \ + assert(#expr); +#define rtest_skip(expr) rtest_test_skip(#expr, __LINE__); + +FILE *rtest_create_file(char *path, char *content) { + FILE *fd = fopen(path, "wb"); + + char c; + int index = 0; + + while ((c = content[index]) != 0) { + fputc(c, fd); + index++; + } + fclose(fd); + fd = fopen(path, "rb"); + return fd; +} + +void rtest_delete_file(char *path) { unlink(path); } +#endif +#ifndef RKEYTABLE_H +#define RKEYTABLE_H +/* + DERIVED FROM HASH TABLE K&R + */ +#include +#include +#include +#include + +typedef struct rnklist { + struct rnklist *next; + struct rnklist *last; + char *name; + char *defn; +} rnklist; + +static rnklist *rkeytab = NULL; + +rnklist *rlkget(char *s) { + rnklist *np; + for (np = rkeytab; np != NULL; np = np->next) + if (strcmp(s, np->name) == 0) + return np; // Found + return NULL; // Not found +} + +char *rkget(char *s) { + rnklist *np = rlkget(s); + return np ? 
np->defn : NULL; +} + +rnklist *rkset(char *name, char *defn) { + rnklist *np; + if ((np = (rlkget(name))) == NULL) { // Not found + np = (rnklist *)malloc(sizeof(rnklist)); + np->name = strdup(name); + np->next = NULL; + np->last = NULL; + + if (defn) { + np->defn = strdup(defn); + } else { + np->defn = NULL; + } + + if (rkeytab == NULL) { + rkeytab = np; + rkeytab->last = np; + } else { + if (rkeytab->last) + rkeytab->last->next = np; + + rkeytab->last = np; + } + } else { + if (np->defn) + free((void *)np->defn); + if (defn) { + np->defn = strdup(defn); + } else { + np->defn = NULL; + } + } + return np; +} +#endif +#ifndef RHASHTABLE_H +#define RHASHTABLE_H +/* + ORIGINAL SOURCE IS FROM K&R + */ +#include +#include +#include + +#define HASHSIZE 101 + +// Structure for the table entries +typedef struct rnlist { + struct rnlist *next; + char *name; + char *defn; +} rnlist; + +// Hash table array +static rnlist *rhashtab[HASHSIZE]; + +// Hash function +unsigned rhash(char *s) { + unsigned hashval; + for (hashval = 0; *s != '\0'; s++) + hashval = *s + 31 * hashval; + return hashval % HASHSIZE; +} + +rnlist *rlget(char *s) { + rnlist *np; + for (np = rhashtab[rhash(s)]; np != NULL; np = np->next) + if (strcmp(s, np->name) == 0) + return np; // Found + return NULL; // Not found +} + +// Lookup function +char *rget(char *s) { + rnlist *np = rlget(s); + return np ? 
np->defn : NULL; +} + +// Install function (adds a name and definition to the table) +struct rnlist *rset(char *name, char *defn) { + struct rnlist *np = NULL; + unsigned hashval; + + if ((rlget(name)) == NULL) { // Not found + np = (struct rnlist *)malloc(sizeof(*np)); + if (np == NULL || (np->name = strdup(name)) == NULL) + return NULL; + hashval = rhash(name); + np->next = rhashtab[hashval]; + rhashtab[hashval] = np; + } else { + if (np->defn) + free((void *)np->defn); + np->defn = NULL; + } + if ((np->defn = strdup(defn)) == NULL) + return NULL; + return np; +} +#endif +#ifndef RREX3_H +#define RREX3_H +#include +#include +#include +#include +#include +#include +#include +#ifndef RREX3_DEBUG +#define RREX3_DEBUG 0 +#endif + +struct rrex3_t; + +typedef void (*rrex3_function)(struct rrex3_t *); + +typedef struct rrex3_t { + void (*functions[254])(struct rrex3_t *); + void (*slash_functions[254])(struct rrex3_t *); + bool valid; + int match_count; + int match_capacity; + char **matches; + bool exit; + char *__expr; + char *__str; + char *_expr; + char *_str; + char *expr; + char *str; + char *compiled; + bool inside_brackets; + bool inside_parentheses; + bool pattern_error; + bool match_from_start; + char bytecode; + rrex3_function function; + struct { + void (*function)(struct rrex3_t *); + char *expr; + char *str; + char bytecode; + } previous; + struct { + void (*function)(struct rrex3_t *); + char *expr; + char *str; + char bytecode; + } failed; +} rrex3_t; + +static bool isdigitrange(char *s) { + if (!isdigit(*s)) { + return false; + } + if (*(s + 1) != '-') { + return false; + } + return isdigit(*(s + 2)); +} + +static bool isalpharange(char *s) { + if (!isalpha(*s)) { + return false; + } + if (*(s + 1) != '-') { + return false; + } + return isalpha(*(s + 2)); +} + +void rrex3_free_matches(rrex3_t *rrex3) { + if (!rrex3->matches) + return; + for (int i = 0; i < rrex3->match_count; i++) { + free(rrex3->matches[i]); + } + free(rrex3->matches); + rrex3->matches 
= NULL; + rrex3->match_count = 0; + rrex3->match_capacity = 0; +} + +void rrex3_free(rrex3_t *rrex3) { + if (!rrex3) + return; + if (rrex3->compiled) { + free(rrex3->compiled); + rrex3->compiled = NULL; + } + rrex3_free_matches(rrex3); + free(rrex3); + rrex3 = NULL; +} +static bool rrex3_move(rrex3_t *, bool); +static void rrex3_set_previous(rrex3_t *); +inline static void rrex3_cmp_asterisk(rrex3_t *); +void rrex3_cmp_literal_range(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Range check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char start = *rrex3->expr; + rrex3->expr++; + rrex3->expr++; + char end = *rrex3->expr; + if (*rrex3->str >= start && *rrex3->str <= end) { + rrex3->str++; + rrex3->valid = true; + } else { + rrex3->valid = false; + } + rrex3->expr++; +} + +bool rrex3_is_function(char chr) { + if (chr == ']' || chr == ')' || chr == '\\' || chr == '?' || chr == '+' || + chr == '*') + return true; + return false; +} + +inline static void rrex3_cmp_literal(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + if (rrex3->inside_brackets) { + if (isalpharange(rrex3->expr) || isdigitrange(rrex3->expr)) { + rrex3_cmp_literal_range(rrex3); + return; + } + } +#if RREX3_DEBUG == 1 + printf("Literal check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + +#endif + if (*rrex3->expr == 0 && !*rrex3->str) { + printf("ERROR, EMPTY CHECK\n"); + // exit(1); + } + if (rrex3->valid == false) { + rrex3->expr++; + return; + } + + if (*rrex3->expr == *rrex3->str) { + rrex3->expr++; + rrex3->str++; + rrex3->valid = true; + // if(*rrex3->expr &&rrex3->functions[(int)*rrex3->expr] == + // rrex3_cmp_literal && !rrex3->inside_brackets && + //! 
rrex3_is_function(*rrex3->expr)){ rrex3_cmp_literal(rrex3); + // if(rrex3->valid == false){ + // rrex3->expr--; + // rrex3->valid = true; + // } + // } + return; + } + rrex3->expr++; + rrex3->valid = false; +} + +inline static void rrex3_cmp_dot(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Dot check (any char): %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + rrex3->expr++; + if (!rrex3->valid) { + return; + } + if (*rrex3->str && *rrex3->str != '\n') { + rrex3->str++; + if (*rrex3->expr && *rrex3->expr == '.') { + rrex3_cmp_dot(rrex3); + return; + } /*else if(*rrex3->expr && (*rrex3->expr == '*' || *rrex3->expr == + '+')){ char * next = strchr(rrex3->str,*(rrex3->expr + 1)); char * + space = strchr(rrex3->str,'\n'); if(next && (!space || space > next)){ + rrex3->str = next; + } + }*/ + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_question_mark(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Question mark check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + if (rrex3->valid == false) + rrex3->valid = true; + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = c == ' ' || c == '\n' || c == '\t'; + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace_upper(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Non whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = !(c == ' ' || c == '\n' || c == '\t'); + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_plus2(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Plus check: %c:%c:%d\n", 
*rrex3->expr, *rrex3->str, rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + if (rrex3->valid) { + rrex3->str--; + } else { + return; + } + char *original_expr = rrex3->expr; + char *next = original_expr + 1; + char *loop_expr = rrex3->previous.expr - 1; + if (*loop_expr == '+') { + rrex3->valid = false; + rrex3->pattern_error = true; + rrex3->expr++; + return; + } + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *next_next = NULL; + char *next_str = rrex3->str; + while (*rrex3->str) { + // Check if next matches + char *original_str = rrex3->str; + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + success_next = true; + next_next = rrex3->expr; + next_str = rrex3->str; + success_next_once = true; + } else { + success_next = false; + } + if (success_next_once && !success_next) { + break; + } + // Check if current matches + rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!*rrex3->str || !rrex3_move(rrex3, false)) { + success_current = false; + } else { + success_current = true; + if (!success_next) { + next_next = rrex3->expr + 1; // +1 is the * itself + next_str = rrex3->str; + } + } + if (success_next && !success_current) { + break; + } + } + if (!next_next) + rrex3->expr = next; + else { + rrex3->expr = next_next; + } + rrex3->str = next_str; + rrex3->valid = true; +} + +inline static void rrex3_cmp_plus(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->expr++; + return; + } + + char *left = rrex3->previous.expr; + // printf("%s\n",rrex3->str); + char *right = rrex3->expr + 1; + if (*right == ')') { + right++; + } + int right_valid = 0; + bool right_valid_once = false; + char *expr = right; + char *right_str = rrex3->str; + ; + char *right_expr = NULL; + char *str = rrex3->str; + bool first_time = true; + bool left_valid = 
true; + char *str_prev = NULL; + bool valid_from_start = true; + ; + while (*rrex3->str) { + if (!left_valid && !right_valid) { + break; + } + if (right_valid && !left_valid) { + str = right_str; + break; + } + + rrex3->expr = right; + rrex3->str = str; +#if RREX3_DEBUG == 1 + printf("r"); +#endif + if (*rrex3->str && rrex3_move(rrex3, false)) { + right_valid++; + right_str = rrex3->str; + expr = rrex3->expr; + if (!right_valid_once) { + right_expr = rrex3->expr; + right_valid_once = true; + } + } else { + right_valid = 0; + } + if (first_time) { + first_time = false; + valid_from_start = right_valid; + } + + if (right_valid && !valid_from_start && right_valid > 0) { + expr = right_expr - 1; + ; + if (*(right - 1) == ')') { + expr = right - 1; + } + break; + } + + if ((!right_valid && right_valid_once)) { + expr = right_expr; + if (*(right - 1) == ')') { + str = str_prev; + expr = right - 1; + } + break; + } + + str_prev = str; + rrex3->valid = true; + rrex3->str = str; + rrex3->expr = left; +#if RREX3_DEBUG == 1 + printf("l"); +#endif + if (rrex3_move(rrex3, false)) { + left_valid = true; + + str = rrex3->str; + } else { + left_valid = false; + } + } + + rrex3->expr = expr; + rrex3->str = str; + rrex3->valid = true; + +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_asterisk(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->valid = true; + rrex3->expr++; + return; + } + + rrex3->str = rrex3->previous.str; + char *left = rrex3->previous.expr; + // printf("%s\n",rrex3->str); + char *right = rrex3->expr + 1; + if (*right == ')') { + right++; + } + int right_valid = 0; + bool right_valid_once = false; + char *expr = right; + char *right_str = rrex3->str; + ; + char *right_expr = NULL; + char *str = rrex3->str; + bool first_time = true; + bool 
left_valid = true; + char *str_prev = NULL; + bool valid_from_start = true; + ; + while (*rrex3->str) { + if (!left_valid && !right_valid) { + break; + } + if (right_valid && !left_valid) { + str = right_str; + break; + } + + rrex3->expr = right; + rrex3->str = str; +#if RREX3_DEBUG == 1 + printf("r"); +#endif + if (*rrex3->str && rrex3_move(rrex3, false)) { + right_valid++; + right_str = rrex3->str; + expr = rrex3->expr; + if (!right_valid_once) { + right_expr = rrex3->expr; + right_valid_once = true; + } + } else { + right_valid = 0; + } + if (first_time) { + first_time = false; + valid_from_start = right_valid; + } + + if (right_valid && !valid_from_start && right_valid > 0) { + expr = right_expr - 1; + if (*(right - 1) == ')') { + expr = right - 1; + } + break; + } + + if ((!right_valid && right_valid_once)) { + expr = right_expr; + if (*(right - 1) == ')') { + str = str_prev; + expr = right - 1; + } + break; + } + + str_prev = str; + rrex3->valid = true; + rrex3->str = str; + rrex3->expr = left; +#if RREX3_DEBUG == 1 + printf("l"); +#endif + if (rrex3_move(rrex3, false)) { + left_valid = true; + str = rrex3->str; + } else { + left_valid = false; + } + } + + rrex3->expr = expr; + rrex3->str = str; + rrex3->valid = true; + +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_asterisk2(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->valid = true; + rrex3->expr++; + return; + } + if (*rrex3->previous.expr == '*') { + // Support for ** + rrex3->valid = false; + // rrex3->pattern_error = true; + rrex3->expr++; + return; + } + rrex3->str = rrex3->previous.str; + ; + char *next = rrex3->expr + 1; + char *next_original = NULL; + if (*next == '*') { + next++; + } + if (*next == ')' && *(next + 1)) { + next_original = next; + next++; + } + char 
*loop_expr = rrex3->previous.expr; + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *right_next = NULL; + char *right_str = rrex3->str; + while (*rrex3->str && *rrex3->expr && *rrex3->expr != ')') { + // Remember original_str because it's modified + // by checking right and should be restored + // for checking left so they're matching the + // same value. + char *original_str = rrex3->str; + // Check if right matches. + // if(*next != ')'){ + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + // Match rright. + success_next = true; + if (!next_original) { + if (!success_next_once) { + right_next = rrex3->expr; + } + + } else { + right_next = next_original; + break; + } + right_str = rrex3->str; + success_next_once = true; + } else { + // No match Right. + success_next = false; + } + //} + if (success_next_once && !success_next) { + // Matched previous time but now doesn't. + break; + } + // Check if left matches. + rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!rrex3_move(rrex3, false)) { + // No match left. + success_current = false; + } else { + // Match left. 
+ success_current = true; + // NOT SURE< WITHOUT DOET HETZELFDE: + // original_str = rrex3->str; + if (!success_next) { + right_str = rrex3->str; + if (*rrex3->expr != ')') { + right_next = rrex3->expr + 1; // +1 is the * itself + + } else { + + // break; + } + } + } + + if ((success_next && !success_current) || + (!success_next && !success_current)) { + break; + } + } + rrex3->expr = right_next; + rrex3->str = right_str; + rrex3->valid = true; +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_roof(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); +#if RREX3_DEBUG == 1 + printf("expr, *rrex3->str, rrex3->valid); +#endif + rrex3->valid = rrex3->str == rrex3->_str; + rrex3->match_from_start = true; + rrex3->expr++; +} +inline static void rrex3_cmp_dollar(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + +#if RREX3_DEBUG == 1 + printf("Dollar check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (*rrex3->str || !rrex3->valid) { + rrex3->valid = false; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_w(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} +inline static void rrex3_cmp_w_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_d(rrex3_t *rrex3) { + + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } 
+} +inline static void rrex3_cmp_d_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_slash(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; + + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->slash_functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); +} + +inline static int collect_digits(rrex3_t *rrex3) { + char output[20]; + unsigned int digit_count = 0; + while (isdigit(*rrex3->expr)) { + + output[digit_count] = *rrex3->expr; + rrex3->expr++; + digit_count++; + } + output[digit_count] = 0; + return atoi(output); +} + +inline static void rrex3_cmp_range(rrex3_t *rrex3) { + char *loop_code = rrex3->previous.expr; + char *expr_original = rrex3->expr; + rrex3->expr++; + int range_start = collect_digits(rrex3) - 1; + int range_end = 0; + if (*rrex3->expr == ',') { + rrex3->expr++; + range_end = collect_digits(rrex3); + } + rrex3->expr++; + int times_valid = 0; + while (*rrex3->str) { + rrex3->expr = loop_code; + rrex3_move(rrex3, false); + if (rrex3->valid == false) { + break; + } else { + times_valid++; + } + if (range_end) { + if (times_valid >= range_start && times_valid == range_end - 1) { + rrex3->valid = true; + } else { + rrex3->valid = false; + } + break; + } else if (range_start) { + if (times_valid == range_start) { + rrex3->valid = true; + break; + } + } + } + rrex3->valid = times_valid >= range_start; + if (rrex3->valid && range_end) { + rrex3->valid = times_valid <= range_end; + } + rrex3->expr = strchr(expr_original, '}') + 1; +} + +inline static void rrex3_cmp_word_start_or_end(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + if (*rrex3->expr != 'B') { + printf("Check word start or end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + } + +#endif + 
rrex3_set_previous(rrex3); + bool valid = false; + if (isalpha(*rrex3->str)) { + if (rrex3->_str != rrex3->str) { + if (!isalpha(*(rrex3->str - 1))) { + valid = true; + } + } else { + valid = true; + } + } else if (isalpha(isalpha(*rrex3->str) && !isalpha(*rrex3->str + 1))) { + valid = true; + } + rrex3->expr++; + rrex3->valid = valid; +} +inline static void rrex3_cmp_word_not_start_or_end(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Check word NOT start or end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + +#endif + rrex3_set_previous(rrex3); + + rrex3_cmp_word_start_or_end(rrex3); + rrex3->valid = !rrex3->valid; +} + +inline static void rrex3_cmp_brackets(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets start: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + char *original_expr = rrex3->expr; + rrex3->expr++; + rrex3->inside_brackets = true; + bool valid_once = false; + bool reversed = false; + if (*rrex3->expr == '^') { + reversed = true; + rrex3->expr++; + } + bool valid = false; + while (*rrex3->expr != ']' && *rrex3->expr != 0) { + rrex3->valid = true; + valid = rrex3_move(rrex3, false); + if (reversed) { + valid = !valid; + } + if (valid) { + valid_once = true; + if (!reversed) { + valid_once = true; + break; + } + } else { + if (reversed) { + valid_once = false; + break; + } + } + } + if (valid_once && reversed) { + rrex3->str++; + } + while (*rrex3->expr != ']' && *rrex3->expr != 0) + rrex3->expr++; + if (*rrex3->expr != 0) + rrex3->expr++; + + rrex3->valid = valid_once; + rrex3->inside_brackets = false; + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_pipe(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + +#if RREX3_DEBUG == 1 + printf("Pipe check: 
%c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (rrex3->valid == true) { + rrex3->exit = true; + } else { + rrex3->valid = true; + } + rrex3->expr++; +} +inline static void rrex3_cmp_parentheses(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses start check: %c:%c:%d\n", *rrex3->expr, + *rrex3->str, rrex3->valid); +#endif + + rrex3_set_previous(rrex3); + if (!rrex3->valid) { + rrex3->expr++; + return; + } + if (rrex3->match_count == rrex3->match_capacity) { + + rrex3->match_capacity++; + rrex3->matches = (char **)realloc( + rrex3->matches, rrex3->match_capacity * sizeof(char *)); + } + rrex3->matches[rrex3->match_count] = (char *)malloc(strlen(rrex3->str) + 1); + strcpy(rrex3->matches[rrex3->match_count], rrex3->str); + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->expr++; + rrex3->inside_parentheses = true; + while (*rrex3->expr != ')' && !rrex3->exit) { + rrex3_move(rrex3, false); + } + while (*rrex3->expr != ')') { + rrex3->expr++; + } + rrex3->expr++; + rrex3->inside_parentheses = false; + + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; + if (rrex3->valid == false) { + rrex3->str = original_str; + free(rrex3->matches[rrex3->match_count]); + } else { + rrex3->matches[rrex3->match_count] + [strlen(rrex3->matches[rrex3->match_count]) - + strlen(rrex3->str)] = 0; + rrex3->match_count++; + } +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_reset(rrex3_t *rrex3) { + rrex3_free_matches(rrex3); + rrex3->valid = true; + rrex3->pattern_error = false; + rrex3->inside_brackets = false; + rrex3->inside_parentheses = false; + rrex3->exit = false; + rrex3->previous.expr = NULL; + rrex3->previous.str = NULL; + rrex3->previous.bytecode = 0; + rrex3->failed.expr = NULL; + rrex3->failed.str = NULL; + rrex3->failed.bytecode = 0; + 
rrex3->match_from_start = false; +} + +void rrex3_init(rrex3_t *rrex3) { + for (__uint8_t i = 0; i < 254; i++) { + rrex3->functions[i] = rrex3_cmp_literal; + rrex3->slash_functions[i] = rrex3_cmp_literal; + } + rrex3->functions['?'] = rrex3_cmp_question_mark; + rrex3->functions['^'] = rrex3_cmp_roof; + rrex3->functions['$'] = rrex3_cmp_dollar; + rrex3->functions['.'] = rrex3_cmp_dot; + rrex3->functions['*'] = rrex3_cmp_asterisk; + rrex3->functions['+'] = rrex3_cmp_plus; + rrex3->functions['|'] = rrex3_cmp_pipe; + rrex3->functions['\\'] = rrex3_cmp_slash; + rrex3->functions['{'] = rrex3_cmp_range; + rrex3->functions['['] = rrex3_cmp_brackets; + rrex3->functions['('] = rrex3_cmp_parentheses; + rrex3->slash_functions['w'] = rrex3_cmp_w; + rrex3->slash_functions['W'] = rrex3_cmp_w_upper; + rrex3->slash_functions['d'] = rrex3_cmp_d; + rrex3->slash_functions['D'] = rrex3_cmp_d_upper; + rrex3->slash_functions['s'] = rrex3_cmp_whitespace; + rrex3->slash_functions['S'] = rrex3_cmp_whitespace_upper; + rrex3->slash_functions['b'] = rrex3_cmp_word_start_or_end; + rrex3->slash_functions['B'] = rrex3_cmp_word_not_start_or_end; + rrex3->match_count = 0; + rrex3->match_capacity = 0; + rrex3->matches = NULL; + rrex3->compiled = NULL; + + rrex3_reset(rrex3); +} + +rrex3_t *rrex3_new() { + rrex3_t *rrex3 = (rrex3_t *)malloc(sizeof(rrex3_t)); + + rrex3_init(rrex3); + + return rrex3; +} + +rrex3_t *rrex3_compile(rrex3_t *rrex, char *expr) { + + rrex3_t *rrex3 = rrex ? 
rrex : rrex3_new(); + + char *compiled = (char *)malloc(strlen(expr) + 1); + unsigned int count = 0; + while (*expr) { + if (*expr == '[' && *(expr + 2) == ']') { + *compiled = *(expr + 1); + expr++; + expr++; + } else if (*expr == '[' && *(expr + 1) == '0' && *(expr + 2) == '-' && + *(expr + 3) == '9' && *(expr + 4) == ']') { + *compiled = '\\'; + compiled++; + *compiled = 'd'; + count++; + expr++; + expr++; + expr++; + expr++; + } else { + *compiled = *expr; + } + if (*compiled == '[') { + // in_brackets = true; + + } else if (*compiled == ']') { + // in_brackets = false; + } + expr++; + compiled++; + count++; + } + *compiled = 0; + compiled -= count; + rrex3->compiled = compiled; + return rrex3; +} + +inline static void rrex3_set_previous(rrex3_t *rrex3) { + rrex3->previous.function = rrex3->function; + rrex3->previous.expr = rrex3->expr; + rrex3->previous.str = rrex3->str; + rrex3->previous.bytecode = *rrex3->expr; +} + +static bool rrex3_move(rrex3_t *rrex3, bool resume_on_fail) { + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + if (!*rrex3->expr && !*rrex3->str) { + rrex3->exit = true; + return rrex3->valid; + } else if (!*rrex3->expr) { + // rrex3->valid = true; + return rrex3->valid; + } + if (rrex3->pattern_error) { + rrex3->valid = false; + return rrex3->valid; + } + if (resume_on_fail && !rrex3->valid && *rrex3->expr) { + + // rrex3_set_previous(rrex3); + rrex3->failed.bytecode = rrex3->bytecode; + rrex3->failed.function = rrex3->function; + rrex3->failed.expr = original_expr; + rrex3->failed.str = original_str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + + if (!rrex3->valid && !rrex3->pattern_error) { + + if (*rrex3->str) { + char *pipe_position = strstr(rrex3->expr, "|"); + if (pipe_position != NULL) { + rrex3->expr = pipe_position + 
1; + rrex3->str = rrex3->_str; + rrex3->valid = true; + return true; + } + } + if (rrex3->match_from_start) { + rrex3->valid = false; + return rrex3->valid; + } + if (!*rrex3->str++) { + rrex3->valid = false; + return rrex3->valid; + } + rrex3->expr = rrex3->_expr; + if (*rrex3->str) + rrex3->valid = true; + } + } else { + } + return rrex3->valid; +} + +rrex3_t *rrex3(rrex3_t *rrex3, char *str, char *expr) { +#if RREX3_DEBUG == 1 + printf("Regex check: %s:%s:%d\n", expr, str, 1); +#endif + bool self_initialized = false; + if (rrex3 == NULL) { + self_initialized = true; + rrex3 = rrex3_new(); + } else { + rrex3_reset(rrex3); + } + + rrex3->_str = str; + rrex3->_expr = rrex3->compiled ? rrex3->compiled : expr; + rrex3->str = rrex3->_str; + rrex3->expr = rrex3->_expr; + while (*rrex3->expr && !rrex3->exit) { + if (!rrex3_move(rrex3, true)) + return NULL; + } + rrex3->expr = rrex3->_expr; + if (rrex3->valid) { + + return rrex3; + } else { + if (self_initialized) { + rrex3_free(rrex3); + } + return NULL; + } +} + +void rrex3_test() { + rrex3_t *rrex = rrex3_new(); + + assert(rrex3(rrex, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + + assert(rrex3(rrex, "aaaaaaa", "a*a$")); + + // assert(rrex3("ababa", "a*b*a*b*a$")); + assert(rrex3(rrex, "#include\"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "#include \"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "aaaaaad", "a*d$")); + assert(rrex3(rrex, "abcdef", "abd?cdef")); + assert(!rrex3(rrex, "abcdef", "abd?def")); + assert(rrex3(rrex, "abcdef", "def")); + assert(!rrex3(rrex, "abcdef", "^def")); + assert(rrex3(rrex, "abcdef", "def$")); + assert(!rrex3(rrex, "abcdef", "^abc$")); + assert(rrex3(rrex, "aB!.#1", "......")); + assert(!rrex3(rrex, "aB!.#\n", " ......")); + assert(!rrex3(rrex, "aaaaaad", "q+d$")); + assert(rrex3(rrex, "aaaaaaa", "a+a$")); + assert(rrex3(rrex, "aaaaaad", "q*d$")); + assert(!rrex3(rrex, "aaaaaad", "^q*d$")); + + // Asterisk function + 
assert(rrex3(rrex, "123321", "123*321")); + assert(rrex3(rrex, "pony", "p*ony")); + assert(rrex3(rrex, "pppony", "p*ony")); + assert(rrex3(rrex, "ppony", "p*pony")); + assert(rrex3(rrex, "pppony", "pp*pony")); + assert(rrex3(rrex, "pppony", ".*pony")); + assert(rrex3(rrex, "pony", ".*ony")); + assert(rrex3(rrex, "pony", "po*ny")); + // assert(rrex3(rrex,"ppppony", "p*pppony")); + + // Plus function + assert(rrex3(rrex, "pony", "p+ony")); + assert(!rrex3(rrex, "ony", "p+ony")); + assert(rrex3(rrex, "ppony", "p+pony")); + assert(rrex3(rrex, "pppony", "pp+pony")); + assert(rrex3(rrex, "pppony", ".+pony")); + assert(rrex3(rrex, "pony", ".+ony")); + assert(rrex3(rrex, "pony", "po+ny")); + + // Slash functions + assert(rrex3(rrex, "a", "\\w")); + assert(!rrex3(rrex, "1", "\\w")); + assert(rrex3(rrex, "1", "\\W")); + assert(!rrex3(rrex, "a", "\\W")); + assert(rrex3(rrex, "a", "\\S")); + assert(!rrex3(rrex, " ", "\\s")); + assert(!rrex3(rrex, "\t", "\\s")); + assert(!rrex3(rrex, "\n", "\\s")); + assert(rrex3(rrex, "1", "\\d")); + assert(!rrex3(rrex, "a", "\\d")); + assert(rrex3(rrex, "a", "\\D")); + assert(!rrex3(rrex, "1", "\\D")); + assert(rrex3(rrex, "abc", "\\b")); + + assert(rrex3(rrex, "abc", "\\babc")); + assert(!rrex3(rrex, "abc", "a\\b")); + assert(!rrex3(rrex, "abc", "ab\\b")); + assert(!rrex3(rrex, "abc", "abc\\b")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + assert(rrex3(rrex, "abc", "ab\\B")); + assert(!rrex3(rrex, "1ab", "1\\Bab")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + + // Escaping of special chars + assert(rrex3(rrex, "()+*.\\", "\\(\\)\\+\\*\\.\\\\")); + + // Pipe + // assert(rrex3(rrex,"abc","abc|def")); + assert(rrex3(rrex, "abc", "def|jkl|abc")); + assert(rrex3(rrex, "abc", "abc|def")); + + assert(rrex3(rrex, "rhq", "def|rhq|rha")); + assert(rrex3(rrex, "abc", "abc|def")); + + // Repeat + assert(rrex3(rrex, "aaaaa", "a{4}")); + + assert(rrex3(rrex, "aaaa", "a{1,3}a")); + + // Range + assert(rrex3(rrex, "abc", "[abc][abc][abc]$")); + 
assert(rrex3(rrex, "def", "[^abc][^abc][^abc]$")); + assert(rrex3(rrex, "defabc", "[^abc][^abc][^abc]abc")); + assert(rrex3(rrex, "0-9", "0-9")); + assert(rrex3(rrex, "55-9", "[^6-9]5-9$")); + assert(rrex3(rrex, "a", "[a-z]$")); + assert(rrex3(rrex, "A", "[A-Z]$")); + assert(rrex3(rrex, "5", "[0-9]$")); + assert(!rrex3(rrex, "a", "[^a-z]$")); + assert(!rrex3(rrex, "A", "[^A-Z]$")); + assert(!rrex3(rrex, "5", "[^0-9]$")); + assert(rrex3(rrex, "123abc", "[0-9]*abc$")); + assert(rrex3(rrex, "123123", "[0-9]*$")); + + // Parentheses + + assert(rrex3(rrex, "datadata", "(data)*")); + + assert(rrex3(rrex, "datadatapony", "(data)*pony$")); + + assert(!rrex3(rrex, "datadatapony", "(d*p*ata)*pond$")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato$")); + assert(!rrex3(rrex, "datadatadato", "(d*p*a*ta)*gato$")); + + // Matches + assert(rrex3(rrex, "123", "(123)")); + assert(!strcmp(rrex->matches[0], "123")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "aaaabc", "(.*)c")); + + assert(rrex3(rrex, "abcde", ".....$")); + + assert(rrex3(rrex, "abcdefghijklmnopqrstuvwxyz", + "..........................$")); + // printf("(%d)\n", rrex->valid); + + assert(rrex3(rrex, "#include ", "#include.*<(.*)>")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, "#include \"stdlib.h\"", "#include.\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + assert(rrex3(rrex, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + + assert(rrex3(rrex, " #include ", "#include.+<(.+)>")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, " #include 
\"stdlib.h\"", "#include.+\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + + assert(rrex3(rrex, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.+)\"\"(.+)\"\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + + assert(rrex3(rrex, "int abc ", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "int abc;", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "int abc", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + + rrex3_free(rrex); +} +#endif +#ifndef RARENA_H +#define RARENA_H + +#include +#include + +typedef struct arena_t { + unsigned char *memory; + unsigned int pointer; + unsigned int size; +} arena_t; + +arena_t *arena_construct() { + arena_t *arena = (arena_t *)rmalloc(sizeof(arena_t)); + arena->memory = NULL; + arena->pointer = 0; + arena->size = 0; + return arena; +} + +arena_t *arena_new(size_t size) { + arena_t *arena = arena_construct(); + arena->memory = (unsigned char *)rmalloc(size); + arena->size = size; + return arena; +} + +void *arena_alloc(arena_t *arena, size_t size) { + if (arena->pointer + size > arena->size) { + return NULL; + } + void *p = arena->memory + arena->pointer; + arena->pointer += size; + return p; +} + +void arena_free(arena_t *arena) { + // Just constructed and unused arena memory is NULL so no free needed + if (arena->memory) { + rfree(arena->memory); + } + rfree(arena); +} + +void arena_reset(arena_t *arena) { arena->pointer = 0; } +#endif +#ifndef RLIB_RIO +#define RLIB_RIO +#include +#include +#include +#include +#include +#include +#include +#include + +bool rfile_exists(char *path) { + struct stat s; + return !stat(path, &s); +} + +void rjoin_path(char *p1, char *p2, char *output) { + output[0] = 0; + strcpy(output, p1); + + if (output[strlen(output) - 1] != '/') { + char slash[] = "/"; + strcat(output, slash); + } 
+ if (p2[0] == '/') { + p2++; + } + strcat(output, p2); +} + +int risprivatedir(const char *path) { + struct stat statbuf; + + if (stat(path, &statbuf) != 0) { + perror("stat"); + return -1; + } + + if (!S_ISDIR(statbuf.st_mode)) { + return -2; + } + + if ((statbuf.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) == S_IRWXU) { + return 1; // Private (owner has all permissions, others have none) + } + + return 0; +} +bool risdir(const char *path) { return !risprivatedir(path); } + +void rforfile(char *path, void callback(char *)) { + if (!rfile_exists(path)) + return; + DIR *dir = opendir(path); + struct dirent *d; + while ((d = readdir(dir)) != NULL) { + if (!d) + break; + + if ((d->d_name[0] == '.' && strlen(d->d_name) == 1) || + d->d_name[1] == '.') { + continue; + } + char full_path[4096]; + rjoin_path(path, d->d_name, full_path); + + if (risdir(full_path)) { + callback(full_path); + rforfile(full_path, callback); + } else { + callback(full_path); + } + } + closedir(dir); +} + +bool rfd_wait(int fd, int ms) { + + fd_set read_fds; + struct timeval timeout; + + FD_ZERO(&read_fds); + FD_SET(fd, &read_fds); + + timeout.tv_sec = 0; + timeout.tv_usec = 1000 * ms; // 100 milliseconds timeout + + int ret = select(fd + 1, &read_fds, NULL, NULL, &timeout); + return ret > 0 && FD_ISSET(fd, &read_fds); +} + +bool rfd_wait_forever(int fd) { + while ((!rfd_wait(fd, 10))) { + } + return true; +} + +size_t rfile_size(char *path) { + struct stat s; + stat(path, &s); + return s.st_size; +} + +size_t rfile_readb(char *path, void *data, size_t size) { + FILE *fd = fopen(path, "r"); + if (!fd) { + return 0; + } + __attribute__((unused)) size_t bytes_read = + fread(data, size, sizeof(char), fd); + + fclose(fd); + return size; +} + +#endif +#ifndef RSTRING_H +#define RSTRING_H +#include +#include +#include +#include +#include + +unsigned long _r_generate_key_current = 0; + +char *_rcat_int_int(int a, int b) { + static char res[20]; + res[0] = 0; + sprintf(res, "%d%d", a, b); + return res; +} 
+char *_rcat_int_double(int a, double b) { + static char res[20]; + res[0] = 0; + sprintf(res, "%d%f", a, b); + return res; +} + +char *_rcat_charp_int(char *a, int b) { + char res[20]; + sprintf(res, "%c", b); + return strcat(a, res); +} + +char *_rcat_charp_double(char *a, double b) { + char res[20]; + sprintf(res, "%f", b); + return strcat(a, res); +} + +char *_rcat_charp_charp(char *a, char *b) { + ; + return strcat(a, b); +} +char *_rcat_charp_char(char *a, char b) { + char extra[] = {b, 0}; + return strcat(a, extra); +} +char *_rcat_charp_bool(char *a, bool *b) { + if (b) { + return strcat(a, "true"); + } else { + return strcat(a, "false"); + } +} + +#define rcat(x, y) \ + _Generic((x), \ + int: _Generic((y), \ + int: _rcat_int_int, \ + double: _rcat_int_double, \ + char *: _rcat_charp_charp), \ + char *: _Generic((y), \ + int: _rcat_charp_int, \ + double: _rcat_charp_double, \ + char *: _rcat_charp_charp, \ + char: _rcat_charp_char, \ + bool: _rcat_charp_bool))((x), (y)) + +char *rgenerate_key() { + _r_generate_key_current++; + static char key[100]; + key[0] = 0; + sprintf(key, "%ld", _r_generate_key_current); + return key; +} + +char *rformat_number(long lnumber) { + static char formatted[1024]; + + char number[1024]; + sprintf(number, "%ld", lnumber); + + int len = strlen(number); + int commas_needed = (len - 1) / 3; + int new_len = len + commas_needed; + + formatted[new_len] = '\0'; + + int i = len - 1; + int j = new_len - 1; + int count = 0; + + while (i >= 0) { + if (count == 3) { + formatted[j--] = '.'; + count = 0; + } + formatted[j--] = number[i--]; + count++; + } + return formatted; +} + +bool rstrextractdouble(char *str, double *d1) { + for (size_t i = 0; i < strlen(str); i++) { + if (isdigit(str[i])) { + str += i; + sscanf(str, "%lf", d1); + return true; + } + } + return false; +} + +void rstrstripslashes(const char *content, char *result) { + size_t content_length = strlen((char *)content); + unsigned int index = 0; + for (unsigned int i = 0; i < 
content_length; i++) { + char c = content[i]; + if (c == '\\') { + i++; + c = content[i]; + if (c == 'r') { + c = '\r'; + } else if (c == 't') { + c = '\t'; + } else if (c == 'b') { + c = '\b'; + } else if (c == 'n') { + c = '\n'; + } else if (c == 'f') { + c = '\f'; + } else if (c == '\\') { + // No need tbh + c = '\\'; + } + } + result[index] = c; + index++; + } + result[index] = 0; +} + +int rstrstartswith(const char *s1, const char *s2) { + if (s1 == NULL) + return s2 == NULL; + if (s1 == s2 || s2 == NULL || *s2 == 0) + return true; + size_t len_s2 = strlen(s2); + size_t len_s1 = strlen(s1); + if (len_s2 > len_s1) + return false; + return !strncmp(s1, s2, len_s2); +} + +bool rstrendswith(const char *s1, const char *s2) { + if (s1 == NULL) + return s2 == NULL; + if (s1 == s2 || s2 == NULL || *s2 == 0) + return true; + size_t len_s2 = strlen(s2); + size_t len_s1 = strlen(s1); + if (len_s2 > len_s1) { + return false; + } + s1 += len_s1 - len_s2; + return !strncmp(s1, s2, len_s2); +} + +void rstraddslashes(const char *content, char *result) { + size_t content_length = strlen((char *)content); + unsigned int index = 0; + for (unsigned int i = 0; i < content_length; i++) { + if (content[i] == '\r') { + result[index] = '\\'; + index++; + result[index] = 'r'; + index++; + continue; + } else if (content[i] == '\t') { + result[index] = '\\'; + index++; + result[index] = 't'; + index++; + continue; + } else if (content[i] == '\n') { + result[index] = '\\'; + index++; + result[index] = 'n'; + index++; + continue; + } else if (content[i] == '\\') { + result[index] = '\\'; + index++; + result[index] = '\\'; + index++; + continue; + } else if (content[i] == '\b') { + result[index] = '\\'; + index++; + result[index] = 'b'; + index++; + continue; + } else if (content[i] == '\f') { + result[index] = '\\'; + index++; + result[index] = 'f'; + index++; + continue; + } + result[index] = content[i]; + index++; + } + result[index] = 0; +} + +int rstrip_whitespace(char *input, char 
*output) { + output[0] = 0; + int count = 0; + size_t len = strlen(input); + for (size_t i = 0; i < len; i++) { + if (input[i] == '\t' || input[i] == ' ' || input[i] == '\n') { + continue; + } + count = i; + size_t j; + for (j = 0; j < len - count; j++) { + output[j] = input[j + count]; + } + output[j] = '\0'; + break; + } + return count; +} + +void rstrtocstring(const char *input, char *output) { + int index = 0; + char clean_input[strlen(input) * 2]; + char *iptr = clean_input; + rstraddslashes(input, clean_input); + output[index] = '"'; + index++; + while (*iptr) { + if (*iptr == '"') { + output[index] = '\\'; + output++; + } else if (*iptr == '\\' && *(iptr + 1) == 'n') { + output[index] = '\\'; + output++; + output[index] = 'n'; + output++; + output[index] = '"'; + output++; + output[index] = '\n'; + output++; + output[index] = '"'; + output++; + iptr++; + iptr++; + continue; + } + output[index] = *iptr; + index++; + iptr++; + } + if (output[index - 1] == '"' && output[index - 2] == '\n') { + output[index - 1] = 0; + } else if (output[index - 1] != '"') { + output[index] = '"'; + output[index + 1] = 0; + } +} + +size_t rstrtokline(char *input, char *output, size_t offset, bool strip_nl) { + + size_t len = strlen(input); + output[0] = 0; + size_t new_offset = 0; + size_t j; + size_t index = 0; + + for (j = offset; j < len + offset; j++) { + if (input[j] == 0) { + index++; + break; + } + index = j - offset; + output[index] = input[j]; + + if (output[index] == '\n') { + index++; + break; + } + } + output[index] = 0; + + new_offset = index + offset; + + if (strip_nl) { + if (output[index - 1] == '\n') { + output[index - 1] = 0; + } + } + return new_offset; +} + +void rstrjoin(char **lines, size_t count, char *glue, char *output) { + output[0] = 0; + for (size_t i = 0; i < count; i++) { + strcat(output, lines[i]); + if (i != count - 1) + strcat(output, glue); + } +} + +int rstrsplit(char *input, char **lines) { + int index = 0; + size_t offset = 0; + char 
// Returns true when the first character of `str` is a decimal digit.
bool rstartswithnumber(char *str) {
    // <ctype.h> functions require a value representable as unsigned char.
    return isdigit((unsigned char)str[0]) != 0;
}

// Earlier variant of rstrmove, rebuilding `str` in a scratch buffer.
// When start >= new_pos the segment [start, start + length) is placed at the
// front of the string and `new_pos` is not consulted. When start < new_pos
// the text is reassembled around `new_pos`.
// NOTE(review): for start > 0 the forward case does not produce a clean
// move; prefer rstrmove.
// Arguments outside the string leave `str` untouched.
void rstrmove2(char *str, unsigned int start, size_t length,
               unsigned int new_pos) {
    size_t str_len = strlen(str);
    if (start > str_len || length > str_len - start || new_pos > str_len) {
        return;
    }
    char new_str[str_len + 1];
    // Zero the terminator slot too: strncat needs a valid string even when
    // str is empty.
    memset(new_str, 0, str_len + 1);
    if (start < new_pos) {
        strncat(new_str, str + length, str_len - length - start);
        new_str[new_pos] = 0;
        strncat(new_str, str + start, length);
        strcat(new_str, str + strlen(new_str));
    } else {
        strncat(new_str, str + start, length);
        strncat(new_str, str, start);
        strncat(new_str, str + start + length, str_len - start);
    }
    memset(str, 0, str_len);
    strcpy(str, new_str);
}

// Reverses the characters in [first, last).
static void rstr_reverse(char *first, char *last) {
    while (last - first > 1) {
        last--;
        char tmp = *first;
        *first = *last;
        *last = tmp;
        first++;
    }
}

// Rotates [first, last) so that [mid, last) comes before [first, mid).
static void rstr_rotate(char *first, char *mid, char *last) {
    rstr_reverse(first, mid);
    rstr_reverse(mid, last);
    rstr_reverse(first, last);
}

// Moves the segment [start, start + length) of `str` in place.
// When new_pos <= start the segment ends up beginning at `new_pos`; when
// new_pos > start it ends up with its last character at `new_pos`.
// The string is left untouched when `start` or `new_pos` lies outside it,
// when the segment runs past its end, or when a forward `new_pos` falls
// inside the segment before its last character.
void rstrmove(char *str, unsigned int start, size_t length,
              unsigned int new_pos) {
    size_t str_len = strlen(str);
    if (start >= str_len || new_pos >= str_len || length > str_len - start) {
        return;
    }
    if (length == 0 || start == new_pos) {
        return;
    }
    if (start < new_pos) {
        if ((size_t)new_pos + 1 < (size_t)start + length) {
            return;
        }
        // The segment and the text between it and new_pos swap places.
        rstr_rotate(str + start, str + start + length, str + new_pos + 1);
    } else {
        // The text in [new_pos, start) moves behind the segment.
        rstr_rotate(str + new_pos, str + start, str + start + length);
    }
}
// Buffer most recently handed to rfcapture(); NULL before the first call.
char *rfcaptured = NULL;

// Makes `f` fully buffered into `buff` (`size` bytes) and remembers the
// buffer in `rfcaptured`.
void rfcapture(FILE *f, char *buff, size_t size) {
    rfcaptured = buff;
    setvbuf(f, rfcaptured, _IOFBF, size);
}

// Asks the C library to buffer `f` with storage of its own again.
void rfstopcapture(FILE *f) { setvbuf(f, 0, _IOFBF, 0); }

// While true, rr_enable_stdout() and rr_disable_stdout() do nothing.
bool _r_disable_stdout_toggle = false;

// The real stdout, recorded by the first rr_disable_stdout() call.
FILE *_r_original_stdout = NULL;

// Points `stdout` back at the stream recorded by rr_disable_stdout() and
// closes the sink that replaced it.
// Returns false when toggling is locked or stdout was never disabled, in
// which case `stdout` is left alone.
bool rr_enable_stdout(void) {
    if (_r_disable_stdout_toggle) {
        return false;
    }
    if (!_r_original_stdout) {
        return false;
    }
    if (stdout != _r_original_stdout) {
        fclose(stdout);
    }
    stdout = _r_original_stdout;
    return true;
}

// Replaces `stdout` with a stream that discards everything written to it.
// Returns false when toggling is locked, when stdout is already replaced, or
// when the sink cannot be opened (the real stream then stays in place).
bool rr_disable_stdout(void) {
    if (_r_disable_stdout_toggle) {
        return false;
    }
    if (_r_original_stdout == NULL) {
        _r_original_stdout = stdout;
    }
    if (stdout != _r_original_stdout) {
        return false;
    }
    // Opened for writing: printf and friends write to this stream.
    FILE *sink = fopen("/dev/null", "w");
    if (!sink) {
        return false;
    }
    stdout = sink;
    return true;
}

// Switches between the two states above. Always returns true.
bool rr_toggle_stdout(void) {
    if (_r_original_stdout && stdout != _r_original_stdout) {
        rr_enable_stdout();
    } else {
        rr_disable_stdout();
    }
    return true;
}
double percentage; + unsigned int width; + unsigned long draws; + FILE *fout; +} rprogressbar_t; + +rprogressbar_t *rprogressbar_new(long min_value, long max_value, + unsigned int width, FILE *fout) { + rprogressbar_t *pbar = (rprogressbar_t *)malloc(sizeof(rprogressbar_t)); + pbar->min_value = min_value; + pbar->max_value = max_value; + pbar->current_value = min_value; + pbar->width = width; + pbar->draws = 0; + pbar->length = 0; + pbar->changed = false; + pbar->fout = fout ? fout : stdout; + return pbar; +} + +void rprogressbar_free(rprogressbar_t *pbar) { free(pbar); } + +void rprogressbar_draw(rprogressbar_t *pbar) { + if (!pbar->changed) { + return; + } else { + pbar->changed = false; + } + pbar->draws++; + char draws_text[22]; + draws_text[0] = 0; + sprintf(draws_text, "%ld", pbar->draws); + char *draws_textp = draws_text; + // bool draws_text_len = strlen(draws_text); + char bar_begin_char = ' '; + char bar_progress_char = ' '; + char bar_empty_char = ' '; + char bar_end_char = ' '; + char content[4096] = {0}; + char bar_content[1024]; + char buff[2048] = {0}; + bar_content[0] = '\r'; + bar_content[1] = bar_begin_char; + unsigned int index = 2; + for (unsigned long i = 0; i < pbar->length; i++) { + if (*draws_textp) { + bar_content[index] = *draws_textp; + draws_textp++; + } else { + bar_content[index] = bar_progress_char; + } + index++; + } + char infix[] = "\033[0m"; + for (unsigned long i = 0; i < strlen(infix); i++) { + bar_content[index] = infix[i]; + index++; + } + for (unsigned long i = 0; i < pbar->width - pbar->length; i++) { + bar_content[index] = bar_empty_char; + index++; + } + bar_content[index] = bar_end_char; + bar_content[index + 1] = '\0'; + sprintf(buff, "\033[43m%s\033[0m \033[33m%.2f%%\033[0m ", bar_content, + pbar->percentage * 100); + strcat(content, buff); + if (pbar->width == pbar->length) { + strcat(content, "\r"); + for (unsigned long i = 0; i < pbar->width + 10; i++) { + strcat(content, " "); + } + strcat(content, "\r"); + } + 
fprintf(pbar->fout, "%s", content); + fflush(pbar->fout); +} + +bool rprogressbar_update(rprogressbar_t *pbar, unsigned long value) { + if (value == pbar->current_value) { + return false; + } + pbar->current_value = value; + pbar->percentage = (double)pbar->current_value / + (double)(pbar->max_value - pbar->min_value); + unsigned long new_length = (unsigned long)(pbar->percentage * pbar->width); + pbar->changed = new_length != pbar->length; + if (pbar->changed) { + pbar->length = new_length; + rprogressbar_draw(pbar); + return true; + } + return false; +} + +size_t rreadline(char *data, size_t len, bool strip_ln) { + __attribute__((unused)) char *unused = fgets(data, len, stdin); + size_t length = strlen(data); + if (length && strip_ln) + data[length - 1] = 0; + return length; +} + +void rlib_test_progressbar() { + rtest_banner("Progress bar"); + rprogressbar_t *pbar = rprogressbar_new(0, 1000, 10, stderr); + rprogressbar_draw(pbar); + // No draws executed, nothing to show + rassert(pbar->draws == 0); + rprogressbar_update(pbar, 500); + rassert(pbar->percentage == 0.5); + rprogressbar_update(pbar, 500); + rprogressbar_update(pbar, 501); + rprogressbar_update(pbar, 502); + // Should only have drawn one time since value did change, but percentage + // did not + rassert(pbar->draws == 1); + // Changed is false because update function calls draw + rassert(pbar->changed == false); + rprogressbar_update(pbar, 777); + rassert(pbar->percentage == 0.777); + rprogressbar_update(pbar, 1000); + rassert(pbar->percentage == 1); +} + +#endif +#ifndef RTERM_H +#define RTERM_H +#include +#include +#include +#include +#include +#include +#include +typedef struct winsize winsize_t; + +typedef struct rshell_keypress_t { + bool pressed; + bool ctrl; + bool shift; + bool escape; + char c; + int ms; + int fd; +} rshell_keypress_t; + +typedef struct rterm_t { + bool show_cursor; + bool show_footer; + int ms_tick; + rshell_keypress_t key; + void (*before_cursor_move)(struct rterm_t *); + 
void (*after_cursor_move)(struct rterm_t *); + void (*after_key_press)(struct rterm_t *); + void (*before_key_press)(struct rterm_t *); + void (*before_draw)(struct rterm_t *); + void (*after_draw)(struct rterm_t *); + void *session; + unsigned long iterations; + void (*tick)(struct rterm_t *); + char *status_text; + char *_status_text_previous; + winsize_t size; + struct { + int x; + int y; + int pos; + int available; + } cursor; +} rterm_t; + +typedef void (*rterm_event)(rterm_t *); + +void rterm_init(rterm_t *rterm) { + memset(rterm, 0, sizeof(rterm_t)); + rterm->show_cursor = true; + rterm->cursor.x = 0; + rterm->cursor.y = 0; + rterm->ms_tick = 100; + rterm->_status_text_previous = NULL; +} + +void rterm_getwinsize(winsize_t *w) { + // Get the terminal size + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, w) == -1) { + perror("ioctl"); + exit(EXIT_FAILURE); + } +} + +// Terminal setup functions +void enableRawMode(struct termios *orig_termios) { + struct termios raw = *orig_termios; + raw.c_lflag &= ~(ICANON | ECHO); // Disable canonical mode and echoing + raw.c_cc[VMIN] = 1; + raw.c_cc[VTIME] = 240; // Set timeout for read input + + tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); +} + +void disableRawMode(struct termios *orig_termios) { + tcsetattr(STDIN_FILENO, TCSAFLUSH, + orig_termios); // Restore original terminal settings +} + +void rterm_clear_screen() { + printf("\x1b[2J"); // Clear the entire screen + printf("\x1b[H"); // Move cursor to the home position (0,0) +} + +void setBackgroundColor() { + printf("\x1b[34m"); // Set background color to blue +} + +void rterm_move_cursor(int x, int y) { + + printf("\x1b[%d;%dH", y + 1, x + 1); // Move cursor to (x, y) +} + +void cursor_set(rterm_t *rt, int x, int y) { + rt->cursor.x = x; + rt->cursor.y = y; + rt->cursor.pos = y * rt->size.ws_col + x; + rterm_move_cursor(rt->cursor.x, rt->cursor.y); +} +void cursor_restore(rterm_t *rt) { + rterm_move_cursor(rt->cursor.x, rt->cursor.y); +} + +void rterm_print_status_bar(rterm_t 
*rt, char c, unsigned long i) { + if (rt->_status_text_previous && + !strcmp(rt->_status_text_previous, rt->status_text)) { + return; + } + if (rt->_status_text_previous) { + free(rt->_status_text_previous); + } + rt->_status_text_previous = strdup(rt->status_text); + winsize_t ws = rt->size; + cursor_set(rt, rt->cursor.x, rt->cursor.y); + rterm_move_cursor(0, ws.ws_row - 1); + + char output_str[1024]; + output_str[0] = 0; + + // strcat(output_str, "\x1b[48;5;240m"); + + for (int i = 0; i < ws.ws_col; i++) { + strcat(output_str, " "); + } + char content[500]; + content[0] = 0; + if (!rt->status_text) { + sprintf(content, "\rp:%d:%d | k:%c:%d | i:%ld ", rt->cursor.x + 1, + rt->cursor.y + 1, c == 0 ? '0' : c, c, i); + } else { + sprintf(content, "\r%s", rt->status_text); + } + strcat(output_str, content); + // strcat(output_str, "\x1b[0m"); + printf("%s", output_str); + cursor_restore(rt); +} + +void rterm_show_cursor() { + printf("\x1b[?25h"); // Show the cursor +} + +void rterm_hide_cursor() { + printf("\x1b[?25l"); // Hide the cursor +} + +rshell_keypress_t rshell_getkey(rterm_t *rt) { + static rshell_keypress_t press; + press.c = 0; + press.ctrl = false; + press.shift = false; + press.escape = false; + press.pressed = rfd_wait(0, rt->ms_tick); + if (!press.pressed) { + return press; + } + press.c = getchar(); + char ch = press.c; + if (ch == '\x1b') { + // Get detail + ch = getchar(); + + if (ch == '[') { + // non char key: + press.escape = true; + + ch = getchar(); // is a number. 
1 if shift + arrow + press.c = ch; + if (ch >= '0' && ch <= '9') + ch = getchar(); + press.c = ch; + if (ch == ';') { + ch = getchar(); + press.c = ch; + if (ch == '5') { + press.ctrl = true; + press.c = getchar(); // De arrow + } + } + } else if (ch == 27) { + press.escape = true; + press.c = ch; + } else { + press.c = ch; + } + } + return press; +} + +// Main function +void rterm_loop(rterm_t *rt) { + struct termios orig_termios; + tcgetattr(STDIN_FILENO, &orig_termios); // Get current terminal attributes + enableRawMode(&orig_termios); + + int x = 0, y = 0; // Initial cursor position + char ch = 0; + + ; + while (1) { + rterm_getwinsize(&rt->size); + rt->cursor.available = rt->size.ws_col * rt->size.ws_row; + if (rt->tick) { + rt->tick(rt); + } + + rterm_hide_cursor(); + setBackgroundColor(); + rterm_clear_screen(); + if (rt->before_draw) { + rt->before_draw(rt); + } + rterm_print_status_bar(rt, ch, rt->iterations); + if (rt->after_draw) { + rt->after_draw(rt); + } + if (!rt->iterations || (x != rt->cursor.x || y != rt->cursor.y)) { + if (rt->cursor.y == rt->size.ws_row) { + rt->cursor.y--; + } + if (rt->cursor.y < 0) { + rt->cursor.y = 0; + } + x = rt->cursor.x; + y = rt->cursor.y; + if (rt->before_cursor_move) + rt->before_cursor_move(rt); + cursor_set(rt, rt->cursor.x, rt->cursor.y); + if (rt->after_cursor_move) + rt->after_cursor_move(rt); + // x = rt->cursor.x; + // y = rt->cursor.y; + } + if (rt->show_cursor) + rterm_show_cursor(); + + fflush(stdout); + + rt->key = rshell_getkey(rt); + if (rt->key.pressed && rt->before_key_press) { + rt->before_key_press(rt); + } + rshell_keypress_t key = rt->key; + ch = key.c; + if (ch == 'q') + break; // Press 'q' to quit + if (key.c == -1) { + nsleep(1000 * 1000); + } + // Escape + if (key.escape) { + switch (key.c) { + case 65: // Move up + if (rt->cursor.y > -1) + rt->cursor.y--; + break; + case 66: // Move down + if (rt->cursor.y < rt->size.ws_row) + rt->cursor.y++; + break; + case 68: // Move left + if (rt->cursor.x 
// Node of a character tree: each node carries one key character, `next`
// links the alternatives for the same key position and `children` leads to
// the nodes for the following position. A node with c == 0 is an unused
// placeholder.
typedef struct rtree_t {
    struct rtree_t *next;
    struct rtree_t *children;
    char c;
    void *data;
} rtree_t;

// Allocates an empty placeholder node with rmalloc.
// Returns NULL when rmalloc returns NULL.
rtree_t *rtree_new() {
    rtree_t *b = (rtree_t *)rmalloc(sizeof(rtree_t));
    if (!b) {
        return NULL;
    }
    b->next = NULL;
    b->children = NULL;
    b->c = 0;
    b->data = NULL;
    return b;
}

// Stores `data` under the key `c` in the tree rooted at `b`, creating nodes
// as needed.
// Returns the node holding `data`, or NULL when `b` is NULL, the key is NULL
// or empty, or a node could not be allocated.
rtree_t *rtree_set(rtree_t *b, char *c, void *data) {
    if (!b || !c || !*c) {
        return NULL;
    }
    for (;;) {
        if (b->c == 0) {
            // Unused placeholder: claim it for this character.
            b->c = *c;
        } else {
            // Find the alternative for this character, appending one if
            // the level does not have it yet.
            while (b->c != *c) {
                if (!b->next) {
                    b->next = rtree_new();
                    if (!b->next) {
                        return NULL;
                    }
                    b->next->c = *c;
                }
                b = b->next;
            }
        }
        c++;
        if (*c == 0) {
            b->data = data;
            return b;
        }
        // More key characters follow: they always live one level down,
        // also below a node that was just claimed.
        if (!b->children) {
            b->children = rtree_new();
            if (!b->children) {
                return NULL;
            }
        }
        b = b->children;
    }
}

// Returns the node reached by following the key `c` from `b`, or NULL when
// the key is NULL, empty or not present. The node is returned even if no
// data was stored on it.
rtree_t *rtree_find(rtree_t *b, char *c) {
    if (!c || !*c) {
        return NULL;
    }
    while (b) {
        if (b->c == *c) {
            c++;
            if (*c == 0) {
                return b;
            }
            b = b->children;
        } else {
            b = b->next;
        }
    }
    return NULL;
}

// Releases `b`, its alternatives and everything below them with rfree.
// The stored data pointers are not touched.
void rtree_free(rtree_t *b) {
    // Alternatives are walked in a loop so only the key depth recurses.
    while (b) {
        rtree_t *next = b->next;
        rtree_free(b->children);
        rfree(b);
        b = next;
    }
}

// Returns the data stored under the key `c`, or NULL when there is none.
void *rtree_get(rtree_t *b, char *c) {
    rtree_t *t = rtree_find(b, c);
    if (t) {
        return t->data;
    }
    return NULL;
}
// Capacity of rtoken_t.value, terminator included.
#ifndef RTOKEN_VALUE_SIZE
#define RTOKEN_VALUE_SIZE 1024
#endif

typedef enum rtoken_type_t {
    RT_UNKNOWN = 0,
    RT_SYMBOL,
    RT_NUMBER,
    RT_STRING,
    RT_PUNCT,
    RT_OPERATOR,
    RT_EOF = 10,
    RT_BRACE_OPEN,
    RT_CURLY_BRACE_OPEN,
    RT_BRACKET_OPEN,
    RT_BRACE_CLOSE,
    RT_CURLY_BRACE_CLOSE,
    RT_BRACKET_CLOSE
} rtoken_type_t;

// One lexed token: its type, its text and the line/column counters that
// were current when the token was started.
typedef struct rtoken_t {
    rtoken_type_t type;
    char value[RTOKEN_VALUE_SIZE];
    unsigned int line;
    unsigned int col;
} rtoken_t;

// Lexer state, set up by rlex(): the input text, the read position in it and
// the running line/column counters.
static char *_content;
static unsigned int _content_ptr;
static unsigned int _content_line;
static unsigned int _content_col;

// Returns 1 for brackets, braces, parentheses and both quote characters.
static int isgroupingchar(char c) {
    return c != 0 && strchr("{}()[]\"'", c) != NULL;
}

// Returns 1 for the characters that may form an operator token.
static int isoperator(char c) {
    return c != 0 && strchr("+-/*=><|&", c) != NULL;
}

// Returns a zeroed token of type RT_UNKNOWN.
static rtoken_t rtoken_new(void) {
    rtoken_t token;
    memset(&token, 0, sizeof(token));
    token.type = RT_UNKNOWN;
    return token;
}

// Returns an empty token of `type` stamped with the current line and column.
static rtoken_t rtoken_begin(rtoken_type_t type) {
    rtoken_t token = rtoken_new();
    token.type = type;
    token.col = _content_col;
    token.line = _content_line;
    return token;
}

// Appends `c` to the token text, consumes one input byte and returns the new
// text length. Text beyond RTOKEN_VALUE_SIZE - 1 characters is consumed but
// dropped, so the value always stays terminated.
static size_t rlex_take(rtoken_t *token, size_t len, char c) {
    if (len + 1 < sizeof(token->value)) {
        token->value[len++] = c;
    }
    _content_ptr++;
    _content_col++;
    return len;
}

// Lexes a number: digits, an optional leading '-' and at most one '.'.
rtoken_t rlex_number(void) {
    rtoken_t token = rtoken_begin(RT_NUMBER);
    size_t len = 0;
    bool first_char = true;
    bool seen_dot = false;
    for (;;) {
        char c = _content[_content_ptr];
        bool accepted = isdigit((unsigned char)c) ||
                        (first_char && c == '-') || (!seen_dot && c == '.');
        if (!accepted) {
            break;
        }
        if (c == '.') {
            seen_dot = true;
        }
        first_char = false;
        len = rlex_take(&token, len, c);
    }
    return token;
}

// Lexes a run of letters and underscores.
static rtoken_t rlex_symbol(void) {
    rtoken_t token = rtoken_begin(RT_SYMBOL);
    size_t len = 0;
    for (;;) {
        char c = _content[_content_ptr];
        if (!isalpha((unsigned char)c) && c != '_') {
            break;
        }
        len = rlex_take(&token, len, c);
    }
    return token;
}

// Lexes a run of operator characters. A '-' directly after '=' ends the run
// so that it can start a negative number instead.
static rtoken_t rlex_operator(void) {
    rtoken_t token = rtoken_begin(RT_OPERATOR);
    size_t len = 0;
    bool is_first = true;
    while (isoperator(_content[_content_ptr])) {
        char c = _content[_content_ptr];
        if (!is_first && _content[_content_ptr - 1] == '=' && c == '-') {
            break;
        }
        len = rlex_take(&token, len, c);
        is_first = false;
    }
    return token;
}

// Lexes a run of punctuation. After the first character the run ends at any
// grouping, quote or operator character.
static rtoken_t rlex_punct(void) {
    rtoken_t token = rtoken_begin(RT_PUNCT);
    size_t len = 0;
    bool is_first = true;
    while (ispunct((unsigned char)_content[_content_ptr])) {
        char c = _content[_content_ptr];
        if (!is_first && (isgroupingchar(c) || isoperator(c))) {
            break;
        }
        len = rlex_take(&token, len, c);
        is_first = false;
    }
    return token;
}

// Lexes a quoted string; the quote character at the read position decides
// which character closes it. \n, \r and \t are decoded, any other escaped
// character stands for itself. The quotes are not part of the value.
// A string that is still open at the end of the input ends there.
static rtoken_t rlex_string(void) {
    rtoken_t token = rtoken_begin(RT_STRING);
    size_t len = 0;
    char str_chr = _content[_content_ptr];
    _content_ptr++;
    while (_content[_content_ptr] != 0 && _content[_content_ptr] != str_chr) {
        char c = _content[_content_ptr];
        if (c == '\\') {
            _content_ptr++;
            c = _content[_content_ptr];
            if (c == 0) {
                // Backslash was the last byte: nothing left to decode.
                break;
            }
            if (c == 'n') {
                c = '\n';
            } else if (c == 'r') {
                c = '\r';
            } else if (c == 't') {
                c = '\t';
            }
            _content_col++;
        }
        len = rlex_take(&token, len, c);
    }
    if (_content[_content_ptr] == str_chr) {
        _content_ptr++;
    }
    return token;
}

// Starts lexing `content` from its first byte. The text is not copied and
// must stay valid while tokens are being read.
void rlex(char *content) {
    _content = content;
    _content_ptr = 0;
    _content_col = 1;
    _content_line = 1;
}

// Appends `src` to the string in `dest` `times` times. `dest` must have
// room for the result.
static void rlex_repeat_str(char *dest, char *src, unsigned int times) {
    char *end = dest + strlen(dest);
    size_t src_len = strlen(src);
    for (size_t i = 0; i < times; i++) {
        memcpy(end, src, src_len);
        end += src_len;
    }
    *end = 0;
}

// Builds a token of `type` at the current line/column whose text is `value`,
// cut to RTOKEN_VALUE_SIZE - 1 characters if longer.
rtoken_t rtoken_create(rtoken_type_t type, char *value) {
    rtoken_t token = rtoken_begin(type);
    size_t value_len = strlen(value);
    if (value_len >= sizeof(token.value)) {
        value_len = sizeof(token.value) - 1;
    }
    memcpy(token.value, value, value_len);
    return token;
}

// Maps a bracket character to its token type, RT_UNKNOWN for anything else.
static rtoken_type_t rlex_bracket_type(char c) {
    switch (c) {
    case '{':
        return RT_CURLY_BRACE_OPEN;
    case '}':
        return RT_CURLY_BRACE_CLOSE;
    case '(':
        return RT_BRACE_OPEN;
    case ')':
        return RT_BRACE_CLOSE;
    case '[':
        return RT_BRACKET_OPEN;
    case ']':
        return RT_BRACKET_CLOSE;
    default:
        return RT_UNKNOWN;
    }
}

// Returns the next token of the text given to rlex(). Whitespace is skipped;
// at the end of the input every call returns an RT_EOF token with the text
// "eof". A byte that fits no other category is returned on its own as an
// RT_UNKNOWN token.
rtoken_t rlex_next(void) {
    while (true) {
        _content_col++;

        char c = _content[_content_ptr];
        unsigned char uc = (unsigned char)c;
        if (c == 0) {
            return rtoken_create(RT_EOF, "eof");
        }
        if (c == '\n') {
            _content_line++;
            _content_col = 1;
            _content_ptr++;
            continue;
        }
        if (isspace(uc)) {
            _content_ptr++;
            continue;
        }
        if (isdigit(uc) ||
            (c == '-' && isdigit((unsigned char)_content[_content_ptr + 1]))) {
            return rlex_number();
        }
        if (isalpha(uc) || c == '_') {
            return rlex_symbol();
        }
        if (c == '"' || c == '\'') {
            return rlex_string();
        }
        if (isoperator(c)) {
            return rlex_operator();
        }
        rtoken_type_t bracket = rlex_bracket_type(c);
        if (bracket != RT_UNKNOWN) {
            char text[2] = {c, 0};
            _content_ptr++;
            return rtoken_create(bracket, text);
        }
        if (ispunct(uc)) {
            return rlex_punct();
        }
        // Control or non-ASCII byte: hand it out so the read position
        // always moves forward.
        char text[2] = {c, 0};
        _content_ptr++;
        return rtoken_create(RT_UNKNOWN, text);
    }
}
token_previous.type = RT_UNKNOWN; + while (true) { + rtoken_t token = rlex_next(); + if (token.type == RT_EOF) { + break; + } + + // col = strlen(token.value); + + if (col == 0) { + rlex_repeat_str(result, tab_chars, tab_index); + // col = strlen(token.value);// strlen(tab_chars) * tab_index; + } + + if (token.type == RT_STRING) { + strcat(result, "\""); + + char string_with_slashes[strlen(token.value) * 2 + 1]; + rstraddslashes(token.value, string_with_slashes); + strcat(result, string_with_slashes); + + strcat(result, "\""); + // col+= strlen(token.value) + 2; + // printf("\n"); + // printf("<<<%s>>>\n",token.value); + + memcpy(&token_previous, &token, sizeof(token)); + continue; + } + if (!(strcmp(token.value, "{"))) { + if (col != 0) { + strcat(result, "\n"); + rlex_repeat_str(result, " ", tab_index); + } + strcat(result, token.value); + + tab_index++; + + strcat(result, "\n"); + + col = 0; + + memcpy(&token_previous, &token, sizeof(token)); + continue; + } else if (!(strcmp(token.value, "}"))) { + unsigned int tab_indexed = 0; + if (tab_index) + tab_index--; + strcat(result, "\n"); + + rlex_repeat_str(result, tab_chars, tab_index); + tab_indexed++; + + strcat(result, token.value); + strcat(result, "\n"); + col = 0; + + memcpy(&token_previous, &token, sizeof(token)); + continue; + } + if ((token_previous.type == RT_SYMBOL && token.type == RT_NUMBER) || + (token_previous.type == RT_NUMBER && token.type == RT_SYMBOL) || + (token_previous.type == RT_PUNCT && token.type == RT_SYMBOL) || + (token_previous.type == RT_BRACE_CLOSE && + token.type == RT_SYMBOL) || + (token_previous.type == RT_SYMBOL && token.type == RT_SYMBOL)) { + if (token_previous.value[0] != ',' && + token_previous.value[0] != '.') { + if (token.type != RT_OPERATOR && token.value[0] != '.') { + strcat(result, "\n"); + rlex_repeat_str(result, tab_chars, tab_index); + } + } + } + + if (token.type == RT_OPERATOR) { + strcat(result, " "); + } + if (token.type == RT_STRING) { + strcat(result, "\""); + } 
+ strcat(result, token.value); + if (token.type == RT_STRING) { + strcat(result, "\""); + } + + if (token.type == RT_OPERATOR) { + strcat(result, " "); + } + if (!strcmp(token.value, ",")) { + strcat(result, " "); + } + col += strlen(token.value); + memcpy(&token_previous, &token, sizeof(token)); + } + return result; +} +#endif +#ifndef RBENCH_H +#define RBENCH_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define RBENCH(times, action) \ + { \ + unsigned long utimes = (unsigned long)times; \ + nsecs_t start = nsecs(); \ + for (unsigned long i = 0; i < utimes; i++) { \ + { \ + action; \ + } \ + } \ + nsecs_t end = nsecs(); \ + printf("%s\n", format_time(end - start)); \ + } + +#define RBENCHP(times, action) \ + { \ + printf("\n"); \ + nsecs_t start = nsecs(); \ + unsigned int prev_percentage = 0; \ + unsigned long utimes = (unsigned long)times; \ + for (unsigned long i = 0; i < utimes; i++) { \ + unsigned int percentage = \ + ((long double)i / (long double)times) * 100; \ + int percentage_changed = percentage != prev_percentage; \ + __attribute__((unused)) int first = i == 0; \ + __attribute__((unused)) int last = i == utimes - 1; \ + { action; }; \ + if (percentage_changed) { \ + printf("\r%d%%", percentage); \ + fflush(stdout); \ + \ + prev_percentage = percentage; \ + } \ + } \ + nsecs_t end = nsecs(); \ + printf("\r%s\n", format_time(end - start)); \ + } + +struct rbench_t; + +typedef struct rbench_function_t { +#ifdef __cplusplus + void (*call)(); +#else + void(*call); +#endif + char name[256]; + char group[256]; + void *arg; + void *data; + bool first; + bool last; + int argc; + unsigned long times_executed; + + nsecs_t average_execution_time; + nsecs_t total_execution_time; +} rbench_function_t; + +typedef struct rbench_t { + unsigned int function_count; + rbench_function_t functions[100]; + rbench_function_t *current; + rprogressbar_t *progress_bar; + bool show_progress; + int winner; + bool stdout; + unsigned long 
times; + bool silent; + nsecs_t execution_time; +#ifdef __cplusplus + void (*add_function)(struct rbench_t *r, const char *name, + const char *group, void (*)()); +#else + void (*add_function)(struct rbench_t *r, const char *name, + const char *group, void *); +#endif + void (*rbench_reset)(struct rbench_t *r); + struct rbench_t *(*execute)(struct rbench_t *r, long times); + struct rbench_t *(*execute1)(struct rbench_t *r, long times, void *arg1); + struct rbench_t *(*execute2)(struct rbench_t *r, long times, void *arg1, + void *arg2); + struct rbench_t *(*execute3)(struct rbench_t *r, long times, void *arg1, + void *arg2, void *arg3); + +} rbench_t; + +FILE *_rbench_stdout = NULL; +FILE *_rbench_stdnull = NULL; + +void rbench_toggle_stdout(rbench_t *r) { + if (!r->stdout) { + if (_rbench_stdout == NULL) { + _rbench_stdout = stdout; + } + if (_rbench_stdnull == NULL) { + _rbench_stdnull = fopen("/dev/null", "wb"); + } + if (stdout == _rbench_stdout) { + stdout = _rbench_stdnull; + } else { + stdout = _rbench_stdout; + } + } +} +void rbench_restore_stdout(rbench_t *r) { + if (r->stdout) + return; + if (_rbench_stdout) { + stdout = _rbench_stdout; + } + if (_rbench_stdnull) { + fclose(_rbench_stdnull); + _rbench_stdnull = NULL; + } +} + +rbench_t *rbench_new(); + +rbench_t *_rbench = NULL; +rbench_function_t *rbf; +rbench_t *rbench() { + if (_rbench == NULL) { + _rbench = rbench_new(); + } + return _rbench; +} + +typedef void *(*rbench_call)(); +typedef void *(*rbench_call1)(void *); +typedef void *(*rbench_call2)(void *, void *); +typedef void *(*rbench_call3)(void *, void *, void *); + +#ifdef __cplusplus +void rbench_add_function(rbench_t *rp, const char *name, const char *group, + void (*call)()) { +#else +void rbench_add_function(rbench_t *rp, const char *name, const char *group, + void *call) { +#endif + rbench_function_t *f = &rp->functions[rp->function_count]; + rp->function_count++; + f->average_execution_time = 0; + f->total_execution_time = 0; + 
f->times_executed = 0; + f->call = call; + strcpy(f->name, name); + strcpy(f->group, group); +} + +void rbench_reset_function(rbench_function_t *f) { + f->average_execution_time = 0; + f->times_executed = 0; + f->total_execution_time = 0; +} + +void rbench_reset(rbench_t *rp) { + for (unsigned int i = 0; i < rp->function_count; i++) { + rbench_reset_function(&rp->functions[i]); + } +} +int rbench_get_winner_index(rbench_t *r) { + int winner = 0; + nsecs_t time = 0; + for (unsigned int i = 0; i < r->function_count; i++) { + if (time == 0 || r->functions[i].total_execution_time < time) { + winner = i; + time = r->functions[i].total_execution_time; + } + } + return winner; +} +bool rbench_was_last_function(rbench_t *r) { + for (unsigned int i = 0; i < r->function_count; i++) { + if (i == r->function_count - 1 && r->current == &r->functions[i]) + return true; + } + return false; +} + +rbench_function_t *rbench_execute_prepare(rbench_t *r, int findex, long times, + int argc) { + rbench_toggle_stdout(r); + if (findex == 0) { + r->execution_time = 0; + } + rbench_function_t *rf = &r->functions[findex]; + rf->argc = argc; + rbf = rf; + r->current = rf; + if (r->show_progress) + r->progress_bar = rprogressbar_new(0, times, 20, stderr); + r->times = times; + // printf(" %s:%s gets executed for %ld times with %d + // arguments.\n",rf->group, rf->name, times,argc); + rbench_reset_function(rf); + + return rf; +} +void rbench_execute_finish(rbench_t *r) { + rbench_toggle_stdout(r); + if (r->progress_bar) { + free(r->progress_bar); + r->progress_bar = NULL; + } + r->current->average_execution_time = + r->current->total_execution_time / r->current->times_executed; + ; + // printf(" %s:%s finished executing in + // %s\n",r->current->group,r->current->name, + // format_time(r->current->total_execution_time)); + // rbench_show_results_function(r->current); + if (rbench_was_last_function(r)) { + rbench_restore_stdout(r); + unsigned int winner_index = rbench_get_winner_index(r); + 
r->winner = winner_index + 1; + if (!r->silent) + rprintgf(stderr, "Benchmark results:\n"); + nsecs_t total_time = 0; + + for (unsigned int i = 0; i < r->function_count; i++) { + rbf = &r->functions[i]; + total_time += rbf->total_execution_time; + bool is_winner = winner_index == i; + if (is_winner) { + if (!r->silent) + rprintyf(stderr, " > %s:%s:%s\n", + format_time(rbf->total_execution_time), rbf->group, + rbf->name); + } else { + if (!r->silent) + rprintbf(stderr, " %s:%s:%s\n", + format_time(rbf->total_execution_time), rbf->group, + rbf->name); + } + } + if (!r->silent) + rprintgf(stderr, "Total execution time: %s\n", + format_time(total_time)); + } + rbench_restore_stdout(r); + rbf = NULL; + r->current = NULL; +} +struct rbench_t *rbench_execute(rbench_t *r, long times) { + + for (unsigned int i = 0; i < r->function_count; i++) { + + rbench_function_t *f = rbench_execute_prepare(r, i, times, 0); + rbench_call c = (rbench_call)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + r->execution_time += f->total_execution_time; + rbench_execute_finish(r); + } + return r; +} + +struct rbench_t *rbench_execute1(rbench_t *r, long times, void *arg1) { + + for (unsigned int i = 0; i < r->function_count; i++) { + rbench_function_t *f = rbench_execute_prepare(r, i, times, 1); + rbench_call1 c = (rbench_call1)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(arg1); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(arg1); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + 
f->total_execution_time = nsecs() - start; + r->execution_time += f->total_execution_time; + rbench_execute_finish(r); + } + return r; +} + +struct rbench_t *rbench_execute2(rbench_t *r, long times, void *arg1, + void *arg2) { + + for (unsigned int i = 0; i < r->function_count; i++) { + rbench_function_t *f = rbench_execute_prepare(r, i, times, 2); + rbench_call2 c = (rbench_call2)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(arg1, arg2); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(arg1, arg2); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + r->execution_time += f->total_execution_time; + rbench_execute_finish(r); + } + return r; +} + +struct rbench_t *rbench_execute3(rbench_t *r, long times, void *arg1, + void *arg2, void *arg3) { + + for (unsigned int i = 0; i < r->function_count; i++) { + rbench_function_t *f = rbench_execute_prepare(r, i, times, 3); + + rbench_call3 c = (rbench_call3)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(arg1, arg2, arg3); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(arg1, arg2, arg3); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + rbench_execute_finish(r); + } + return r; +} + +rbench_t *rbench_new() { + + rbench_t *r = (rbench_t *)malloc(sizeof(rbench_t)); + memset(r, 0, sizeof(rbench_t)); + r->add_function = rbench_add_function; + r->rbench_reset = rbench_reset; + r->execute1 = rbench_execute1; + r->execute2 = rbench_execute2; + r->execute3 = rbench_execute3; + r->execute = rbench_execute; + r->stdout = true; + r->silent = false; + r->winner = 0; + r->show_progress = true; + 
return r; +} +void rbench_free(rbench_t *r) { free(r); } + +#endif +// END OF RLIB +#endif diff --git a/rrex.h b/rrex.h new file mode 100644 index 0000000..4bcfbd3 --- /dev/null +++ b/rrex.h @@ -0,0 +1,339 @@ +#include "rlib.h" +#include +#include +#include +#include +#include + +bool latleast(char *s, unsigned int l) { + if (!l) + return true; + unsigned int i = 0; + while (s[i] != 0) { + i++; + if (i == l) + return true; + } + return false; +} + +bool long_enough(char *s, char *n) { + while (++(*n)) { + if (!(++(*s))) + return true; + } + return false; +} + +int swith(char *s1, char *s2) { + + while (*s1 == *s2) { + if (!*s2) + return true; + s1++; + s2++; + } + return (*s1 && !*s2); +} + +int substr(char *s, int start, int len, char *rdata) { + + for (int i = 0; i < len; i++) { + if (s[i + start] == 0) + return false; + rdata[i] = s[i + start]; + } + rdata[len] = '\0'; + return strlen(rdata); +} + +char groupcreverse(char c) { + if (c == '{') + return '}'; + if (c == '}') + return '{'; + if (c == '(') + return ')'; + if (c == ')') + return '('; + if (c == '[') + return ']'; + if (c == ']') + return '['; + return 0; +} + +char isgroupingc(char c) { return groupcreverse(c) != 0; } + +bool isgrouping(char *s) { return isgroupingc(s[0]) > 0; } + +void test_isgrouping() { + rassert(isgrouping("{")); + rassert(isgrouping("{test")); + rassert(isgrouping("}")); + rassert(isgrouping("(")); + rassert(isgrouping(")")); + rassert(isgrouping("[")); + rassert(isgrouping("]")); + rassert(!isgrouping("!")); +} + +int sextract(char *s, char *s_open, char *s_close, char *rdata) { + unsigned int indent = 0; + char *s_original = s; + char *sptr = s; + int start = -1; + int pos = 0; + unsigned int s_open_len = strlen(s_open); + while (*sptr) { + pos = sptr - s_original; + if (start == -1 && !swith(sptr, s_open)) { + break; + } else if (start == -1) { + start = s_open_len; + indent++; + } else if (swith(sptr, s_open)) { + indent++; + } else if (swith(sptr, s_close)) { + indent--; + 
if (indent == 0) { + if (substr(s_original, start, pos - start, rdata)) + return pos; + else + return false; + } + } + sptr++; + } + rdata[0] = 0; + return -1; +} + +// expr rex +int exprtok(char *expr, char *ex) { + if (*expr == '\\' && *(expr + 1) != 0) { + *ex = *expr; + *(ex++) = *(expr + 1); + *(ex + 2) = 0; + return 2; + } + char close_chr = groupcreverse(*expr); + if (close_chr) { + + int length = 0; + char open_chr = *expr; + + int indent = 0; + while (*expr) { + length++; + char c = *expr; + if (c == open_chr) { + indent++; + } else if (c == close_chr) { + indent--; + } + *ex = c; + + if (indent == 0) { + break; + } + + ex++; + expr++; + } + (*ex++) = 0; + return indent == 0 ? length : 0; + } else if (isalpharange(expr) || isdigitrange(expr)) { + for (int i = 0; i < 3; i++) { + ex[i] = expr[i]; + } + ex[3] = 0; + return 3; + } + // printf("%s\n",expr); + if (*expr) { + *ex = *expr; + *(ex++) = 0; + return 1; + } + return 0; +} + +void _test_exprtok(char *expr, char *texpected, int len) { + char tok[4096]; + int toklen = exprtok(expr, tok); + if (toklen != len) { + printf("%d:%d\n", toklen, len); + printf("Assert error length of expected token %s\n", texpected); + rassert(toklen == len); + } + if (strncmp(expr, texpected, len)) { + printf( + "Compare error of exprtok with expected token %s does not starts " + "with %s\n", + expr, texpected); + rassert(false); + } +} +typedef struct rreg_token_t { + char content[4096]; + int len; +} rreg_token_t; + +void test_exprtok() { + _test_exprtok("[abc]def", "[abc]", 5); + _test_exprtok("0-9", "0-9", 3); + _test_exprtok("a-z", "a-z", 3); + _test_exprtok("A-Z", "A-Z", 3); + _test_exprtok("\\w", "\\w", 2); + _test_exprtok("\\", "\\", 1); + _test_exprtok("a", "a", 1); +} + +int sexpand(char *s, char *rdata) { + int times = 0; + while (isgrouping(s)) { + char c_open[2] = {s[0], '\0'}; + char c_close[2] = {groupcreverse(c_open[0]), '\0'}; + if (sextract(s, c_open, c_close, rdata) > 0) + times++; + s = rdata; + } + return 
times; +} + +void test_sexpand() { + + char rdata[1024]; + rassert(sexpand("[a]", rdata) == 1); + rassert(!strcmp(rdata, "a")); + rassert(sexpand("(a)", rdata) == 1); + rassert(!strcmp(rdata, "a")); + rassert(sexpand("[a)", rdata) == 0); + rassert(!strcmp(rdata, "")); + rassert(sexpand("(a]", rdata) == 0); + rassert(!strcmp(rdata, "")); + rassert(sexpand("[{(a)}]", rdata) == 3); + rassert(!strcmp(rdata, "a")); +} + +void test_isalpharange() { + rassert(isalpharange("a-z")); + rassert(isalpharange("a-a")); + rassert(isalpharange("z-z")); + rassert(isalpharange("a-Z")); + rassert(isalpharange("Z-a")); + rassert(isalpharange("Z-Z")); + rassert(!isalpharange("-a")); + rassert(!isalpharange("a-")); + rassert(!isalpharange("a")); + rassert(!isalpharange("-")); + rassert(!isalpharange("z")); + rassert(!isalpharange("-A")); + rassert(!isalpharange("A-")); + rassert(!isalpharange("A")); + rassert(!isalpharange("-")); + rassert(!isalpharange("Z")); + rassert(!isalpharange("0-9")); +} + +void test_isdigitrange() { + rassert(isdigitrange("0-9")); + rassert(isdigitrange("0-0")); + rassert(isdigitrange("9-9")); + rassert(!isdigitrange("-0")); + rassert(!isdigitrange("0-")); + rassert(!isdigitrange("0")); + rassert(!isdigitrange("-")); + rassert(!isdigitrange("9")); + rassert(!isdigitrange("a-a")); +} + +void test_swith() { + rassert(swith("r", "r")); + rassert(!swith("r", "re")); + rassert(swith("retoor", "r")); + rassert(swith("retoor", "re")); + rassert(swith("retoor", "retoor")); + rassert(!swith("retoor", "retoori")); + rassert(!swith("retoor", "retoorii")); + rassert(!swith("", "")); + rassert(!swith("", "")); +} + +void test_substr() { + int r; + char str[1024]; + r = substr("[-]", 1, 1, str); + rassert(r == 1); + rassert(!strcmp(str, "-")); + + r = substr("[-]", 0, 1, str); + rassert(r == 1); + rassert(!strcmp(str, "[")); + + r = substr("[-]", 2, 1, str); + rassert(r == 1); + rassert(!strcmp(str, "]")); + + r = substr("[-]", 0, 3, str); + rassert(r == 3); + 
rassert(!strcmp(str, "[-]")); + + r = substr("[-]", 0, 2, str); + rassert(r == 2); + rassert(!strcmp(str, "[-")); +} + +void test_sextract() { + char rdata[1024]; + int pos = 0; + rassert((pos = sextract("(valid)", "(", ")", rdata)) == 6); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("{valid}", "{", "}", rdata)) == 6); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("{{valid}", "{", "}", rdata)) == -1); + rassert(!strcmp("", rdata)); + rassert((pos = sextract("{valid}}", "{", "}", rdata)) == 6); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("{{valid}}", "{", "}", rdata)) == 8); + rassert(!strcmp("{valid}", rdata)); + rassert((pos = sextract("{[({valid}}", "{", "}", rdata)) == 10); + rassert(!strcmp("[({valid}", rdata)); + rassert((pos = sextract("/*valid*/", "/*", "*/", rdata)) > 0); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("/**valid*/", "/**", "*/", rdata)) > 0); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("/*valid**/", "/*", "**/", rdata)) > 0); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("valid", "", "", rdata)) > + 0); + rassert(!strcmp("valid", rdata)); + rassert((pos = sextract("valid", "", "", rdata)) == -1); + rassert(!strcmp("", rdata)); +} + +void test_latleast() { + + rassert(latleast("", 0)); + rassert(latleast("a", 1)); + rassert(latleast("aa", 1)); + rassert(latleast("aaa", 1)); + rassert(latleast("aa", 2)); + rassert(latleast("aaa", 2)); + rassert(!latleast("a", 2)); +} +bool iswhitespace(char c) { + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} +void rrex_functions_test() { + rtest_banner("rrex functions") test_isalpharange(); + test_isdigitrange(); + test_swith(); + test_substr(); + test_sextract(); + test_isgrouping(); + test_sexpand(); + test_latleast(); + test_exprtok(); +} diff --git a/rrex2.c b/rrex2.c new file mode 100644 index 0000000..678264f --- /dev/null +++ b/rrex2.c @@ -0,0 +1,242 @@ +#include "rrex2.h" +#include 
+#include + +void cregex_repeat(char *s, char *r) { + // Get object from shared data object. rbf is session variable of current + // bench function. + regex_t *regex = (regex_t *)rbf->data; + // Only get's executed at beginning of the benchmark. Executed once. + if (rbf->first) { + // Set session data + regex = (regex_t *)malloc(sizeof(regex_t)); + rbf->data = regex; + regcomp(regex, r, REG_EXTENDED); + } + // The code to benchmark + rassert(!regexec(regex, s, 0, NULL, 0)) + // Is executed only once at end of benchmark + if (rbf->last) regfree(regex); +} +void rrex_repeat(char *s, char *r) { + char *bdata = (char *)rbf->data; + + if (rbf->first) { + bdata = (char *)malloc(4096); + rrex_compile(r, bdata); + rbf->data = bdata; + } + + rassert(rrex_match(s, bdata)); + + if (rbf->last) { + free(rbf->data); + } +} + +int wins_rrex = 0; +int loss_rrex = 0; +nsecs_t total_execution_time = 0; +long total_times = 0; + +bool validate_dutch_zipcode_c(char *code) { + if (strlen(code) != 7) + return false; + for (int i = 0; i < 4; i++) { + if (!isdigit(code[i])) + return false; + } + if (!iswhitespace(code[4])) { + return false; + } + for (int i = 6; i < 7; i++) { + if (!isalpha(code[i])) + return false; + } + return true; +} +bool validate_dutch_zipcode_c_literal(char *code) { + if (strlen(code) != 7) + return false; + return isdigit(code[0]) && isdigit(code[1]) && isdigit(code[2]) && + isdigit(code[3]) && iswhitespace(code[4]) && isalpha(code[5]) && + isalpha(code[6]); +} + +void validate_dutch_zipcode_creg(char *s) { + regex_t regex; + char *pattern = "\\d{4} [a-zA-Z]{2}"; + int ret = regcomp(®ex, pattern, REG_EXTENDED); + if (ret) { + printf("cregex comp error\b"); + exit(0); + } + ret = regexec(®ex, s, 0, NULL, 0); + if (!ret) { + printf("cregex exec error\b"); + exit(0); + } + regfree(®ex); +} + +void validate_dutch_zipcode_creg_precompiled(char *s) { + regex_t *regex = (regex_t *)rbf->data; + // Only get's executed at beginning of the benchmark. Executed once. 
+ if (rbf->first) { + // Set session data + regex = (regex_t *)malloc(sizeof(regex_t)); + rbf->data = regex; + char *pattern = "\\d{4} [a-zA-Z]{2}"; + regcomp(regex, pattern, REG_EXTENDED); + } + // The code to benchmark + int ret = regexec(regex, s, 0, NULL, 0); + if (!ret) { + printf("cregex exec error\b"); + exit(0); + } + // Is executed only once at end of benchmark + if (rbf->last) + regfree(regex); +} +void validate_dutch_zipcode_rrex_precompiled(char *s) { + char *bcode = (char *)rbf->data; + if (rbf->first) { + bcode = (char *)malloc(20); + rrex_compile("\\d{4} [a-zA-Z]{2}", bcode); + rbf->data = bcode; + } + rrex_match(s, bcode); + if (rbf->last) { + free(bcode); + } +} + +bool validate_dutch_zipcode_rrex(char *s) { + return rrex(s, "\\d{4} [a-zA-Z]{2}"); +} + +void benchmark_dutch_zipcode(long times, char *s) { + rbench_t *r = rbench_new(); + r->show_progress = false; + r->stdout = false; + r->add_function(r, "rrex", "zipcode", (void *)validate_dutch_zipcode_rrex); + r->add_function(r, "rrex compiled", "zipcode", + (void *)validate_dutch_zipcode_rrex_precompiled); + r->add_function(r, "creg", "zipcode", (void *)validate_dutch_zipcode_creg); + r->add_function(r, "creg compiled", "zipcode", + (void *)validate_dutch_zipcode_creg_precompiled); + r->add_function(r, "native c", "zipcode c", + (void *)validate_dutch_zipcode_c); + r->add_function(r, "native c literal", "zipcode c", + (void *)validate_dutch_zipcode_c_literal); + printf("Benchmarking validation of %s with rrex and native c code.\n", s); + r->execute1(r, times, s); + rbench_free(r); +} + +void benchmark(long times, char *s, char *e) { + rprint("Benchmark \\l string:<%s> expr:<%s>\t\n", s, e); + rbench_t *r; + r = rbench_new(); + r->show_progress = false; + r->stdout = false; + r->add_function(r, "executor", "rrex", (void *)rrex_repeat); + r->add_function(r, "executor", "clib", (void *)cregex_repeat); + if (r->execute2(r, times, s, e)->winner == 1) { + wins_rrex++; + } else { + loss_rrex++; + } + 
total_execution_time += r->execution_time; + total_times += times * 2; + rbench_free(r); +} + +void rrex_benchmark_tests(long times) { + benchmark_dutch_zipcode(times / 10, "7245 SR"); + benchmark_dutch_zipcode(times / 10, "A245 SR"); + benchmark_dutch_zipcode(times / 10, "7245 S3"); + benchmark(times, "abababc", "^(ab)+c$"); + // c regex does not support: + // benchmark(times,"123a33","\\d+a\\d+$"); + benchmark(times, "9-3", "([3-9]-[3-9])"); + benchmark(times, "1234A", "[1-4]{4}A"); + benchmark(times, "abcdef", "abcd?ef"); + benchmark(times, "ce", "(a|b|c|d)e"); + benchmark(times, "a", "(a)"); + benchmark(times, "aa", "(a){2}"); + // benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaq", "[^xyzv]+q$"); + benchmark(times, "abcabcabcabcabcabc", "[acb][acb]{4}"); + benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "[1A-Z0-9a12345]{33}"); + benchmark(times, "abcd", "abcd"); + benchmark(times, "aaaaaaaaa", "a{9}"); + benchmark(times, "a", "[abc]"); + benchmark(times, "aa", "[abc]{2}"); + benchmark(times, "ab", "[abc]{2}"); + benchmark(times, "ac", "[abc]{2}"); + benchmark(times, "c", "[abc]"); + benchmark(times, "123", "[0-9][0-9][0-9]"); + benchmark(times, "ab*", "[a-z]b."); + benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ".{33}"); + // benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaq", "[dbac]+q$"); + benchmark(times, "#include \"test.h\"", "^#include *\"[a-z\\.]+\"$"); + benchmark(times, "abcdefgh", "^.*gh$"); + benchmark(times, "randomtextbeforeabcdefgh", "^random.*gh$"); + benchmark(times, "abcdefg", "a?bcdf?ef?g"); + printf("Times won: %d / %d\n", wins_rrex, wins_rrex + loss_rrex); + printf("Total execution time: %s\n", format_time(total_execution_time)); + printf("Total times: %s\n", rformat_number(total_times)); +} + +void repl() { + + while (true) { + char s[4096]; + char e[4096]; + rprint("%s", "Write a string to parse:\n"); + rreadline(s, 1024, true); + rprint("Write a reqular expression:\n"); + rreadline(e, 1024, true); + rprint("\\t"); + 
bool valid = rrex(s, e); + if (valid) { + rprintgf(stdout, "\\T %s", valid ? "valid\n" : "invalid\n"); + } else { + rprintrf(stdout, "\\T %s", valid ? "valid\n" : "invalid\n"); + } + } +} + +void rrex_tests() { + rrex_functions_test(); + rrex_compiler_tests(); + rrex_executor_tests(); + __attribute__((unused)) int res = rtest_end(""); + rprintg("Tests passed.\n\n"); + sleep(1); +} + +int main(int argc, char *argv[]) { + /* + 20000000 for 140s (1 billion times) + 16000000 for 100s + 32000000 for 200s + 4000000 for 30s + 2000000 for 15s -> this is minimum to get consistent result + 1000000 for 7.5s + */ + long times = 2000000; + if (argc > 1) { + if (!strcmp(argv[1], "cli")) { + repl(); + return 0; + } else if (!strcmp(argv[1], "test")) { + times = 20; + } + } + rrex_tests(); + rrex_benchmark_tests(times); + return 0; +} \ No newline at end of file diff --git a/rrex2.h b/rrex2.h new file mode 100644 index 0000000..ab08f19 --- /dev/null +++ b/rrex2.h @@ -0,0 +1,447 @@ +#include "compiler.h" +#include "rlib.h" +#include + +#define ifwhile(cond, action) \ + bool _did_doit; \ + _did_doit = false; \ + while (cond) { \ + action \ + }; \ + if (_did_doit) + +// bool is_valid = expr ? 
1 : 0; +// repeat: + +// bool valid = expr != NULL *expr > 0; + +// bool _expr_true = false; +// if(res){ +// _expr_true = true; +//} +// bool ifwhile(bool res){ +// +//} +struct rrex_executor_t; + +typedef bool (*rrex_function)(struct rrex_executor_t *); + +typedef struct rrex_executor_t { + char *previous_position; + char previous; + char *bdata; + char *_bdata; + char *sdata; + char *_sdata; + + long current; + bool valid; + rrex_function functions[30]; + +} rrex_executor_t; + +bool rrex_match(char *sdata, char *bdata); +bool rrex_execute_one(rrex_executor_t *t); +bool rrex(char *s, char *r); + +bool rrex(char *s, char *r) { + char b[4096]; + rrex_compile(r, b); + return rrex_match(s, b); +} + +bool rrex_match_sol(rrex_executor_t *executor) { + executor->previous = RN_ROOF; + executor->previous_position = executor->bdata; + bool valid = executor->sdata == executor->_sdata; + if (valid) { + executor->bdata++; + } + return valid; +} +bool rrex_match_dot(rrex_executor_t *executor) { + executor->previous = RN_DOT; + executor->previous_position = executor->bdata; + if ((executor->sdata)[0] != '\n') { + executor->sdata++; + executor->bdata++; + return true; + } + return false; +} + +bool rrex_match_digit(rrex_executor_t *executor) { + if (isdigit(*executor->sdata)) { + executor->sdata++; + executor->bdata++; + return true; + } + return false; +} + +bool rrex_match_whitespace(rrex_executor_t *executor) { + if (*executor->sdata == ' ' || *executor->sdata == '\t' || + *executor->sdata == '\n' || *executor->sdata == '\r') { + executor->sdata++; + executor->bdata++; + return true; + } + return false; +} + +bool rrex_match_word(rrex_executor_t *executor) { + if (isalpha((executor->sdata)[0]) || (executor->sdata)[0] == '_') { + executor->sdata++; + executor->bdata++; + return true; + } + return false; +} + +bool rrex_match_not_word(rrex_executor_t *executor) { + if (!(isalpha(*executor->sdata) || *executor->sdata == '_')) { + executor->sdata++; + executor->bdata++; + return 
true; + } + return false; +} + +bool rrex_match_not_digit(rrex_executor_t *executor) { + if (!(isdigit(*executor->sdata))) { + executor->sdata++; + executor->bdata++; + return true; + } + return false; +} +bool rrex_match_dollar(rrex_executor_t *executor) { + if (*executor->sdata == '\0') { + executor->bdata++; + return true; + } + return false; +} + +bool rrex_match_literal(rrex_executor_t *executor) { + if (*executor->bdata == *executor->sdata) { + executor->bdata++; + executor->sdata++; + return true; + } + return false; +} + +bool rrex_match_group(rrex_executor_t *executor) { + bool v = true; + executor->bdata++; + char *sdata_before_fail = executor->sdata; + while (v && *executor->bdata != RN_GROUP_END) { + v = rrex_execute_one(executor); + if (!v) { + while (*executor->bdata != RN_GROUP_END) { + if (*executor->bdata == RN_PIPE) { + v = true; + executor->bdata++; + break; + } + executor->bdata++; + } + } else if (*executor->bdata == RN_PIPE) { + break; + } + } + while (*executor->bdata != RN_GROUP_END) { + executor->bdata++; + } + executor->bdata++; + if (!v) { + executor->sdata = sdata_before_fail; + } + return v; +} + +bool rrex_match_choice(rrex_executor_t *executor) { + bool v; + + executor->bdata++; + bool reverse = *executor->bdata == RN_ROOF; + if (reverse) + executor->bdata++; + + while (*executor->bdata != RN_CHOICE_END) { + v = rrex_execute_one(executor); + if (reverse) { + + v = !v; + if (v) + executor->sdata++; + } + if (v) { + break; + } else { + + if (!reverse) + executor->bdata++; + } + } + while (*executor->bdata != RN_CHOICE_END) { + executor->bdata++; + } + + executor->bdata++; + + return v; +} + +bool rrex_match_optional(rrex_executor_t *executor) { + executor->bdata++; + char *optional_start = executor->bdata; + bool v = rrex_execute_one(executor); + if (!v) { + executor->bdata = optional_start; + char closer = 0; + if (*executor->bdata == RN_CHOICE_START) { + closer = RN_CHOICE_END; + } + if (*executor->bdata == RN_GROUP_START) { + closer 
= RN_GROUP_END; + } + if (closer) { + while (*executor->bdata != closer) { + executor->bdata++; + } + } + executor->bdata++; + } + return true; +} + +bool rrex_match_at_least_one(rrex_executor_t *executor) { + bool v = true; + bool once_valid; + executor->bdata++; + char *method_position = executor->previous_position; + char *next = executor->bdata; + while (v) { + executor->bdata = method_position; + v = rrex_execute_one(executor); + + if (v) + once_valid = true; + + executor->bdata = next; + bool v_right = rrex_execute_one(executor); + if (v_right) { + once_valid = true; + break; + } + } + return once_valid; +} + +bool rrex_match_range(rrex_executor_t *executor) { + // Go to first parameter and remember + executor->bdata++; + char char_start = *executor->bdata; + // Go to second parameter and remember + executor->bdata++; + char char_end = *executor->bdata; + // Swap parameters if first one is higher than second one + if (char_start > char_end) { + char temp = char_end; + char_end = char_start; + char_start = temp; + } + // Compare if current char in sdata is between parameters + if (*executor->sdata >= char_start && *executor->sdata <= char_end) { + executor->bdata++; + executor->sdata++; + return true; + } + // Set pointer before parameters. Back to R. 
+ executor->bdata--; + executor->bdata--; + return false; +} + +bool rrex_match_plus(rrex_executor_t *executor) { + char *plus_position = executor->bdata; + char *next = plus_position + 1; + char *to_repeat = executor->previous_position; + // Return value + bool valid = true; + bool matched_once = false; + char *sdata_before_fail; + while (valid) { + // Check if EOF is reached + if (!*executor->sdata) { + break; + } + executor->bdata = to_repeat; + sdata_before_fail = executor->sdata; + valid = rrex_execute_one(executor); + if (valid) { + matched_once = true; + } else { + // should other function do + executor->sdata = sdata_before_fail; + } + if (!valid && *(executor->bdata = next) && rrex_execute_one(executor)) { + // if(!valid) + break; + } + } + if (matched_once && executor->bdata == plus_position) { + // Move pointer to after RN_PLUS sign. + executor->bdata++; + } + + return matched_once; +} + +bool rrex_execute_one(rrex_executor_t *executor) { + bool valid; + executor->current = *executor->bdata; + int previous = executor->current; + + char *previous_position = executor->bdata; + if (executor->current > 31) + executor->current = RN_LITERAL; + valid = executor->functions[executor->current](executor); + // executor->current = *executor->bdata; + executor->previous = previous; + executor->previous_position = previous_position; + return valid; +} + +bool rrex_match(char *sdata, char *bdata) { + rrex_executor_t executor; + executor.bdata = bdata; + executor._bdata = bdata; + executor.sdata = sdata; + executor._sdata = sdata; + executor.previous_position = executor.bdata; + executor.functions[RN_ARANGE] = rrex_match_range; + executor.functions[RN_CHOICE_START] = rrex_match_choice; + executor.functions[RN_DOLLAR] = rrex_match_dollar; + executor.functions[RN_DOT] = rrex_match_dot; + executor.functions[RN_DRANGE] = rrex_match_range; + executor.functions[RN_LITERAL] = rrex_match_literal; + executor.functions[RN_SLASH_CD] = rrex_match_not_digit; + 
executor.functions[RN_SLASH_CW] = rrex_match_not_word; + executor.functions[RN_PLUS] = rrex_match_plus; + executor.functions[RN_ASTERISK] = rrex_match_at_least_one; + executor.functions[RN_WHITESPACE] = rrex_match_whitespace; + executor.functions[RN_GROUP_START] = rrex_match_group; + executor.functions[RN_QUESTION] = rrex_match_optional; + executor.functions[RN_ROOF] = rrex_match_sol; + executor.functions[RN_DIGIT] = rrex_match_digit; + executor.functions[RN_ALPHA] = rrex_match_word; + rrex_executor_t *ex = &executor; + char *s_padding = ex->sdata; + bool valid = true; + while (valid && *ex->bdata) { + valid = rrex_execute_one(&executor); + if (!valid && *ex->sdata) { + if (*ex->_bdata == RN_ROOF) { + break; + } + s_padding++; + ex->sdata = s_padding; + ex->bdata = ex->_bdata; + if (*ex->bdata && *ex->sdata) + valid = true; + } + } + return valid; +} + +void rrex_executor_tests() { + rtest_banner("rrex regular expressions"); + + // rassert(rrex("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaq", "[^qxyzv]+q$")); + + rassert(rrex("abababa", "^(ab)+a$")); + + rassert(rrex(" a ", "\\sa\\s")); + rassert(!rrex("a", "\\s")); + rassert(rrex("abc", "ab[def]?c")); + rassert(rrex("abc", "ab(d|e|f)?c")); + rassert(rrex("1990-01-13", + "^(19|20)\\d\\d-(0[1-9]|1[0-2])-(0[1-9]|[12]\\d|3[01])$")); + rassert(rrex("1990-01-13", "(19|20)\\d\\d-[0?1]\\d-[0123]\\d")); + // rassert(rrex("1990-1-3", "(19|20)\\d\\d-[0?1]\\d-[0123]\\d")); + // rassert(rrex("1990-1-3", "(19|20)\\d\\d-[01]?\\d-[0123]\\d")); + rassert( + rrex("1990-13-25", "(19|20)\\d\\d-([01]\\d?||\\d)-([0123]\\d|\\d)$")); + rassert( + !rrex("1990-13-45", "(19|20)\\d\\d-([01]\\d?||\\d)-([0123]\\d|\\d)$")) + //(19|20)\d\d-[01]?\d-[0123]\d + rassert(rrex("a", "[zsa]")); + rassert(rrex("abcdefg", "abcd?efg")); + rassert(rrex("abcefg", "abcd?efg")); + rassert(rrex("ce", "(a|b|c|d)e")); + rassert(rrex("A", "A-Z")); + rassert(rrex("a", "a-Z")); + rassert(rrex("abcab", "[abc][acb]{4}$")); + rassert(rrex("aa", "\\w{2}$")); + + rassert(rrex("a", 
"[ca]")); + rassert(rrex("1-4", "1\\-4")); + + rassert(rrex("a", "[ba]")); + rassert(rrex("5", "4-9")); + rassert(rrex("4", "4-9")); + rassert(rrex("9", "4-9")); + rassert(rrex("123A", "1-41-41-4A")); + rassert(!rrex("123B", "1-41-41-4A")); + rassert(!rrex("1", "4-9")); + rassert(rrex("abca", "[abc][abc][abc]a$")); + + rassert(rrex("abca", "[a-z][abc][abc]a")); + rassert(rrex("abca", "[\\w][abc][abc]a")); + rassert(rrex("a5a5g!a", "a0-9a-z\\d\\D\\Wa")); + rassert(!rrex("1", "\\D")); + rassert(!rrex("a", "\\W")); + rassert(!rrex("1", "\\w")); + rassert(!rrex("a", "\\d")); + rassert(!rrex("\n", ".")); + rassert(rrex("a", "a$")); + rassert(rrex("a1ba1ba1b", "[a-z\\db]{3}")); + rassert(rrex("abbc", "a{1}[a-z]{2}c{1}")); + rassert(rrex("aA", "[a-zA-Z]{2}")); + + rassert(!rrex("123", "\\d+a")); + rassert(rrex("123a", "[123]+a")); + + printf("JSSS\n"); + rassert(rrex("123", "[123]+")); + rassert(!rrex("123b", "[123]+a")); + // rassert(!rrex("123", "[123]+b")); NOT READY YET + + rassert(rrex("abababc", "^(ab)+c$")); + rassert(!rrex("abababb", "^(ab)+a$")); + rassert(!rrex("abababa", "^(ab)+b$")); + rassert(!rrex("abdabdabda", "^(abc)+a$")); + rassert(!rrex("abababa", "^(abc)+a$")); + + rassert(rrex("123a33", "\\d+a\\d+")); + rassert(!rrex("123ab", "\\d+$")); + + rassert(rrex("567", "[^1234]")); + rassert(rrex("400", "[^5]")); + rassert(!rrex("132213gh", ".*gd")); + rassert(!rrex("132213gd", ".*gh")); + rassert(rrex("#include \"test.h\"x", "#include *\"[a-z\\.]*\"x")); + + // rassert(rrex("#include \"test.h\"x", "#include.*\".*\"x")); + rassert(!rrex("#include \"test.h\"y", ".*#include.*\".*\"x")); + + rassert(rrex("123test", "^123")); + rassert(rrex("test123", "123")); + rassert(!rrex("test123", "^123")); + rassert(rrex("test123", "123$")); + rassert(rrex("test123test", "123")); + rassert(!rrex("test123test", "123$")); +} diff --git a/rrex3 b/rrex3 new file mode 100755 index 0000000..3375bdb Binary files /dev/null and b/rrex3 differ diff --git a/rrex3.c b/rrex3.c new file 
mode 100644 index 0000000..090aee2 --- /dev/null +++ b/rrex3.c @@ -0,0 +1,79 @@ +#define RREX3_DEBUG 1 +#include "rrex3.h" +#include "rlib.h" + +#include + +void benchmark(int times, char *str, char *expr) { + + regmatch_t matches[10]; + printf("Matching \"%s\" with \"%s\".\n", str, expr); + regex_t regex; + if (regcomp(®ex, expr, REG_EXTENDED)) { + printf("Creg: error in regular expression.\n"); + exit(1); + } + printf("creg: "); + RBENCH(times, { + if (regexec(®ex, str, 0, matches, 0)) { + printf("Creg: error executing regular expression.\n"); + } + }) + regfree(®ex); + ; + rrex3_t *rrex = rrex3_compile(NULL, expr); + printf("rrex3 (%s): ", rrex->compiled); + RBENCH(times, { + if (rrex3(rrex, str, NULL)) { + + } else { + printf("Rrex3: error\n"); + exit(0); + } + }); + rrex3_free(rrex); + printf("\n"); +} + +int main() { + rrex3_test(); + int times = 1; + benchmark(times, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\".*\"\".*\"\".*\""); + + benchmark(times, "abcdefghijklmnopqrstuvwxyz", + "abcdefghijklmnopqrstuvwxyz$"); + benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaa$"); + benchmark(times, "abcdefghijklmnopqrstuvwxyz", + "..........................$"); + + // [abcm] failed + benchmark(times, "abcdefghijklmnopqrstuvwxyz", ".*z"); + benchmark(times, "abcde", ".*e"); + benchmark(times, "abcdef", ".*f"); + + benchmark(times, "abcdefghijklmnopqrstuvwxyz", + "[a]b*c+d\\w[f-g][g][h-i][i][^a][abcdefgk][l][m][n][o][p][a-z][r]" + "[s][t][u][v][w].*z$"); + benchmark(times, "zzz", + "[abcdefghijklmnopqrstuvwxyz][abcdefghijklmnopqrstuvwxyz][" + "abcdefghijklmnopqrstuvwxyz]$"); + + benchmark(times, "7245 Sr", "[0-9][0-9][0-9][0-9] ?\\w\\w$"); + + benchmark(times, + "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmn" + "opqrstuvwxyzesting", + "[z-z][e-e]"); + benchmark(times, + "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmn" + "opqrstuvwxyzesting", + "zesting"); + benchmark(times, 
"\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\""); + benchmark(times, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\".+\"\".+\"\".+\""); + benchmark(times, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.+)\"\"(.+)\"\"(.+)\""); +} \ No newline at end of file diff --git a/rrex3.h b/rrex3.h new file mode 100644 index 0000000..6b8711e --- /dev/null +++ b/rrex3.h @@ -0,0 +1,1277 @@ +#ifndef RREX3_H +#define RREX3_H +#include +#include +#include +#include +#include +#include +#include +#ifndef RREX3_DEBUG +#define RREX3_DEBUG 0 +#endif + +struct rrex3_t; + +typedef void (*rrex3_function)(struct rrex3_t *); + +typedef struct rrex3_t { + void (*functions[254])(struct rrex3_t *); + void (*slash_functions[254])(struct rrex3_t *); + bool valid; + int match_count; + int match_capacity; + char **matches; + bool exit; + char *__expr; + char *__str; + char *_expr; + char *_str; + char *expr; + char *str; + char *compiled; + bool inside_brackets; + bool inside_parentheses; + bool pattern_error; + bool match_from_start; + char bytecode; + rrex3_function function; + struct { + void (*function)(struct rrex3_t *); + char *expr; + char *str; + char bytecode; + } previous; + struct { + void (*function)(struct rrex3_t *); + char *expr; + char *str; + char bytecode; + } failed; +} rrex3_t; + +static bool isdigitrange(char *s) { + if (!isdigit(*s)) { + return false; + } + if (*(s + 1) != '-') { + return false; + } + return isdigit(*(s + 2)); +} + +static bool isalpharange(char *s) { + if (!isalpha(*s)) { + return false; + } + if (*(s + 1) != '-') { + return false; + } + return isalpha(*(s + 2)); +} + +void rrex3_free_matches(rrex3_t *rrex3) { + if (!rrex3->matches) + return; + for (int i = 0; i < rrex3->match_count; i++) { + free(rrex3->matches[i]); + } + free(rrex3->matches); + rrex3->matches = NULL; + rrex3->match_count = 0; + rrex3->match_capacity = 0; +} + +void rrex3_free(rrex3_t *rrex3) { + if (!rrex3) + return; + if (rrex3->compiled) { + free(rrex3->compiled); 
+ rrex3->compiled = NULL; + } + rrex3_free_matches(rrex3); + free(rrex3); + rrex3 = NULL; +} +static bool rrex3_move(rrex3_t *, bool); +static void rrex3_set_previous(rrex3_t *); +inline static void rrex3_cmp_asterisk(rrex3_t *); +void rrex3_cmp_literal_range(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Range check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char start = *rrex3->expr; + rrex3->expr++; + rrex3->expr++; + char end = *rrex3->expr; + if (*rrex3->str >= start && *rrex3->str <= end) { + rrex3->str++; + rrex3->valid = true; + } else { + rrex3->valid = false; + } + rrex3->expr++; +} + +bool rrex3_is_function(char chr) { + if (chr == ']' || chr == ')' || chr == '\\' || chr == '?' || chr == '+' || + chr == '*') + return true; + return false; +} + +inline static void rrex3_cmp_literal(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + if (rrex3->inside_brackets) { + if (isalpharange(rrex3->expr) || isdigitrange(rrex3->expr)) { + rrex3_cmp_literal_range(rrex3); + return; + } + } +#if RREX3_DEBUG == 1 + printf("Literal check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + +#endif + if (*rrex3->expr == 0 && !*rrex3->str) { + printf("ERROR, EMPTY CHECK\n"); + // exit(1); + } + if (rrex3->valid == false) { + rrex3->expr++; + return; + } + + if (*rrex3->expr == *rrex3->str) { + rrex3->expr++; + rrex3->str++; + rrex3->valid = true; + // if(*rrex3->expr &&rrex3->functions[(int)*rrex3->expr] == + // rrex3_cmp_literal && !rrex3->inside_brackets && + //! 
rrex3_is_function(*rrex3->expr)){ rrex3_cmp_literal(rrex3); + // if(rrex3->valid == false){ + // rrex3->expr--; + // rrex3->valid = true; + // } + // } + return; + } + rrex3->expr++; + rrex3->valid = false; +} + +inline static void rrex3_cmp_dot(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Dot check (any char): %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + rrex3->expr++; + if (!rrex3->valid) { + return; + } + if (*rrex3->str && *rrex3->str != '\n') { + rrex3->str++; + if (*rrex3->expr && *rrex3->expr == '.') { + rrex3_cmp_dot(rrex3); + return; + } /*else if(*rrex3->expr && (*rrex3->expr == '*' || *rrex3->expr == + '+')){ char * next = strchr(rrex3->str,*(rrex3->expr + 1)); char * + space = strchr(rrex3->str,'\n'); if(next && (!space || space > next)){ + rrex3->str = next; + } + }*/ + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_question_mark(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Question mark check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + if (rrex3->valid == false) + rrex3->valid = true; + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = c == ' ' || c == '\n' || c == '\t'; + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace_upper(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Non whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = !(c == ' ' || c == '\n' || c == '\t'); + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_plus2(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Plus check: %c:%c:%d\n", 
/*
 * Handler installed for '+' in rrex3_init.
 *
 * Works with two pattern positions:
 *   left  - rrex3->previous.expr, the element recorded by the most recent
 *           rrex3_set_previous() call (the element being repeated);
 *   right - the pattern position just after the '+', skipping one ')' if
 *           that is what follows.
 * The loop alternately tries `right` and `left` against the subject with
 * rrex3_move(), and stops once the continuation has matched and then stops
 * matching, or once neither side makes progress.
 *
 * On exit rrex3->expr and rrex3->str are set from the loop's bookkeeping and
 * rrex3->valid is set to true unconditionally. If the state is already
 * invalid on entry, the '+' is skipped and valid stays false.
 *
 * NOTE(review): the debug labels below say "Asterisk" although this is the
 * plus handler - looks like a copy/paste leftover; confirm before changing.
 */
inline static void rrex3_cmp_plus(rrex3_t *rrex3) {
#if RREX3_DEBUG == 1
    rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str,
            rrex3->valid);
#endif
    if (!rrex3->valid) {
        rrex3->expr++;
        return;
    }

    char *left = rrex3->previous.expr;
    char *right = rrex3->expr + 1;
    if (*right == ')') {
        /* The repeated element closes a group: continue after the ')'. */
        right++;
    }
    int right_valid = 0;           /* consecutive successes of `right` */
    bool right_valid_once = false; /* `right` has matched at least once */
    char *expr = right;            /* pattern position to resume from */
    char *right_str = rrex3->str;  /* subject position after last right match */
    ;
    char *right_expr = NULL;       /* pattern position after first right match */
    char *str = rrex3->str;        /* subject position reached via `left` */
    bool first_time = true;
    bool left_valid = true;
    char *str_prev = NULL;         /* value of `str` before the latest left try */
    bool valid_from_start = true;  /* did `right` match on the first attempt? */
    ;
    while (*rrex3->str) {
        if (!left_valid && !right_valid) {
            break;
        }
        if (right_valid && !left_valid) {
            /* Only the continuation matched: resume the subject after it. */
            str = right_str;
            break;
        }

        /* Try the continuation at the current subject position. */
        rrex3->expr = right;
        rrex3->str = str;
#if RREX3_DEBUG == 1
        printf("r");
#endif
        if (*rrex3->str && rrex3_move(rrex3, false)) {
            right_valid++;
            right_str = rrex3->str;
            expr = rrex3->expr;
            if (!right_valid_once) {
                right_expr = rrex3->expr;
                right_valid_once = true;
            }
        } else {
            right_valid = 0;
        }
        if (first_time) {
            first_time = false;
            valid_from_start = right_valid;
        }

        /* Continuation matched now but not on the first attempt: step the
         * pattern back one position (or onto the ')' of a group) and stop.
         * `str` is not advanced here, so the caller re-matches `right`. */
        if (right_valid && !valid_from_start && right_valid > 0) {
            expr = right_expr - 1;
            ;
            if (*(right - 1) == ')') {
                expr = right - 1;
            }
            break;
        }

        /* Continuation matched earlier but fails now: stop repeating. */
        if ((!right_valid && right_valid_once)) {
            expr = right_expr;
            if (*(right - 1) == ')') {
                str = str_prev;
                expr = right - 1;
            }
            break;
        }

        /* Try one more repetition of the left element. */
        str_prev = str;
        rrex3->valid = true;
        rrex3->str = str;
        rrex3->expr = left;
#if RREX3_DEBUG == 1
        printf("l");
#endif
        if (rrex3_move(rrex3, false)) {
            left_valid = true;

            str = rrex3->str;
        } else {
            left_valid = false;
        }
    }

    rrex3->expr = expr;
    rrex3->str = str;
    /* The handler never reports failure itself; see the header comment. */
    rrex3->valid = true;

#if RREX3_DEBUG == 1
    rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str,
            rrex3->valid);
#endif
}
left_valid = true; + char *str_prev = NULL; + bool valid_from_start = true; + ; + while (*rrex3->str) { + if (!left_valid && !right_valid) { + break; + } + if (right_valid && !left_valid) { + str = right_str; + break; + } + + rrex3->expr = right; + rrex3->str = str; +#if RREX3_DEBUG == 1 + printf("r"); +#endif + if (*rrex3->str && rrex3_move(rrex3, false)) { + right_valid++; + right_str = rrex3->str; + expr = rrex3->expr; + if (!right_valid_once) { + right_expr = rrex3->expr; + right_valid_once = true; + } + } else { + right_valid = 0; + } + if (first_time) { + first_time = false; + valid_from_start = right_valid; + } + + if (right_valid && !valid_from_start && right_valid > 0) { + expr = right_expr - 1; + if (*(right - 1) == ')') { + expr = right - 1; + } + break; + } + + if ((!right_valid && right_valid_once)) { + expr = right_expr; + if (*(right - 1) == ')') { + str = str_prev; + expr = right - 1; + } + break; + } + + str_prev = str; + rrex3->valid = true; + rrex3->str = str; + rrex3->expr = left; +#if RREX3_DEBUG == 1 + printf("l"); +#endif + if (rrex3_move(rrex3, false)) { + left_valid = true; + str = rrex3->str; + } else { + left_valid = false; + } + } + + rrex3->expr = expr; + rrex3->str = str; + rrex3->valid = true; + +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_asterisk2(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->valid = true; + rrex3->expr++; + return; + } + if (*rrex3->previous.expr == '*') { + // Support for ** + rrex3->valid = false; + // rrex3->pattern_error = true; + rrex3->expr++; + return; + } + rrex3->str = rrex3->previous.str; + ; + char *next = rrex3->expr + 1; + char *next_original = NULL; + if (*next == '*') { + next++; + } + if (*next == ')' && *(next + 1)) { + next_original = next; + next++; + } + char 
*loop_expr = rrex3->previous.expr; + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *right_next = NULL; + char *right_str = rrex3->str; + while (*rrex3->str && *rrex3->expr && *rrex3->expr != ')') { + // Remember original_str because it's modified + // by checking right and should be restored + // for checking left so they're matching the + // same value. + char *original_str = rrex3->str; + // Check if right matches. + // if(*next != ')'){ + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + // Match rright. + success_next = true; + if (!next_original) { + if (!success_next_once) { + right_next = rrex3->expr; + } + + } else { + right_next = next_original; + break; + } + right_str = rrex3->str; + success_next_once = true; + } else { + // No match Right. + success_next = false; + } + //} + if (success_next_once && !success_next) { + // Matched previous time but now doesn't. + break; + } + // Check if left matches. + rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!rrex3_move(rrex3, false)) { + // No match left. + success_current = false; + } else { + // Match left. 
+ success_current = true; + // NOT SURE< WITHOUT DOET HETZELFDE: + // original_str = rrex3->str; + if (!success_next) { + right_str = rrex3->str; + if (*rrex3->expr != ')') { + right_next = rrex3->expr + 1; // +1 is the * itself + + } else { + + // break; + } + } + } + + if ((success_next && !success_current) || + (!success_next && !success_current)) { + break; + } + } + rrex3->expr = right_next; + rrex3->str = right_str; + rrex3->valid = true; +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_roof(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); +#if RREX3_DEBUG == 1 + printf("expr, *rrex3->str, rrex3->valid); +#endif + rrex3->valid = rrex3->str == rrex3->_str; + rrex3->match_from_start = true; + rrex3->expr++; +} +inline static void rrex3_cmp_dollar(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); +#if RREX3_DEBUG == 1 + printf("Dollar check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (*rrex3->str || !rrex3->valid) { + rrex3->valid = false; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_w(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} +inline static void rrex3_cmp_w_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_d(rrex3_t *rrex3) { + + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} 
+inline static void rrex3_cmp_d_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_slash(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; + + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->slash_functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); +} + +inline static int collect_digits(rrex3_t *rrex3) { + char output[20]; + unsigned int digit_count = 0; + while (isdigit(*rrex3->expr)) { + + output[digit_count] = *rrex3->expr; + rrex3->expr++; + digit_count++; + } + output[digit_count] = 0; + return atoi(output); +} + +inline static void rrex3_cmp_range(rrex3_t *rrex3) { + char *loop_code = rrex3->previous.expr; + char *expr_original = rrex3->expr; + rrex3->expr++; + int range_start = collect_digits(rrex3) - 1; + int range_end = 0; + if (*rrex3->expr == ',') { + rrex3->expr++; + range_end = collect_digits(rrex3); + } + rrex3->expr++; + int times_valid = 0; + while (*rrex3->str) { + rrex3->expr = loop_code; + rrex3_move(rrex3, false); + if (rrex3->valid == false) { + break; + } else { + times_valid++; + } + if (range_end) { + if (times_valid >= range_start && times_valid == range_end - 1) { + rrex3->valid = true; + } else { + rrex3->valid = false; + } + break; + } else if (range_start) { + if (times_valid == range_start) { + rrex3->valid = true; + break; + } + } + } + rrex3->valid = times_valid >= range_start; + if (rrex3->valid && range_end) { + rrex3->valid = times_valid <= range_end; + } + rrex3->expr = strchr(expr_original, '}') + 1; +} + +inline static void rrex3_cmp_word_start_or_end(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + if (*rrex3->expr != 'B') { + printf("Check word start or end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + } + +#endif + 
rrex3_set_previous(rrex3); + bool valid = false; + if (isalpha(*rrex3->str)) { + if (rrex3->_str != rrex3->str) { + if (!isalpha(*(rrex3->str - 1))) { + valid = true; + } + } else { + valid = true; + } + } else if (isalpha(isalpha(*rrex3->str) && !isalpha(*rrex3->str + 1))) { + valid = true; + } + rrex3->expr++; + rrex3->valid = valid; +} +inline static void rrex3_cmp_word_not_start_or_end(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Check word NOT start or end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + +#endif + rrex3_set_previous(rrex3); + + rrex3_cmp_word_start_or_end(rrex3); + rrex3->valid = !rrex3->valid; +} + +inline static void rrex3_cmp_brackets(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets start: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + char *original_expr = rrex3->expr; + rrex3->expr++; + rrex3->inside_brackets = true; + bool valid_once = false; + bool reversed = false; + if (*rrex3->expr == '^') { + reversed = true; + rrex3->expr++; + } + bool valid = false; + while (*rrex3->expr != ']' && *rrex3->expr != 0) { + rrex3->valid = true; + valid = rrex3_move(rrex3, false); + if (reversed) { + valid = !valid; + } + if (valid) { + valid_once = true; + if (!reversed) { + valid_once = true; + break; + } + } else { + if (reversed) { + valid_once = false; + break; + } + } + } + if (valid_once && reversed) { + rrex3->str++; + } + while (*rrex3->expr != ']' && *rrex3->expr != 0) + rrex3->expr++; + if (*rrex3->expr != 0) + rrex3->expr++; + + rrex3->valid = valid_once; + rrex3->inside_brackets = false; + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_pipe(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + +#if RREX3_DEBUG == 1 + printf("Pipe check: 
%c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (rrex3->valid == true) { + rrex3->exit = true; + } else { + rrex3->valid = true; + } + rrex3->expr++; +} +inline static void rrex3_cmp_parentheses(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses start check: %c:%c:%d\n", *rrex3->expr, + *rrex3->str, rrex3->valid); +#endif + + rrex3_set_previous(rrex3); + if (!rrex3->valid) { + rrex3->expr++; + return; + } + if (rrex3->match_count == rrex3->match_capacity) { + + rrex3->match_capacity++; + rrex3->matches = (char **)realloc( + rrex3->matches, rrex3->match_capacity * sizeof(char *)); + } + rrex3->matches[rrex3->match_count] = (char *)malloc(strlen(rrex3->str) + 1); + strcpy(rrex3->matches[rrex3->match_count], rrex3->str); + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->expr++; + rrex3->inside_parentheses = true; + while (*rrex3->expr != ')' && !rrex3->exit) { + rrex3_move(rrex3, false); + } + while (*rrex3->expr != ')') { + rrex3->expr++; + } + rrex3->expr++; + rrex3->inside_parentheses = false; + + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; + if (rrex3->valid == false) { + rrex3->str = original_str; + free(rrex3->matches[rrex3->match_count]); + } else { + rrex3->matches[rrex3->match_count] + [strlen(rrex3->matches[rrex3->match_count]) - + strlen(rrex3->str)] = 0; + rrex3->match_count++; + } +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_reset(rrex3_t *rrex3) { + rrex3_free_matches(rrex3); + rrex3->valid = true; + rrex3->pattern_error = false; + rrex3->inside_brackets = false; + rrex3->inside_parentheses = false; + rrex3->exit = false; + rrex3->previous.expr = NULL; + rrex3->previous.str = NULL; + rrex3->previous.bytecode = 0; + rrex3->failed.expr = NULL; + rrex3->failed.str = NULL; + rrex3->failed.bytecode = 0; + 
rrex3->match_from_start = false; +} + +void rrex3_init(rrex3_t *rrex3) { + for (__uint8_t i = 0; i < 254; i++) { + rrex3->functions[i] = rrex3_cmp_literal; + rrex3->slash_functions[i] = rrex3_cmp_literal; + } + rrex3->functions['?'] = rrex3_cmp_question_mark; + rrex3->functions['^'] = rrex3_cmp_roof; + rrex3->functions['$'] = rrex3_cmp_dollar; + rrex3->functions['.'] = rrex3_cmp_dot; + rrex3->functions['*'] = rrex3_cmp_asterisk; + rrex3->functions['+'] = rrex3_cmp_plus; + rrex3->functions['|'] = rrex3_cmp_pipe; + rrex3->functions['\\'] = rrex3_cmp_slash; + rrex3->functions['{'] = rrex3_cmp_range; + rrex3->functions['['] = rrex3_cmp_brackets; + rrex3->functions['('] = rrex3_cmp_parentheses; + rrex3->slash_functions['w'] = rrex3_cmp_w; + rrex3->slash_functions['W'] = rrex3_cmp_w_upper; + rrex3->slash_functions['d'] = rrex3_cmp_d; + rrex3->slash_functions['D'] = rrex3_cmp_d_upper; + rrex3->slash_functions['s'] = rrex3_cmp_whitespace; + rrex3->slash_functions['S'] = rrex3_cmp_whitespace_upper; + rrex3->slash_functions['b'] = rrex3_cmp_word_start_or_end; + rrex3->slash_functions['B'] = rrex3_cmp_word_not_start_or_end; + rrex3->match_count = 0; + rrex3->match_capacity = 0; + rrex3->matches = NULL; + rrex3->compiled = NULL; + + rrex3_reset(rrex3); +} + +rrex3_t *rrex3_new() { + rrex3_t *rrex3 = (rrex3_t *)malloc(sizeof(rrex3_t)); + + rrex3_init(rrex3); + + return rrex3; +} + +rrex3_t *rrex3_compile(rrex3_t *rrex, char *expr) { + + rrex3_t *rrex3 = rrex ? 
rrex : rrex3_new(); + + char *compiled = (char *)malloc(strlen(expr) + 1); + unsigned int count = 0; + while (*expr) { + if (*expr == '[' && *(expr + 2) == ']') { + *compiled = *(expr + 1); + expr++; + expr++; + } else if (*expr == '[' && *(expr + 1) == '0' && *(expr + 2) == '-' && + *(expr + 3) == '9' && *(expr + 4) == ']') { + *compiled = '\\'; + compiled++; + *compiled = 'd'; + count++; + expr++; + expr++; + expr++; + expr++; + } else { + *compiled = *expr; + } + if (*compiled == '[') { + // in_brackets = true; + + } else if (*compiled == ']') { + // in_brackets = false; + } + expr++; + compiled++; + count++; + } + *compiled = 0; + compiled -= count; + rrex3->compiled = compiled; + return rrex3; +} + +inline static void rrex3_set_previous(rrex3_t *rrex3) { + rrex3->previous.function = rrex3->function; + rrex3->previous.expr = rrex3->expr; + rrex3->previous.str = rrex3->str; + rrex3->previous.bytecode = *rrex3->expr; +} + +static bool rrex3_move(rrex3_t *rrex3, bool resume_on_fail) { + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + if (!*rrex3->expr && !*rrex3->str) { + rrex3->exit = true; + return rrex3->valid; + } else if (!*rrex3->expr) { + // rrex3->valid = true; + return rrex3->valid; + } + if (rrex3->pattern_error) { + rrex3->valid = false; + return rrex3->valid; + } + if (resume_on_fail && !rrex3->valid && *rrex3->expr) { + + // rrex3_set_previous(rrex3); + rrex3->failed.bytecode = rrex3->bytecode; + rrex3->failed.function = rrex3->function; + rrex3->failed.expr = original_expr; + rrex3->failed.str = original_str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + + if (!rrex3->valid && !rrex3->pattern_error) { + + if (*rrex3->str) { + char *pipe_position = strstr(rrex3->expr, "|"); + if (pipe_position != NULL) { + rrex3->expr = pipe_position + 
1; + rrex3->str = rrex3->_str; + rrex3->valid = true; + return true; + } + } + if (rrex3->match_from_start) { + rrex3->valid = false; + return rrex3->valid; + } + if (!*rrex3->str++) { + rrex3->valid = false; + return rrex3->valid; + } + rrex3->expr = rrex3->_expr; + if (*rrex3->str) + rrex3->valid = true; + } + } else { + } + return rrex3->valid; +} + +rrex3_t *rrex3(rrex3_t *rrex3, char *str, char *expr) { +#if RREX3_DEBUG == 1 + printf("Regex check: %s:%s:%d\n", expr, str, 1); +#endif + bool self_initialized = false; + if (rrex3 == NULL) { + self_initialized = true; + rrex3 = rrex3_new(); + } else { + rrex3_reset(rrex3); + } + + rrex3->_str = str; + rrex3->_expr = rrex3->compiled ? rrex3->compiled : expr; + rrex3->str = rrex3->_str; + rrex3->expr = rrex3->_expr; + while (*rrex3->expr && !rrex3->exit) { + if (!rrex3_move(rrex3, true)) + return NULL; + } + rrex3->expr = rrex3->_expr; + if (rrex3->valid) { + + return rrex3; + } else { + if (self_initialized) { + rrex3_free(rrex3); + } + return NULL; + } +} + +void rrex3_test() { + rrex3_t *rrex = rrex3_new(); + + assert(rrex3(rrex, "#define abc ", "#define *(\\w.*)\n$")); + + exit(0); + + assert(rrex3(rrex, "\"stdio.h\" \"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + + assert(rrex3(rrex, "aaaaaaa", "a*a$")); + + // assert(rrex3("ababa", "a*b*a*b*a$")); + assert(rrex3(rrex, "#include\"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "#include \"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "aaaaaad", "a*d$")); + assert(rrex3(rrex, "abcdef", "abd?cdef")); + assert(!rrex3(rrex, "abcdef", "abd?def")); + assert(rrex3(rrex, "abcdef", "def")); + assert(!rrex3(rrex, "abcdef", "^def")); + assert(rrex3(rrex, "abcdef", "def$")); + assert(!rrex3(rrex, "abcdef", "^abc$")); + assert(rrex3(rrex, "aB!.#1", "......")); + assert(!rrex3(rrex, "aB!.#\n", " ......")); + assert(!rrex3(rrex, "aaaaaad", "q+d$")); + assert(rrex3(rrex, "aaaaaaa", "a+a$")); + assert(rrex3(rrex, "aaaaaad", "q*d$")); + 
assert(!rrex3(rrex, "aaaaaad", "^q*d$")); + + // Asterisk function + assert(rrex3(rrex, "123321", "123*321")); + assert(rrex3(rrex, "pony", "p*ony")); + assert(rrex3(rrex, "pppony", "p*ony")); + assert(rrex3(rrex, "ppony", "p*pony")); + assert(rrex3(rrex, "pppony", "pp*pony")); + assert(rrex3(rrex, "pppony", ".*pony")); + assert(rrex3(rrex, "pony", ".*ony")); + assert(rrex3(rrex, "pony", "po*ny")); + // assert(rrex3(rrex,"ppppony", "p*pppony")); + + // Plus function + assert(rrex3(rrex, "pony", "p+ony")); + assert(!rrex3(rrex, "ony", "p+ony")); + assert(rrex3(rrex, "ppony", "p+pony")); + assert(rrex3(rrex, "pppony", "pp+pony")); + assert(rrex3(rrex, "pppony", ".+pony")); + assert(rrex3(rrex, "pony", ".+ony")); + assert(rrex3(rrex, "pony", "po+ny")); + + // Slash functions + assert(rrex3(rrex, "a", "\\w")); + assert(!rrex3(rrex, "1", "\\w")); + assert(rrex3(rrex, "1", "\\W")); + assert(!rrex3(rrex, "a", "\\W")); + assert(rrex3(rrex, "a", "\\S")); + assert(!rrex3(rrex, " ", "\\s")); + assert(!rrex3(rrex, "\t", "\\s")); + assert(!rrex3(rrex, "\n", "\\s")); + assert(rrex3(rrex, "1", "\\d")); + assert(!rrex3(rrex, "a", "\\d")); + assert(rrex3(rrex, "a", "\\D")); + assert(!rrex3(rrex, "1", "\\D")); + assert(rrex3(rrex, "abc", "\\b")); + + assert(rrex3(rrex, "abc", "\\babc")); + assert(!rrex3(rrex, "abc", "a\\b")); + assert(!rrex3(rrex, "abc", "ab\\b")); + assert(!rrex3(rrex, "abc", "abc\\b")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + assert(rrex3(rrex, "abc", "ab\\B")); + assert(!rrex3(rrex, "1ab", "1\\Bab")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + + // Escaping of special chars + assert(rrex3(rrex, "()+*.\\", "\\(\\)\\+\\*\\.\\\\")); + + // Pipe + // assert(rrex3(rrex,"abc","abc|def")); + assert(rrex3(rrex, "abc", "def|jkl|abc")); + assert(rrex3(rrex, "abc", "abc|def")); + + assert(rrex3(rrex, "rhq", "def|rhq|rha")); + assert(rrex3(rrex, "abc", "abc|def")); + + // Repeat + assert(rrex3(rrex, "aaaaa", "a{4}")); + + assert(rrex3(rrex, "aaaa", "a{1,3}a")); + + // Range 
+ assert(rrex3(rrex, "abc", "[abc][abc][abc]$")); + assert(rrex3(rrex, "def", "[^abc][^abc][^abc]$")); + assert(rrex3(rrex, "defabc", "[^abc][^abc][^abc]abc")); + assert(rrex3(rrex, "0-9", "0-9")); + assert(rrex3(rrex, "55-9", "[^6-9]5-9$")); + assert(rrex3(rrex, "a", "[a-z]$")); + assert(rrex3(rrex, "A", "[A-Z]$")); + assert(rrex3(rrex, "5", "[0-9]$")); + assert(!rrex3(rrex, "a", "[^a-z]$")); + assert(!rrex3(rrex, "A", "[^A-Z]$")); + assert(!rrex3(rrex, "5", "[^0-9]$")); + assert(rrex3(rrex, "123abc", "[0-9]*abc$")); + assert(rrex3(rrex, "123123", "[0-9]*$")); + + // Parentheses + + assert(rrex3(rrex, "datadata", "(data)*")); + + assert(rrex3(rrex, "datadatapony", "(data)*pony$")); + + assert(!rrex3(rrex, "datadatapony", "(d*p*ata)*pond$")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato$")); + assert(!rrex3(rrex, "datadatadato", "(d*p*a*ta)*gato$")); + + // Matches + assert(rrex3(rrex, "123", "(123)")); + assert(!strcmp(rrex->matches[0], "123")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "aaaabc", "(.*)c")); + + assert(rrex3(rrex, "abcde", ".....$")); + + assert(rrex3(rrex, "abcdefghijklmnopqrstuvwxyz", + "..........................$")); + // printf("(%d)\n", rrex->valid); + + assert(rrex3(rrex, "#include ", "#include.*<(.*)>")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, "#include \"stdlib.h\"", "#include.\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + assert(rrex3(rrex, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + + assert(rrex3(rrex, " #include ", "#include.+<(.+)>")); + 
assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, " #include \"stdlib.h\"", "#include.+\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + + assert(rrex3(rrex, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.+)\"\"(.+)\"\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + + assert(rrex3(rrex, "int abc ", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "int abc;", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "int abc", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + + assert(rrex3(rrex, "#define abc", "#define (.*)")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "#define abc", "#define (.*)$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "#define abc 1", "#define (.*) (.*)$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(!strcmp(rrex->matches[1], "1")); + + assert(rrex3(rrex, "#define abc 1 ", "#define (.*) (.*) *$")); + assert(!strcmp(rrex->matches[0], "abc")); + printf("<<%s>>\n", rrex->matches[1]); + assert(!strcmp(rrex->matches[1], "1")); + + assert(rrex3(rrex, "#define abc \"test with spaces\" ", + "#define (.*) *\"(.*)\" *$")); + assert(!strcmp(rrex->matches[0], "abc")); + printf("<<%s>>\n", rrex->matches[1]); + assert(!strcmp(rrex->matches[1], "test with spaces")); + + rrex3_free(rrex); +} +#endif \ No newline at end of file diff --git a/rrex3all.c b/rrex3all.c new file mode 100644 index 0000000..43905d7 --- /dev/null +++ b/rrex3all.c @@ -0,0 +1,4931 @@ +// RETOOR - Sep 9 2024 +#define RREX3_DEBUG 1 +#ifndef RREX3_H +#define RREX3_H +#include +#include +#include +#include +#include +#include +#include +#ifndef RREX3_DEBUG +#define RREX3_DEBUG 0 +#endif + +struct rrex3_t; + +typedef void (*rrex3_function)(struct rrex3_t *); + +typedef struct rrex3_t { + void 
/*
 * Tell whether s starts with a digit range of the form "d-d", e.g. "0-9".
 *
 * s must be NUL-terminated. The checks short-circuit, so no character after
 * the terminator is ever read. Characters are converted to unsigned char
 * before they reach <ctype.h>, which is undefined for negative values.
 */
static bool isdigitrange(const char *s) {
    return isdigit((unsigned char)s[0]) && s[1] == '-' &&
           isdigit((unsigned char)s[2]);
}

/*
 * Tell whether s starts with a letter range of the form "a-z".
 *
 * Same contract as isdigitrange(), with isalpha() as the classifier.
 */
static bool isalpharange(const char *s) {
    return isalpha((unsigned char)s[0]) && s[1] == '-' &&
           isalpha((unsigned char)s[2]);
}
/*
 * Default handler: compare the current pattern character with the current
 * subject character.
 *
 * - The position is first recorded with rrex3_set_previous().
 * - Inside a bracket class, a pattern position that starts an "a-z" or
 *   "0-9" style range is handed to rrex3_cmp_literal_range() instead.
 * - If the state is already invalid, the pattern character is skipped and
 *   the state stays invalid.
 * - On equality both pattern and subject advance and valid is set; on
 *   mismatch only the pattern advances and valid is cleared.
 *
 * NOTE(review): the pattern pointer is advanced even when it points at the
 * terminating NUL (and the subject pointer too when both are at NUL), which
 * leaves them past the end of their strings - confirm that no caller
 * dereferences them afterwards.
 */
inline static void rrex3_cmp_literal(rrex3_t *rrex3) {
    rrex3_set_previous(rrex3);

    if (rrex3->inside_brackets) {
        if (isalpharange(rrex3->expr) || isdigitrange(rrex3->expr)) {
            rrex3_cmp_literal_range(rrex3);
            return;
        }
    }
#if RREX3_DEBUG == 1
    printf("Literal check: %c:%c:%d\n", *rrex3->expr, *rrex3->str,
           rrex3->valid);

#endif
    /* Pattern and subject are both exhausted. This is only reported (on
     * stdout, also in non-debug builds); execution continues below. */
    if (*rrex3->expr == 0 && !*rrex3->str) {
        printf("ERROR, EMPTY CHECK\n");
    }
    if (rrex3->valid == false) {
        /* An earlier element already failed: just step over this one. */
        rrex3->expr++;
        return;
    }

    if (*rrex3->expr == *rrex3->str) {
        rrex3->expr++;
        rrex3->str++;
        rrex3->valid = true;
        return;
    }
    /* Mismatch: consume the pattern character but not the subject. */
    rrex3->expr++;
    rrex3->valid = false;
}
(rrex3->valid == false) + rrex3->valid = true; + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = c == ' ' || c == '\n' || c == '\t'; + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace_upper(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Non whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = !(c == ' ' || c == '\n' || c == '\t'); + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_plus2(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Plus check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + if (rrex3->valid) { + rrex3->str--; + } else { + return; + } + char *original_expr = rrex3->expr; + char *next = original_expr + 1; + char *loop_expr = rrex3->previous.expr - 1; + if (*loop_expr == '+') { + rrex3->valid = false; + rrex3->pattern_error = true; + rrex3->expr++; + return; + } + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *next_next = NULL; + char *next_str = rrex3->str; + while (*rrex3->str) { + // Check if next matches + char *original_str = rrex3->str; + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + success_next = true; + next_next = rrex3->expr; + next_str = rrex3->str; + success_next_once = true; + } else { + success_next = false; + } + if (success_next_once && !success_next) { + break; + } + // Check if current matches + rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!*rrex3->str || !rrex3_move(rrex3, false)) { + success_current = false; + } else { + success_current = 
true; + if (!success_next) { + next_next = rrex3->expr + 1; // +1 is the * itself + next_str = rrex3->str; + } + } + if (success_next && !success_current) { + break; + } + } + if (!next_next) + rrex3->expr = next; + else { + rrex3->expr = next_next; + } + rrex3->str = next_str; + rrex3->valid = true; +} + +inline static void rrex3_cmp_plus(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->expr++; + return; + } + + char *left = rrex3->previous.expr; + // printf("%s\n",rrex3->str); + char *right = rrex3->expr + 1; + if (*right == ')') { + right++; + } + int right_valid = 0; + bool right_valid_once = false; + char *expr = right; + char *right_str = rrex3->str; + ; + char *right_expr = NULL; + char *str = rrex3->str; + bool first_time = true; + bool left_valid = true; + char *str_prev = NULL; + bool valid_from_start = true; + ; + while (*rrex3->str) { + if (!left_valid && !right_valid) { + break; + } + if (right_valid && !left_valid) { + str = right_str; + break; + } + + rrex3->expr = right; + rrex3->str = str; +#if RREX3_DEBUG == 1 + printf("r"); +#endif + if (*rrex3->str && rrex3_move(rrex3, false)) { + right_valid++; + right_str = rrex3->str; + expr = rrex3->expr; + if (!right_valid_once) { + right_expr = rrex3->expr; + right_valid_once = true; + } + } else { + right_valid = 0; + } + if (first_time) { + first_time = false; + valid_from_start = right_valid; + } + + if (right_valid && !valid_from_start && right_valid > 0) { + expr = right_expr - 1; + ; + if (*(right - 1) == ')') { + expr = right - 1; + } + break; + } + + if ((!right_valid && right_valid_once)) { + expr = right_expr; + if (*(right - 1) == ')') { + str = str_prev; + expr = right - 1; + } + break; + } + + str_prev = str; + rrex3->valid = true; + rrex3->str = str; + rrex3->expr = left; +#if RREX3_DEBUG == 1 + printf("l"); +#endif + if (rrex3_move(rrex3, false)) { + left_valid = true; + + 
str = rrex3->str; + } else { + left_valid = false; + } + } + + rrex3->expr = expr; + rrex3->str = str; + rrex3->valid = true; + +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_asterisk(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->valid = true; + rrex3->expr++; + return; + } + + rrex3->str = rrex3->previous.str; + char *left = rrex3->previous.expr; + // printf("%s\n",rrex3->str); + char *right = rrex3->expr + 1; + if (*right == ')') { + right++; + } + int right_valid = 0; + bool right_valid_once = false; + char *expr = right; + char *right_str = rrex3->str; + ; + char *right_expr = NULL; + char *str = rrex3->str; + bool first_time = true; + bool left_valid = true; + char *str_prev = NULL; + bool valid_from_start = true; + ; + while (*rrex3->str) { + if (!left_valid && !right_valid) { + break; + } + if (right_valid && !left_valid) { + str = right_str; + break; + } + + rrex3->expr = right; + rrex3->str = str; +#if RREX3_DEBUG == 1 + printf("r"); +#endif + if (*rrex3->str && rrex3_move(rrex3, false)) { + right_valid++; + right_str = rrex3->str; + expr = rrex3->expr; + if (!right_valid_once) { + right_expr = rrex3->expr; + right_valid_once = true; + } + } else { + right_valid = 0; + } + if (first_time) { + first_time = false; + valid_from_start = right_valid; + } + + if (right_valid && !valid_from_start && right_valid > 0) { + expr = right_expr - 1; + if (*(right - 1) == ')') { + expr = right - 1; + } + break; + } + + if ((!right_valid && right_valid_once)) { + expr = right_expr; + if (*(right - 1) == ')') { + str = str_prev; + expr = right - 1; + } + break; + } + + str_prev = str; + rrex3->valid = true; + rrex3->str = str; + rrex3->expr = left; +#if RREX3_DEBUG == 1 + printf("l"); +#endif + if (rrex3_move(rrex3, false)) { + left_valid = true; 
+ str = rrex3->str; + } else { + left_valid = false; + } + } + + rrex3->expr = expr; + rrex3->str = str; + rrex3->valid = true; + +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_asterisk2(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->valid = true; + rrex3->expr++; + return; + } + if (*rrex3->previous.expr == '*') { + // Support for ** + rrex3->valid = false; + // rrex3->pattern_error = true; + rrex3->expr++; + return; + } + rrex3->str = rrex3->previous.str; + ; + char *next = rrex3->expr + 1; + char *next_original = NULL; + if (*next == '*') { + next++; + } + if (*next == ')' && *(next + 1)) { + next_original = next; + next++; + } + char *loop_expr = rrex3->previous.expr; + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *right_next = NULL; + char *right_str = rrex3->str; + while (*rrex3->str && *rrex3->expr && *rrex3->expr != ')') { + // Remember original_str because it's modified + // by checking right and should be restored + // for checking left so they're matching the + // same value. + char *original_str = rrex3->str; + // Check if right matches. + // if(*next != ')'){ + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + // Match rright. + success_next = true; + if (!next_original) { + if (!success_next_once) { + right_next = rrex3->expr; + } + + } else { + right_next = next_original; + break; + } + right_str = rrex3->str; + success_next_once = true; + } else { + // No match Right. + success_next = false; + } + //} + if (success_next_once && !success_next) { + // Matched previous time but now doesn't. + break; + } + // Check if left matches. 
+ rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!rrex3_move(rrex3, false)) { + // No match left. + success_current = false; + } else { + // Match left. + success_current = true; + // NOT SURE< WITHOUT DOET HETZELFDE: + // original_str = rrex3->str; + if (!success_next) { + right_str = rrex3->str; + if (*rrex3->expr != ')') { + right_next = rrex3->expr + 1; // +1 is the * itself + + } else { + + // break; + } + } + } + + if ((success_next && !success_current) || + (!success_next && !success_current)) { + break; + } + } + rrex3->expr = right_next; + rrex3->str = right_str; + rrex3->valid = true; +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_roof(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); +#if RREX3_DEBUG == 1 + printf("expr, *rrex3->str, rrex3->valid); +#endif + rrex3->valid = rrex3->str == rrex3->_str; + rrex3->match_from_start = true; + rrex3->expr++; +} +inline static void rrex3_cmp_dollar(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); +#if RREX3_DEBUG == 1 + printf("Dollar check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (*rrex3->str || !rrex3->valid) { + rrex3->valid = false; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_w(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} +inline static void rrex3_cmp_w_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_d(rrex3_t *rrex3) { + + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if 
RREX3_DEBUG == 1 + printf("Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} +inline static void rrex3_cmp_d_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_slash(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; + + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->slash_functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); +} + +inline static int collect_digits(rrex3_t *rrex3) { + char output[20]; + unsigned int digit_count = 0; + while (isdigit(*rrex3->expr)) { + + output[digit_count] = *rrex3->expr; + rrex3->expr++; + digit_count++; + } + output[digit_count] = 0; + return atoi(output); +} + +inline static void rrex3_cmp_range(rrex3_t *rrex3) { + char *loop_code = rrex3->previous.expr; + char *expr_original = rrex3->expr; + rrex3->expr++; + int range_start = collect_digits(rrex3) - 1; + int range_end = 0; + if (*rrex3->expr == ',') { + rrex3->expr++; + range_end = collect_digits(rrex3); + } + rrex3->expr++; + int times_valid = 0; + while (*rrex3->str) { + rrex3->expr = loop_code; + rrex3_move(rrex3, false); + if (rrex3->valid == false) { + break; + } else { + times_valid++; + } + if (range_end) { + if (times_valid >= range_start && times_valid == range_end - 1) { + rrex3->valid = true; + } else { + rrex3->valid = false; + } + break; + } else if (range_start) { + if (times_valid == range_start) { + rrex3->valid = true; + break; + } + } + } + rrex3->valid = times_valid >= range_start; + if (rrex3->valid && range_end) { + rrex3->valid = times_valid <= range_end; + } + rrex3->expr = strchr(expr_original, '}') + 1; +} + +inline static void rrex3_cmp_word_start_or_end(rrex3_t 
*rrex3) { +#if RREX3_DEBUG == 1 + if (*rrex3->expr != 'B') { + printf("Check word start or end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + } + +#endif + rrex3_set_previous(rrex3); + bool valid = false; + if (isalpha(*rrex3->str)) { + if (rrex3->_str != rrex3->str) { + if (!isalpha(*(rrex3->str - 1))) { + valid = true; + } + } else { + valid = true; + } + } else if (isalpha(isalpha(*rrex3->str) && !isalpha(*rrex3->str + 1))) { + valid = true; + } + rrex3->expr++; + rrex3->valid = valid; +} +inline static void rrex3_cmp_word_not_start_or_end(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Check word NOT start or end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); + +#endif + rrex3_set_previous(rrex3); + + rrex3_cmp_word_start_or_end(rrex3); + rrex3->valid = !rrex3->valid; +} + +inline static void rrex3_cmp_brackets(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets start: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + char *original_expr = rrex3->expr; + rrex3->expr++; + rrex3->inside_brackets = true; + bool valid_once = false; + bool reversed = false; + if (*rrex3->expr == '^') { + reversed = true; + rrex3->expr++; + } + bool valid = false; + while (*rrex3->expr != ']' && *rrex3->expr != 0) { + rrex3->valid = true; + valid = rrex3_move(rrex3, false); + if (reversed) { + valid = !valid; + } + if (valid) { + valid_once = true; + if (!reversed) { + valid_once = true; + break; + } + } else { + if (reversed) { + valid_once = false; + break; + } + } + } + if (valid_once && reversed) { + rrex3->str++; + } + while (*rrex3->expr != ']' && *rrex3->expr != 0) + rrex3->expr++; + if (*rrex3->expr != 0) + rrex3->expr++; + + rrex3->valid = valid_once; + rrex3->inside_brackets = false; + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets end: %c:%c:%d\n", *rrex3->expr, 
*rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_pipe(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + +#if RREX3_DEBUG == 1 + printf("Pipe check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (rrex3->valid == true) { + rrex3->exit = true; + } else { + rrex3->valid = true; + } + rrex3->expr++; +} +inline static void rrex3_cmp_parentheses(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses start check: %c:%c:%d\n", *rrex3->expr, + *rrex3->str, rrex3->valid); +#endif + + rrex3_set_previous(rrex3); + if (!rrex3->valid) { + rrex3->expr++; + return; + } + if (rrex3->match_count == rrex3->match_capacity) { + + rrex3->match_capacity++; + rrex3->matches = (char **)realloc( + rrex3->matches, rrex3->match_capacity * sizeof(char *)); + } + rrex3->matches[rrex3->match_count] = (char *)malloc(strlen(rrex3->str) + 1); + strcpy(rrex3->matches[rrex3->match_count], rrex3->str); + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->expr++; + rrex3->inside_parentheses = true; + while (*rrex3->expr != ')' && !rrex3->exit) { + rrex3_move(rrex3, false); + } + while (*rrex3->expr != ')') { + rrex3->expr++; + } + rrex3->expr++; + rrex3->inside_parentheses = false; + + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; + if (rrex3->valid == false) { + rrex3->str = original_str; + free(rrex3->matches[rrex3->match_count]); + } else { + rrex3->matches[rrex3->match_count] + [strlen(rrex3->matches[rrex3->match_count]) - + strlen(rrex3->str)] = 0; + rrex3->match_count++; + } +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_reset(rrex3_t *rrex3) { + rrex3_free_matches(rrex3); + rrex3->valid = true; + rrex3->pattern_error = false; + rrex3->inside_brackets = false; + rrex3->inside_parentheses = false; + rrex3->exit = false; + 
rrex3->previous.expr = NULL; + rrex3->previous.str = NULL; + rrex3->previous.bytecode = 0; + rrex3->failed.expr = NULL; + rrex3->failed.str = NULL; + rrex3->failed.bytecode = 0; + rrex3->match_from_start = false; +} + +void rrex3_init(rrex3_t *rrex3) { + for (__uint8_t i = 0; i < 254; i++) { + rrex3->functions[i] = rrex3_cmp_literal; + rrex3->slash_functions[i] = rrex3_cmp_literal; + } + rrex3->functions['?'] = rrex3_cmp_question_mark; + rrex3->functions['^'] = rrex3_cmp_roof; + rrex3->functions['$'] = rrex3_cmp_dollar; + rrex3->functions['.'] = rrex3_cmp_dot; + rrex3->functions['*'] = rrex3_cmp_asterisk; + rrex3->functions['+'] = rrex3_cmp_plus; + rrex3->functions['|'] = rrex3_cmp_pipe; + rrex3->functions['\\'] = rrex3_cmp_slash; + rrex3->functions['{'] = rrex3_cmp_range; + rrex3->functions['['] = rrex3_cmp_brackets; + rrex3->functions['('] = rrex3_cmp_parentheses; + rrex3->slash_functions['w'] = rrex3_cmp_w; + rrex3->slash_functions['W'] = rrex3_cmp_w_upper; + rrex3->slash_functions['d'] = rrex3_cmp_d; + rrex3->slash_functions['D'] = rrex3_cmp_d_upper; + rrex3->slash_functions['s'] = rrex3_cmp_whitespace; + rrex3->slash_functions['S'] = rrex3_cmp_whitespace_upper; + rrex3->slash_functions['b'] = rrex3_cmp_word_start_or_end; + rrex3->slash_functions['B'] = rrex3_cmp_word_not_start_or_end; + rrex3->match_count = 0; + rrex3->match_capacity = 0; + rrex3->matches = NULL; + rrex3->compiled = NULL; + + rrex3_reset(rrex3); +} + +rrex3_t *rrex3_new() { + rrex3_t *rrex3 = (rrex3_t *)malloc(sizeof(rrex3_t)); + + rrex3_init(rrex3); + + return rrex3; +} + +rrex3_t *rrex3_compile(rrex3_t *rrex, char *expr) { + + rrex3_t *rrex3 = rrex ? 
rrex : rrex3_new(); + + char *compiled = (char *)malloc(strlen(expr) + 1); + unsigned int count = 0; + while (*expr) { + if (*expr == '[' && *(expr + 2) == ']') { + *compiled = *(expr + 1); + expr++; + expr++; + } else if (*expr == '[' && *(expr + 1) == '0' && *(expr + 2) == '-' && + *(expr + 3) == '9' && *(expr + 4) == ']') { + *compiled = '\\'; + compiled++; + *compiled = 'd'; + count++; + expr++; + expr++; + expr++; + expr++; + } else { + *compiled = *expr; + } + if (*compiled == '[') { + // in_brackets = true; + + } else if (*compiled == ']') { + // in_brackets = false; + } + expr++; + compiled++; + count++; + } + *compiled = 0; + compiled -= count; + rrex3->compiled = compiled; + return rrex3; +} + +inline static void rrex3_set_previous(rrex3_t *rrex3) { + rrex3->previous.function = rrex3->function; + rrex3->previous.expr = rrex3->expr; + rrex3->previous.str = rrex3->str; + rrex3->previous.bytecode = *rrex3->expr; +} + +static bool rrex3_move(rrex3_t *rrex3, bool resume_on_fail) { + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + if (!*rrex3->expr && !*rrex3->str) { + rrex3->exit = true; + return rrex3->valid; + } else if (!*rrex3->expr) { + // rrex3->valid = true; + return rrex3->valid; + } + if (rrex3->pattern_error) { + rrex3->valid = false; + return rrex3->valid; + } + if (resume_on_fail && !rrex3->valid && *rrex3->expr) { + + // rrex3_set_previous(rrex3); + rrex3->failed.bytecode = rrex3->bytecode; + rrex3->failed.function = rrex3->function; + rrex3->failed.expr = original_expr; + rrex3->failed.str = original_str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + + if (!rrex3->valid && !rrex3->pattern_error) { + + if (*rrex3->str) { + char *pipe_position = strstr(rrex3->expr, "|"); + if (pipe_position != NULL) { + rrex3->expr = pipe_position + 
1; + rrex3->str = rrex3->_str; + rrex3->valid = true; + return true; + } + } + if (rrex3->match_from_start) { + rrex3->valid = false; + return rrex3->valid; + } + if (!*rrex3->str++) { + rrex3->valid = false; + return rrex3->valid; + } + rrex3->expr = rrex3->_expr; + if (*rrex3->str) + rrex3->valid = true; + } + } else { + } + return rrex3->valid; +} + +rrex3_t *rrex3(rrex3_t *rrex3, char *str, char *expr) { +#if RREX3_DEBUG == 1 + printf("Regex check: %s:%s:%d\n", expr, str, 1); +#endif + bool self_initialized = false; + if (rrex3 == NULL) { + self_initialized = true; + rrex3 = rrex3_new(); + } else { + rrex3_reset(rrex3); + } + + rrex3->_str = str; + rrex3->_expr = rrex3->compiled ? rrex3->compiled : expr; + rrex3->str = rrex3->_str; + rrex3->expr = rrex3->_expr; + while (*rrex3->expr && !rrex3->exit) { + if (!rrex3_move(rrex3, true)) + return NULL; + } + rrex3->expr = rrex3->_expr; + if (rrex3->valid) { + + return rrex3; + } else { + if (self_initialized) { + rrex3_free(rrex3); + } + return NULL; + } +} + +void rrex3_test() { + rrex3_t *rrex = rrex3_new(); + + assert(rrex3(rrex, "#define abc ", "#define *(\\w.*)\n$")); + + exit(0); + + assert(rrex3(rrex, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + + assert(rrex3(rrex, "aaaaaaa", "a*a$")); + + // assert(rrex3("ababa", "a*b*a*b*a$")); + assert(rrex3(rrex, "#include\"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "#include \"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "aaaaaad", "a*d$")); + assert(rrex3(rrex, "abcdef", "abd?cdef")); + assert(!rrex3(rrex, "abcdef", "abd?def")); + assert(rrex3(rrex, "abcdef", "def")); + assert(!rrex3(rrex, "abcdef", "^def")); + assert(rrex3(rrex, "abcdef", "def$")); + assert(!rrex3(rrex, "abcdef", "^abc$")); + assert(rrex3(rrex, "aB!.#1", "......")); + assert(!rrex3(rrex, "aB!.#\n", " ......")); + assert(!rrex3(rrex, "aaaaaad", "q+d$")); + assert(rrex3(rrex, "aaaaaaa", "a+a$")); + assert(rrex3(rrex, "aaaaaad", "q*d$")); + 
assert(!rrex3(rrex, "aaaaaad", "^q*d$")); + + // Asterisk function + assert(rrex3(rrex, "123321", "123*321")); + assert(rrex3(rrex, "pony", "p*ony")); + assert(rrex3(rrex, "pppony", "p*ony")); + assert(rrex3(rrex, "ppony", "p*pony")); + assert(rrex3(rrex, "pppony", "pp*pony")); + assert(rrex3(rrex, "pppony", ".*pony")); + assert(rrex3(rrex, "pony", ".*ony")); + assert(rrex3(rrex, "pony", "po*ny")); + // assert(rrex3(rrex,"ppppony", "p*pppony")); + + // Plus function + assert(rrex3(rrex, "pony", "p+ony")); + assert(!rrex3(rrex, "ony", "p+ony")); + assert(rrex3(rrex, "ppony", "p+pony")); + assert(rrex3(rrex, "pppony", "pp+pony")); + assert(rrex3(rrex, "pppony", ".+pony")); + assert(rrex3(rrex, "pony", ".+ony")); + assert(rrex3(rrex, "pony", "po+ny")); + + // Slash functions + assert(rrex3(rrex, "a", "\\w")); + assert(!rrex3(rrex, "1", "\\w")); + assert(rrex3(rrex, "1", "\\W")); + assert(!rrex3(rrex, "a", "\\W")); + assert(rrex3(rrex, "a", "\\S")); + assert(!rrex3(rrex, " ", "\\s")); + assert(!rrex3(rrex, "\t", "\\s")); + assert(!rrex3(rrex, "\n", "\\s")); + assert(rrex3(rrex, "1", "\\d")); + assert(!rrex3(rrex, "a", "\\d")); + assert(rrex3(rrex, "a", "\\D")); + assert(!rrex3(rrex, "1", "\\D")); + assert(rrex3(rrex, "abc", "\\b")); + + assert(rrex3(rrex, "abc", "\\babc")); + assert(!rrex3(rrex, "abc", "a\\b")); + assert(!rrex3(rrex, "abc", "ab\\b")); + assert(!rrex3(rrex, "abc", "abc\\b")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + assert(rrex3(rrex, "abc", "ab\\B")); + assert(!rrex3(rrex, "1ab", "1\\Bab")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + + // Escaping of special chars + assert(rrex3(rrex, "()+*.\\", "\\(\\)\\+\\*\\.\\\\")); + + // Pipe + // assert(rrex3(rrex,"abc","abc|def")); + assert(rrex3(rrex, "abc", "def|jkl|abc")); + assert(rrex3(rrex, "abc", "abc|def")); + + assert(rrex3(rrex, "rhq", "def|rhq|rha")); + assert(rrex3(rrex, "abc", "abc|def")); + + // Repeat + assert(rrex3(rrex, "aaaaa", "a{4}")); + + assert(rrex3(rrex, "aaaa", "a{1,3}a")); + + // Range 
+ assert(rrex3(rrex, "abc", "[abc][abc][abc]$")); + assert(rrex3(rrex, "def", "[^abc][^abc][^abc]$")); + assert(rrex3(rrex, "defabc", "[^abc][^abc][^abc]abc")); + assert(rrex3(rrex, "0-9", "0-9")); + assert(rrex3(rrex, "55-9", "[^6-9]5-9$")); + assert(rrex3(rrex, "a", "[a-z]$")); + assert(rrex3(rrex, "A", "[A-Z]$")); + assert(rrex3(rrex, "5", "[0-9]$")); + assert(!rrex3(rrex, "a", "[^a-z]$")); + assert(!rrex3(rrex, "A", "[^A-Z]$")); + assert(!rrex3(rrex, "5", "[^0-9]$")); + assert(rrex3(rrex, "123abc", "[0-9]*abc$")); + assert(rrex3(rrex, "123123", "[0-9]*$")); + + // Parentheses + + assert(rrex3(rrex, "datadata", "(data)*")); + + assert(rrex3(rrex, "datadatapony", "(data)*pony$")); + + assert(!rrex3(rrex, "datadatapony", "(d*p*ata)*pond$")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato$")); + assert(!rrex3(rrex, "datadatadato", "(d*p*a*ta)*gato$")); + + // Matches + assert(rrex3(rrex, "123", "(123)")); + assert(!strcmp(rrex->matches[0], "123")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "aaaabc", "(.*)c")); + + assert(rrex3(rrex, "abcde", ".....$")); + + assert(rrex3(rrex, "abcdefghijklmnopqrstuvwxyz", + "..........................$")); + // printf("(%d)\n", rrex->valid); + + assert(rrex3(rrex, "#include ", "#include.*<(.*)>")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, "#include \"stdlib.h\"", "#include.\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + assert(rrex3(rrex, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + + assert(rrex3(rrex, " #include ", "#include.+<(.+)>")); + 
assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, " #include \"stdlib.h\"", "#include.+\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + + assert(rrex3(rrex, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.+)\"\"(.+)\"\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + + assert(rrex3(rrex, "int abc ", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "int abc;", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "int abc", "int (.*)[; ]?$")); + assert(!strcmp(rrex->matches[0], "abc")); + + assert(rrex3(rrex, "#define abc", "#define (.*)")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "#define abc", "#define (.*)$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(rrex3(rrex, "#define abc 1", "#define (.*) (.*)$")); + assert(!strcmp(rrex->matches[0], "abc")); + assert(!strcmp(rrex->matches[1], "1")); + + assert(rrex3(rrex, "#define abc 1 ", "#define (.*) (.*) *$")); + assert(!strcmp(rrex->matches[0], "abc")); + printf("<<%s>>\n", rrex->matches[1]); + assert(!strcmp(rrex->matches[1], "1")); + + assert(rrex3(rrex, "#define abc \"test with spaces\" ", + "#define (.*) *\"(.*)\" *$")); + assert(!strcmp(rrex->matches[0], "abc")); + printf("<<%s>>\n", rrex->matches[1]); + assert(!strcmp(rrex->matches[1], "test with spaces")); + + rrex3_free(rrex); +} +#endif +// RETOOR - Sep 3 2024 +// MIT License +// =========== + +// Copyright (c) 2024 Retoor + +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// 
furnished to do so, subject to the following conditions: + +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. + +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +#ifndef RLIB_H +#define RLIB_H +// BEGIN OF RLIB +#ifndef RPRINT_H +#define RPRINT_H + +#ifndef RLIB_TIME +#define RLIB_TIME + +#include +#include +#include +#include +#include +#include + +#ifndef CLOCK_MONOTONIC +#define CLOCK_MONOTONIC 1 +#endif + +typedef unsigned long long msecs_t; +typedef uint64_t nsecs_t; + +nsecs_t nsecs() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec; +} + +msecs_t rnsecs_to_msecs(nsecs_t nsecs) { return nsecs / 1000 / 1000; } + +nsecs_t rmsecs_to_nsecs(msecs_t msecs) { return msecs * 1000 * 1000; } + +msecs_t usecs() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (long long)(tv.tv_sec) * 1000000 + (long long)(tv.tv_usec); +} + +msecs_t msecs() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (long long)(tv.tv_sec) * 1000 + (tv.tv_usec / 1000); +} +char *msecs_strs(msecs_t ms) { + static char str[22]; + str[0] = 0; + sprintf(str, "%f", ms * 0.001); + for (int i = strlen(str); i > 0; i--) { + if (str[i] > '0') + break; + str[i] = 0; + } + return str; +} +char *msecs_strms(msecs_t ms) { + static char str[22]; + str[0] = 0; + sprintf(str, "%lld", ms); + return str; +} +char *msecs_str(long long ms) { + static char result[30]; + result[0] = 0; + if (ms > 999) { + char 
*s = msecs_strs(ms); + sprintf(result, "%ss", s); + } else { + char *s = msecs_strms(ms); + sprintf(result, "%sMs", s); + } + return result; +} + +void nsleep(nsecs_t nanoseconds) { + long seconds = 0; + int factor = 0; + while (nanoseconds > 1000000000) { + factor++; + nanoseconds = nanoseconds / 10; + } + if (factor) { + seconds = 1; + factor--; + while (factor) { + seconds = seconds * 10; + factor--; + } + } + + struct timespec req = {seconds, nanoseconds}; + struct timespec rem; + + if (nanosleep(&req, &rem) == -1) { + if (errno == EINTR) { + printf("Sleep was interrupted. Remaining time: %ld.%09ld seconds\n", + rem.tv_sec, rem.tv_nsec); + } else { + perror("nanosleep"); + } + } else { + // printf("Slept for %ld.%09ld seconds\n", req.tv_sec, req.tv_nsec); + } +} + +void ssleep(double s) { + long nanoseconds = (long)(1000000000 * s); + + long seconds = 0; + + struct timespec req = {seconds, nanoseconds}; + struct timespec rem; + + if (nanosleep(&req, &rem) == -1) { + if (errno == EINTR) { + printf("Sleep was interrupted. 
Remaining time: %ld.%09ld seconds\n", + rem.tv_sec, rem.tv_nsec); + } else { + perror("nanosleep"); + } + } else { + // printf("Slept for %ld.%09ld seconds\n", req.tv_sec, req.tv_nsec); + } +} +void msleep(long miliseonds) { + long nanoseconds = miliseonds * 1000000; + nsleep(nanoseconds); +} + +char *format_time(int64_t nanoseconds) { + static char output[1024]; + size_t output_size = sizeof(output); + output[0] = 0; + if (nanoseconds < 1000) { + // Less than 1 microsecond + snprintf(output, output_size, "%ldns", nanoseconds); + } else if (nanoseconds < 1000000) { + // Less than 1 millisecond + double us = nanoseconds / 1000.0; + snprintf(output, output_size, "%.2fµs", us); + } else if (nanoseconds < 1000000000) { + // Less than 1 second + double ms = nanoseconds / 1000000.0; + snprintf(output, output_size, "%.2fms", ms); + } else { + // 1 second or more + double s = nanoseconds / 1000000000.0; + snprintf(output, output_size, "%.2fs", s); + } + return output; +} + +#endif +#include +#include +#include +#include +#include + +long rpline_number = 0; +nsecs_t rprtime = 0; + +int8_t _env_rdisable_colors = -1; +bool _rprint_enable_colors = true; + +bool rprint_is_color_enabled() { + if (_env_rdisable_colors == -1) { + _env_rdisable_colors = getenv("RDISABLE_COLORS") != NULL; + } + if (_env_rdisable_colors) { + _rprint_enable_colors = false; + } + return _rprint_enable_colors; +} + +void rprint_disable_colors() { _rprint_enable_colors = false; } +void rprint_enable_colors() { _rprint_enable_colors = true; } +void rprint_toggle_colors() { _rprint_enable_colors = !_rprint_enable_colors; } + +void rclear() { printf("\033[2J"); } + +void rprintpf(FILE *f, const char *prefix, const char *format, va_list args) { + char *pprefix = (char *)prefix; + char *pformat = (char *)format; + bool reset_color = false; + bool press_any_key = false; + char new_format[4096]; + bool enable_color = rprint_is_color_enabled(); + memset(new_format, 0, 4096); + int new_format_length = 0; + char 
temp[1000]; + memset(temp, 0, 1000); + if (enable_color && pprefix[0]) { + strcat(new_format, pprefix); + new_format_length += strlen(pprefix); + reset_color = true; + } + while (true) { + if (pformat[0] == '\\' && pformat[1] == 'i') { + strcat(new_format, "\e[3m"); + new_format_length += strlen("\e[3m"); + reset_color = true; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'u') { + strcat(new_format, "\e[4m"); + new_format_length += strlen("\e[4m"); + reset_color = true; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'b') { + strcat(new_format, "\e[1m"); + new_format_length += strlen("\e[1m"); + reset_color = true; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'C') { + press_any_key = true; + rpline_number++; + pformat++; + pformat++; + reset_color = false; + } else if (pformat[0] == '\\' && pformat[1] == 'k') { + press_any_key = true; + rpline_number++; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'c') { + rpline_number++; + strcat(new_format, "\e[2J\e[H"); + new_format_length += strlen("\e[2J\e[H"); + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'L') { + rpline_number++; + temp[0] = 0; + sprintf(temp, "%ld", rpline_number); + strcat(new_format, temp); + new_format_length += strlen(temp); + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'l') { + rpline_number++; + temp[0] = 0; + sprintf(temp, "%.5ld", rpline_number); + strcat(new_format, temp); + new_format_length += strlen(temp); + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 'T') { + nsecs_t nsecs_now = nsecs(); + nsecs_t end = rprtime ? 
nsecs_now - rprtime : 0; + temp[0] = 0; + sprintf(temp, "%s", format_time(end)); + strcat(new_format, temp); + new_format_length += strlen(temp); + rprtime = nsecs_now; + pformat++; + pformat++; + } else if (pformat[0] == '\\' && pformat[1] == 't') { + rprtime = nsecs(); + pformat++; + pformat++; + } else { + new_format[new_format_length] = *pformat; + new_format_length++; + if (!*pformat) + break; + + // printf("%c",*pformat); + pformat++; + } + } + if (reset_color) { + strcat(new_format, "\e[0m"); + new_format_length += strlen("\e[0m"); + } + + new_format[new_format_length] = 0; + vfprintf(f, new_format, args); + + fflush(stdout); + if (press_any_key) { + nsecs_t s = nsecs(); + fgetc(stdin); + rprtime += nsecs() - s; + } +} + +void rprintp(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "", format, args); + va_end(args); +} + +void rprintf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "", format, args); + va_end(args); +} +void rprint(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "", format, args); + va_end(args); +} +#define printf rprint + +// Print line +void rprintlf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\\l", format, args); + va_end(args); +} +void rprintl(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\\l", format, args); + va_end(args); +} + +// Black +void rprintkf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[30m", format, args); + va_end(args); +} +void rprintk(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[30m", format, args); + va_end(args); +} + +// Red +void rprintrf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[31m", format, args); + va_end(args); +} +void rprintr(char *format, ...) 
{ + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[31m", format, args); + va_end(args); +} + +// Green +void rprintgf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[32m", format, args); + va_end(args); +} +void rprintg(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[32m", format, args); + va_end(args); +} + +// Yellow +void rprintyf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[33m", format, args); + va_end(args); +} +void rprinty(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[33m", format, args); + va_end(args); +} + +// Blue +void rprintbf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[34m", format, args); + va_end(args); +} + +void rprintb(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[34m", format, args); + va_end(args); +} + +// Magenta +void rprintmf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[35m", format, args); + va_end(args); +} +void rprintm(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[35m", format, args); + va_end(args); +} + +// Cyan +void rprintcf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[36m", format, args); + va_end(args); +} +void rprintc(char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(stdout, "\e[36m", format, args); + va_end(args); +} + +// White +void rprintwf(FILE *f, char *format, ...) { + va_list args; + va_start(args, format); + rprintpf(f, "\e[37m", format, args); + va_end(args); +} +void rprintw(char *format, ...) 
/*
 * Allocation bookkeeping.
 *   rmalloc_count       - blocks currently live (allocated and not yet freed)
 *   rmalloc_alloc_count - total number of successful allocations
 *   rmalloc_free_count  - total number of blocks released through rfree
 */
unsigned long long rmalloc_count = 0;
unsigned long long rmalloc_alloc_count = 0;
unsigned long long int rmalloc_free_count = 0;

/**
 * malloc wrapper that records the allocation.
 *
 * Only successful allocations are counted, so a failed request does not
 * show up later as a leak in the statistics.
 *
 * @param size number of bytes requested.
 * @return the new block, or NULL when malloc fails.
 */
void *rmalloc(size_t size) {
    void *block = malloc(size);
    if (block != NULL) {
        rmalloc_count++;
        rmalloc_alloc_count++;
    }
    return block;
}

/**
 * realloc wrapper. A call with obj == NULL behaves like an allocation and is
 * counted as one when it succeeds; resizing an existing block leaves the
 * counters untouched.
 *
 * @param obj  block to resize, or NULL to allocate a new one.
 * @param size requested size in bytes.
 * @return whatever realloc returns; on failure obj is still valid.
 */
void *rrealloc(void *obj, size_t size) {
    void *block = realloc(obj, size);
    if (obj == NULL && block != NULL) {
        rmalloc_count++;
        rmalloc_alloc_count++;
    }
    return block;
}

/**
 * free wrapper that records the release.
 *
 * Freeing NULL is a no-op and is not counted; counting it would make the
 * unsigned live-block counter wrap around.
 *
 * @param obj block to release, may be NULL.
 * @return always NULL, so callers can write `p = rfree(p);`.
 */
void *rfree(void *obj) {
    if (obj != NULL) {
        rmalloc_count--;
        rmalloc_free_count++;
        free(obj);
    }
    return NULL;
}

/* From here on the plain allocator names are routed through the wrappers. */
#define malloc rmalloc
#define realloc rrealloc
#define free rfree

/**
 * Describe the current allocation counters.
 *
 * @return pointer to a static buffer that is overwritten by the next call.
 */
char *rmalloc_stats() {
    /* Three 20-digit values plus the fixed text need just over 100 bytes. */
    static char res[128] = {0};
    snprintf(res, sizeof(res),
             "Memory usage: %llu allocated, %llu freed, %llu in use.",
             rmalloc_alloc_count, rmalloc_free_count, rmalloc_count);
    return res;
}

/**
 * Duplicate a string into a block tracked by the allocation counters.
 *
 * @param str string to copy; NULL yields NULL.
 * @return newly allocated copy to be released with rfree, or NULL when str
 *         is NULL or the allocation fails.
 */
char *rstrdup(char *str) {
    if (str == NULL) {
        return NULL;
    }
    size_t size = strlen(str) + 1;
    /* rmalloc does the counting, and only when the allocation succeeded. */
    char *res = (char *)rmalloc(size);
    if (res != NULL) {
        memcpy(res, str, size);
    }
    return res;
}
printf("%s:%d: " fmt, __FILE__, __LINE__, __VA_ARGS__); + +char *rcurrent_banner; +int rassert_count = 0; +unsigned short rtest_is_first = 1; +unsigned int rtest_fail_count = 0; + +int rtest_end(char *content) { + // Returns application exit code. 0 == success + printf("%s", content); + printf("\n@assertions: %d\n", rassert_count); + printf("@memory: %s\n", rmalloc_stats()); + + if (rmalloc_count != 0) { + printf("MEMORY ERROR\n"); + return rtest_fail_count > 0; + } + return rtest_fail_count > 0; +} + +void rtest_test_banner(char *content, char *file) { + if (rtest_is_first == 1) { + char delimiter[] = "."; + char *d = delimiter; + char f[2048]; + strcpy(f, file); + printf("%s tests", strtok(f, d)); + rtest_is_first = 0; + setvbuf(stdout, NULL, _IONBF, 0); + } + printf("\n - %s ", content); +} + +bool rtest_test_true_silent(char *expr, int res, int line) { + rassert_count++; + if (res) { + return true; + } + rprintrf(stderr, "\nERROR on line %d: %s", line, expr); + rtest_fail_count++; + return false; +} + +bool rtest_test_true(char *expr, int res, int line) { + rassert_count++; + if (res) { + fprintf(stdout, "."); + return true; + } + rprintrf(stderr, "\nERROR on line %d: %s", line, expr); + rtest_fail_count++; + return false; +} +bool rtest_test_false_silent(char *expr, int res, int line) { + return rtest_test_true_silent(expr, !res, line); +} +bool rtest_test_false(char *expr, int res, int line) { + return rtest_test_true(expr, !res, line); +} +void rtest_test_skip(char *expr, int line) { + rprintgf(stderr, "\n @skip(%s) on line %d\n", expr, line); +} +bool rtest_test_assert(char *expr, int res, int line) { + if (rtest_test_true(expr, res, line)) { + return true; + } + rtest_end(""); + exit(40); +} + +#define rtest_banner(content) \ + rcurrent_banner = content; \ + rtest_test_banner(content, __FILE__); +#define rtest_true(expr) rtest_test_true(#expr, expr, __LINE__); +#define rtest_assert(expr) \ + { \ + int __valid = expr ? 
/**
 * Create (or truncate) a file with the given text and reopen it for reading.
 *
 * @param path    file to write.
 * @param content NUL-terminated text to store; NULL creates an empty file.
 * @return stream opened with mode "rb" positioned at the start, or NULL when
 *         the file cannot be created, written or reopened. The caller closes
 *         the returned stream.
 */
FILE *rtest_create_file(char *path, char *content) {
    FILE *fd = fopen(path, "wb");
    if (fd == NULL) {
        return NULL;
    }
    if (content != NULL && fputs(content, fd) == EOF) {
        fclose(fd);
        return NULL;
    }
    /* fclose flushes; a failure here means the data did not reach the file. */
    if (fclose(fd) != 0) {
        return NULL;
    }
    return fopen(path, "rb");
}
/**
 * Tell whether `s` starts with a letter range of the form "a-z".
 *
 * Checks, in order, that s[0] is a letter, s[1] is '-' and s[2] is a letter;
 * evaluation stops at the first failing check, so a string shorter than
 * three characters is never read past its terminator.
 *
 * @param s NUL-terminated text to inspect.
 * @return true when the first three characters are <letter>-<letter>.
 */
static bool isalpharange(char *s) {
    /* <ctype.h> functions need a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour. */
    if (!isalpha((unsigned char)s[0])) {
        return false;
    }
    if (s[1] != '-') {
        return false;
    }
    return isalpha((unsigned char)s[2]) != 0;
}
|| chr == '+' || + chr == '*') + return true; + return false; +} + +inline static void rrex3_cmp_literal(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + if (*rrex3->expr == 0 && !*rrex3->str) { + printf("ERROR, EMPTY CHECK"); + exit(1); + } + if (rrex3->valid == false) { + rrex3->expr++; + return; + } + if (rrex3->inside_brackets) { + if (isalpharange(rrex3->expr) || isdigitrange(rrex3->expr)) { + rrex3_cmp_literal_range(rrex3); + return; + } + } +#if RREX3_DEBUG == 1 + printf("Literal check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (*rrex3->expr == *rrex3->str) { + rrex3->expr++; + rrex3->str++; + rrex3->valid = true; + // if(*rrex3->expr &&rrex3->functions[(int)*rrex3->expr] == + // rrex3_cmp_literal && !rrex3->inside_brackets && + //! rrex3_is_function(*rrex3->expr)){ rrex3_cmp_literal(rrex3); + // if(rrex3->valid == false){ + // rrex3->expr--; + // rrex3->valid = true; + // } + // } + return; + } + rrex3->expr++; + rrex3->valid = false; +} + +inline static void rrex3_cmp_dot(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Dot check (any char): %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + rrex3->expr++; + if (!rrex3->valid) { + return; + } + if (*rrex3->str && *rrex3->str != '\n') { + rrex3->str++; + if (*rrex3->expr && *rrex3->expr == '.') { + rrex3_cmp_dot(rrex3); + return; + } /*else if(*rrex3->expr && (*rrex3->expr == '*' || *rrex3->expr == + '+')){ char * next = strchr(rrex3->str,*(rrex3->expr + 1)); char * + space = strchr(rrex3->str,'\n'); if(next && (!space || space > next)){ + rrex3->str = next; + } + }*/ + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_question_mark(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Question mark check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + if (rrex3->valid == false) + rrex3->valid = true; + rrex3->expr++; +} + +inline static void 
rrex3_cmp_whitespace(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = c == ' ' || c == '\n' || c == '\t'; + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_whitespace_upper(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Non whitespace check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + char c = *rrex3->expr; + rrex3->valid = !(c == ' ' || c == '\n' || c == '\t'); + if (rrex3->valid) { + rrex3->str++; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_plus(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + printf("Plus check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + rrex3_set_previous(rrex3); + + if (rrex3->valid) { + rrex3->str--; + } else { + return; + } + char *original_expr = rrex3->expr; + char *next = original_expr + 1; + char *loop_expr = rrex3->previous.expr - 1; + if (*loop_expr == '+') { + rrex3->valid = false; + rrex3->pattern_error = true; + rrex3->expr++; + return; + } + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *next_next = NULL; + char *next_str = rrex3->str; + while (*rrex3->str) { + // Check if next matches + char *original_str = rrex3->str; + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + success_next = true; + next_next = rrex3->expr; + next_str = rrex3->str; + success_next_once = true; + } else { + success_next = false; + } + if (success_next_once && !success_next) { + break; + } + // Check if current matches + rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!*rrex3->str || !rrex3_move(rrex3, false)) { + success_current = false; + } else { + success_current = true; + if (!success_next) { + next_next = rrex3->expr + 1; // +1 is the * itself + next_str = 
rrex3->str; + } + } + if (success_next && !success_current) { + break; + } + } + if (!next_next) + rrex3->expr = next; + else { + rrex3->expr = next_next; + } + rrex3->str = next_str; + rrex3->valid = true; +} + +inline static void rrex3_cmp_asterisk(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintg("Asterisk start check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->valid = true; + rrex3->expr++; + return; + } + if (*rrex3->previous.expr == '*') { + // Support for ** + rrex3->valid = false; + // rrex3->pattern_error = true; + rrex3->expr++; + return; + } + rrex3->str = rrex3->previous.str; + ; + char *next = rrex3->expr + 1; + char *next_original = NULL; + if (*next == '*') { + next++; + } + if (*next == ')' && *(next + 1)) { + next_original = next; + next++; + } + char *loop_expr = rrex3->previous.expr; + bool success_next = false; + bool success_next_once = false; + bool success_current = false; + char *right_next = NULL; + char *right_str = rrex3->str; + while (*rrex3->str && *rrex3->expr && *rrex3->expr != ')') { + // Remember original_str because it's modified + // by checking right and should be restored + // for checking left so they're matching the + // same value. + char *original_str = rrex3->str; + // Check if right matches. + // if(*next != ')'){ + rrex3->expr = next; + rrex3->valid = true; + if (rrex3_move(rrex3, false)) { + // Match rright. + success_next = true; + if (!next_original) { + right_next = rrex3->expr; + } else { + right_next = next_original; + break; + } + right_str = rrex3->str; + success_next_once = true; + } else { + // No match Right. + success_next = false; + } + //} + if (success_next_once && !success_next) { + // Matched previous time but now doesn't. + break; + } + // Check if left matches. + rrex3->str = original_str; + rrex3->expr = loop_expr; + rrex3->valid = true; + if (!rrex3_move(rrex3, false)) { + // No match left. + success_current = false; + } else { + // Match left. 
+ success_current = true; + // NOT SURE< WITHOUT DOET HETZELFDE: + // original_str = rrex3->str; + if (!success_next) { + right_str = rrex3->str; + if (*rrex3->expr != ')') { + right_next = rrex3->expr + 1; // +1 is the * itself + + } else { + + // break; + } + } + } + + if ((success_next && !success_current) || + (!success_next && !success_current)) { + break; + } + } + rrex3->expr = right_next; + rrex3->str = right_str; + rrex3->valid = true; +#if RREX3_DEBUG == 1 + rprintg("Asterisk end check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_roof(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); +#if RREX3_DEBUG == 1 + printf("expr, *rrex3->str, rrex3->valid); +#endif + rrex3->valid = rrex3->str == rrex3->_str; + rrex3->match_from_start = true; + rrex3->expr++; +} +inline static void rrex3_cmp_dollar(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + +#if RREX3_DEBUG == 1 + printf("Dollar check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (*rrex3->str || !rrex3->valid) { + rrex3->valid = false; + } + rrex3->expr++; +} + +inline static void rrex3_cmp_w(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} +inline static void rrex3_cmp_w_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Word check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isalpha(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_d(rrex3_t *rrex3) { + + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } 
+} +inline static void rrex3_cmp_d_upper(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; +#if RREX3_DEBUG == 1 + printf("!Digit check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (!isdigit(*rrex3->str)) { + rrex3->str++; + } else { + rrex3->valid = false; + } +} + +inline static void rrex3_cmp_slash(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3->expr++; + + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->slash_functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); +} + +inline static int collect_digits(rrex3_t *rrex3) { + char output[20]; + unsigned int digit_count = 0; + while (isdigit(*rrex3->expr)) { + + output[digit_count] = *rrex3->expr; + rrex3->expr++; + digit_count++; + } + output[digit_count] = 0; + return atoi(output); +} + +inline static void rrex3_cmp_range(rrex3_t *rrex3) { + char *loop_code = rrex3->previous.expr; + char *expr_original = rrex3->expr; + rrex3->expr++; + int range_start = collect_digits(rrex3) - 1; + int range_end = 0; + if (*rrex3->expr == ',') { + rrex3->expr++; + range_end = collect_digits(rrex3); + } + rrex3->expr++; + int times_valid = 0; + while (*rrex3->str) { + rrex3->expr = loop_code; + rrex3_move(rrex3, false); + if (rrex3->valid == false) { + break; + } else { + times_valid++; + } + if (range_end) { + if (times_valid >= range_start && times_valid == range_end - 1) { + rrex3->valid = true; + } else { + rrex3->valid = false; + } + break; + } else if (range_start) { + if (times_valid == range_start) { + rrex3->valid = true; + break; + } + } + } + rrex3->valid = times_valid >= range_start; + if (rrex3->valid && range_end) { + rrex3->valid = times_valid <= range_end; + } + rrex3->expr = strchr(expr_original, '}') + 1; +} + +inline static void rrex3_cmp_word_start_or_end(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + bool valid = false; + if (isalpha(*rrex3->str)) { + if (rrex3->_str != rrex3->str) { + if (!isalpha(*(rrex3->str - 1))) { + valid = true; + } + } 
else { + valid = true; + } + } else if (isalpha(isalpha(*rrex3->str) && !isalpha(*rrex3->str + 1))) { + valid = true; + } + rrex3->expr++; + rrex3->valid = valid; +} +inline static void rrex3_cmp_word_not_start_or_end(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + + rrex3_cmp_word_start_or_end(rrex3); + rrex3->valid = !rrex3->valid; +} + +inline static void rrex3_cmp_brackets(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets start: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif + rrex3_set_previous(rrex3); + char *original_expr = rrex3->expr; + rrex3->expr++; + rrex3->inside_brackets = true; + bool valid_once = false; + bool reversed = false; + if (*rrex3->expr == '^') { + reversed = true; + rrex3->expr++; + } + bool valid = false; + while (*rrex3->expr != ']' && *rrex3->expr != 0) { + rrex3->valid = true; + valid = rrex3_move(rrex3, false); + if (reversed) { + valid = !valid; + } + if (valid) { + valid_once = true; + if (!reversed) { + valid_once = true; + break; + } + } else { + if (reversed) { + valid_once = false; + break; + } + } + } + if (valid_once && reversed) { + rrex3->str++; + } + while (*rrex3->expr != ']' && *rrex3->expr != 0) + rrex3->expr++; + if (*rrex3->expr != 0) + rrex3->expr++; + + rrex3->valid = valid_once; + rrex3->inside_brackets = false; + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; +#if RREX3_DEBUG == 1 + rprintb("\\l Brackets end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_cmp_pipe(rrex3_t *rrex3) { + rrex3_set_previous(rrex3); + +#if RREX3_DEBUG == 1 + printf("Pipe check: %c:%c:%d\n", *rrex3->expr, *rrex3->str, rrex3->valid); +#endif + if (rrex3->valid == true) { + rrex3->exit = true; + } else { + rrex3->valid = true; + } + rrex3->expr++; +} +inline static void rrex3_cmp_parentheses(rrex3_t *rrex3) { +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses start check: 
%c:%c:%d\n", *rrex3->expr, + *rrex3->str, rrex3->valid); +#endif + if (!rrex3->valid) { + rrex3->expr++; + return; + } + rrex3_set_previous(rrex3); + if (rrex3->match_count == rrex3->match_capacity) { + + rrex3->match_capacity++; + rrex3->matches = (char **)realloc( + rrex3->matches, rrex3->match_capacity * sizeof(char *)); + } + rrex3->matches[rrex3->match_count] = (char *)malloc(strlen(rrex3->str) + 1); + strcpy(rrex3->matches[rrex3->match_count], rrex3->str); + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->expr++; + rrex3->inside_parentheses = true; + while (*rrex3->expr != ')' && !rrex3->exit) { + rrex3_move(rrex3, false); + } + while (*rrex3->expr != ')') { + rrex3->expr++; + } + rrex3->expr++; + rrex3->inside_parentheses = false; + + char *previous_expr = rrex3->expr; + rrex3->expr = original_expr; + rrex3_set_previous(rrex3); + rrex3->expr = previous_expr; + if (rrex3->valid == false) { + rrex3->str = original_str; + free(rrex3->matches[rrex3->match_count]); + } else { + rrex3->matches[rrex3->match_count] + [strlen(rrex3->matches[rrex3->match_count]) - + strlen(rrex3->str)] = 0; + rrex3->match_count++; + } +#if RREX3_DEBUG == 1 + rprinty("\\l Parentheses end: %c:%c:%d\n", *rrex3->expr, *rrex3->str, + rrex3->valid); +#endif +} + +inline static void rrex3_reset(rrex3_t *rrex3) { + rrex3_free_matches(rrex3); + rrex3->valid = true; + rrex3->pattern_error = false; + rrex3->inside_brackets = false; + rrex3->inside_parentheses = false; + rrex3->exit = false; + rrex3->previous.expr = NULL; + rrex3->previous.str = NULL; + rrex3->previous.bytecode = 0; + rrex3->failed.expr = NULL; + rrex3->failed.str = NULL; + rrex3->failed.bytecode = 0; + rrex3->match_from_start = false; +} + +void rrex3_init(rrex3_t *rrex3) { + for (__uint8_t i = 0; i < 254; i++) { + rrex3->functions[i] = rrex3_cmp_literal; + rrex3->slash_functions[i] = rrex3_cmp_literal; + } + rrex3->functions['?'] = rrex3_cmp_question_mark; + rrex3->functions['^'] = rrex3_cmp_roof; 
+ rrex3->functions['$'] = rrex3_cmp_dollar; + rrex3->functions['.'] = rrex3_cmp_dot; + rrex3->functions['*'] = rrex3_cmp_asterisk; + rrex3->functions['+'] = rrex3_cmp_plus; + rrex3->functions['|'] = rrex3_cmp_pipe; + rrex3->functions['\\'] = rrex3_cmp_slash; + rrex3->functions['{'] = rrex3_cmp_range; + rrex3->functions['['] = rrex3_cmp_brackets; + rrex3->functions['('] = rrex3_cmp_parentheses; + rrex3->slash_functions['w'] = rrex3_cmp_w; + rrex3->slash_functions['W'] = rrex3_cmp_w_upper; + rrex3->slash_functions['d'] = rrex3_cmp_d; + rrex3->slash_functions['D'] = rrex3_cmp_d_upper; + rrex3->slash_functions['s'] = rrex3_cmp_whitespace; + rrex3->slash_functions['S'] = rrex3_cmp_whitespace_upper; + rrex3->slash_functions['b'] = rrex3_cmp_word_start_or_end; + rrex3->slash_functions['B'] = rrex3_cmp_word_not_start_or_end; + rrex3->match_count = 0; + rrex3->match_capacity = 0; + rrex3->matches = NULL; + rrex3->compiled = NULL; + + rrex3_reset(rrex3); +} + +rrex3_t *rrex3_new() { + rrex3_t *rrex3 = (rrex3_t *)malloc(sizeof(rrex3_t)); + + rrex3_init(rrex3); + + return rrex3; +} + +rrex3_t *rrex3_compile(rrex3_t *rrex, char *expr) { + + rrex3_t *rrex3 = rrex ? 
rrex : rrex3_new(); + + char *compiled = (char *)malloc(strlen(expr) + 1); + unsigned int count = 0; + while (*expr) { + if (*expr == '[' && *(expr + 2) == ']') { + *compiled = *(expr + 1); + expr++; + expr++; + } else if (*expr == '[' && *(expr + 1) == '0' && *(expr + 2) == '-' && + *(expr + 3) == '9' && *(expr + 4) == ']') { + *compiled = '\\'; + compiled++; + *compiled = 'd'; + count++; + expr++; + expr++; + expr++; + expr++; + } else { + *compiled = *expr; + } + if (*compiled == '[') { + // in_brackets = true; + + } else if (*compiled == ']') { + // in_brackets = false; + } + expr++; + compiled++; + count++; + } + *compiled = 0; + compiled -= count; + rrex3->compiled = compiled; + return rrex3; +} + +inline static void rrex3_set_previous(rrex3_t *rrex3) { + rrex3->previous.function = rrex3->function; + rrex3->previous.expr = rrex3->expr; + rrex3->previous.str = rrex3->str; + rrex3->previous.bytecode = *rrex3->expr; +} + +static bool rrex3_move(rrex3_t *rrex3, bool resume_on_fail) { + char *original_expr = rrex3->expr; + char *original_str = rrex3->str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + if (!*rrex3->expr && !*rrex3->str) { + + rrex3->exit = true; + return rrex3->valid; + } + if (rrex3->pattern_error) { + rrex3->valid = false; + return rrex3->valid; + } + if (resume_on_fail && !rrex3->valid && *rrex3->expr) { + // rrex3_set_previous(rrex3); + rrex3->failed.bytecode = rrex3->bytecode; + rrex3->failed.function = rrex3->function; + rrex3->failed.expr = original_expr; + rrex3->failed.str = original_str; + rrex3->bytecode = *rrex3->expr; + rrex3->function = rrex3->functions[(int)rrex3->bytecode]; + rrex3->function(rrex3); + + if (!rrex3->valid && !rrex3->pattern_error) { + + if (*rrex3->str) { + char *pipe_position = strstr(rrex3->expr, "|"); + if (pipe_position != NULL) { + rrex3->expr = pipe_position + 1; + rrex3->str = rrex3->_str; + rrex3->valid = true; + return true; + } + } + 
if (rrex3->match_from_start) { + rrex3->valid = false; + return rrex3->valid; + } + if (!*rrex3->str++) { + rrex3->valid = false; + return rrex3->valid; + } + rrex3->expr = rrex3->_expr; + if (rrex3->str) + rrex3->valid = true; + } + } + return rrex3->valid; +} + +rrex3_t *rrex3(rrex3_t *rrex3, char *str, char *expr) { +#if RREX3_DEBUG == 1 + printf("Regex check: %s:%s:%d\n", expr, str, 1); +#endif + bool self_initialized = false; + if (rrex3 == NULL) { + self_initialized = true; + rrex3 = rrex3_new(); + } else { + rrex3_reset(rrex3); + } + + rrex3->_str = str; + rrex3->_expr = rrex3->compiled ? rrex3->compiled : expr; + rrex3->str = rrex3->_str; + rrex3->expr = rrex3->_expr; + while (*rrex3->expr && !rrex3->exit) { + if (!rrex3_move(rrex3, true)) + return NULL; + } + if (rrex3->valid) { + return rrex3; + } else { + if (self_initialized) { + rrex3_free(rrex3); + } + return NULL; + } +} + +void rrex3_test() { + rrex3_t *rrex = rrex3_new(); + + assert(rrex3(rrex, "aaaaaaa", "a*a$")); + + // assert(rrex3("ababa", "a*b*a*b*a$")); + assert(rrex3(rrex, "#include\"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "#include \"test.h\"a", "#include.*\".*\"a$")); + assert(rrex3(rrex, "aaaaaad", "a*d$")); + assert(rrex3(rrex, "abcdef", "abd?cdef")); + assert(!rrex3(rrex, "abcdef", "abd?def")); + assert(rrex3(rrex, "abcdef", "def")); + assert(!rrex3(rrex, "abcdef", "^def")); + assert(rrex3(rrex, "abcdef", "def$")); + assert(!rrex3(rrex, "abcdef", "^abc$")); + assert(rrex3(rrex, "aB!.#1", "......")); + assert(!rrex3(rrex, "aB!.#\n", " ......")); + assert(!rrex3(rrex, "aaaaaad", "q+d$")); + assert(rrex3(rrex, "aaaaaaa", "a+a$")); + assert(rrex3(rrex, "aaaaaad", "q*d$")); + assert(!rrex3(rrex, "aaaaaad", "^q*d$")); + + // Asterisk function + assert(rrex3(rrex, "123321", "123*321")); + assert(rrex3(rrex, "pony", "p*ony")); + assert(rrex3(rrex, "pppony", "p*ony")); + assert(rrex3(rrex, "ppony", "p*pony")); + assert(rrex3(rrex, "pppony", "pp*pony")); + assert(rrex3(rrex, 
"pppony", ".*pony")); + assert(rrex3(rrex, "pony", ".*ony")); + assert(rrex3(rrex, "pony", "po*ny")); + // assert(rrex3(rrex,"ppppony", "p*pppony")); + + // Plus function + assert(rrex3(rrex, "pony", "p+ony")); + assert(!rrex3(rrex, "ony", "p+ony")); + assert(rrex3(rrex, "ppony", "p+pony")); + assert(rrex3(rrex, "pppony", "pp+pony")); + assert(rrex3(rrex, "pppony", ".+pony")); + assert(rrex3(rrex, "pony", ".+ony")); + assert(rrex3(rrex, "pony", "po+ny")); + + // Slash functions + assert(rrex3(rrex, "a", "\\w")); + assert(!rrex3(rrex, "1", "\\w")); + assert(rrex3(rrex, "1", "\\W")); + assert(!rrex3(rrex, "a", "\\W")); + assert(rrex3(rrex, "a", "\\S")); + assert(!rrex3(rrex, " ", "\\s")); + assert(!rrex3(rrex, "\t", "\\s")); + assert(!rrex3(rrex, "\n", "\\s")); + assert(rrex3(rrex, "1", "\\d")); + assert(!rrex3(rrex, "a", "\\d")); + assert(rrex3(rrex, "a", "\\D")); + assert(!rrex3(rrex, "1", "\\D")); + assert(rrex3(rrex, "abc", "\\b")); + + assert(rrex3(rrex, "abc", "\\babc")); + assert(!rrex3(rrex, "abc", "a\\b")); + assert(!rrex3(rrex, "abc", "ab\\b")); + assert(!rrex3(rrex, "abc", "abc\\b")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + assert(rrex3(rrex, "abc", "ab\\B")); + assert(!rrex3(rrex, "1ab", "1\\Bab")); + assert(rrex3(rrex, "abc", "a\\Bbc")); + + // Escaping of special characters test. 
+ assert(rrex3(rrex, "()+*.\\", "\\(\\)\\+\\*\\.\\\\")); + + // Pipe + // assert(rrex3(rrex,"abc","abc|def")); + assert(rrex3(rrex, "abc", "def|jkl|abc")); + assert(rrex3(rrex, "abc", "abc|def")); + + assert(rrex3(rrex, "rhq", "def|rhq|rha")); + assert(rrex3(rrex, "abc", "abc|def")); + + // Repeat + assert(rrex3(rrex, "aaaaa", "a{4}")); + + assert(rrex3(rrex, "aaaa", "a{1,3}a")); + + // Range + assert(rrex3(rrex, "abc", "[abc][abc][abc]$")); + assert(rrex3(rrex, "def", "[^abc][^abc][^abc]$")); + assert(rrex3(rrex, "defabc", "[^abc][^abc][^abc]abc")); + assert(rrex3(rrex, "0-9", "0-9")); + assert(rrex3(rrex, "55-9", "[^6-9]5-9$")); + assert(rrex3(rrex, "a", "[a-z]$")); + assert(rrex3(rrex, "A", "[A-Z]$")); + assert(rrex3(rrex, "5", "[0-9]$")); + assert(!rrex3(rrex, "a", "[^a-z]$")); + assert(!rrex3(rrex, "A", "[^A-Z]$")); + assert(!rrex3(rrex, "5", "[^0-9]$")); + assert(rrex3(rrex, "123abc", "[0-9]*abc$")); + assert(rrex3(rrex, "123123", "[0-9]*$")); + + // Parentheses + + assert(rrex3(rrex, "datadata", "(data)*")); + + assert(rrex3(rrex, "datadatapony", "(data)*pony$")); + + assert(!rrex3(rrex, "datadatapony", "(d*p*ata)*pond$")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato")); + assert(rrex3(rrex, "datadatadato", "(d*p*ata)*dato$")); + assert(!rrex3(rrex, "datadatadato", "(d*p*a*ta)*gato$")); + + // Matches + assert(rrex3(rrex, "123", "(123)")); + assert(!strcmp(rrex->matches[0], "123")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "123321a", "(123)([0-4][2]1)a$")); + assert(!strcmp(rrex->matches[1], "321")); + + assert(rrex3(rrex, "aaaabc", "(.*)c")); + + assert(rrex3(rrex, "abcde", ".....$")); + + assert(rrex3(rrex, "abcdefghijklmnopqrstuvwxyz", + "..........................$")); + // printf("(%d)\n", rrex->valid); + + assert(rrex3(rrex, " #include ", "#include.*<(.*)>")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, " #include \"stdlib.h\"", 
"#include.\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + assert(rrex3(rrex, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + /* + assert(rrex3(rrex, " #include ", "#include.+<(.+)>")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(rrex3(rrex, " #include \"stdlib.h\"", "#include.+\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdlib.h")); + + assert(rrex3(rrex, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.+)\"\"(.+)\"\"(.+)\"")); + assert(!strcmp(rrex->matches[0], "stdio.h")); + assert(!strcmp(rrex->matches[1], "string.h")); + assert(!strcmp(rrex->matches[2], "sys/time.h")); + */ + // assert(rrex3(rrex,"char pony() { + // }","\\b\\w+(\\s+\\*+)?\\s+\\w+\\s*\\([^)]*\\)\s*\\{[^{}]*\\}")); + + rrex3_free(rrex); +} +#endif +#ifndef RARENA_H +#define RARENA_H + +#include +#include + +typedef struct arena_t { + unsigned char *memory; + unsigned int pointer; + unsigned int size; +} arena_t; + +arena_t *arena_construct() { + arena_t *arena = (arena_t *)rmalloc(sizeof(arena_t)); + arena->memory = NULL; + arena->pointer = 0; + arena->size = 0; + return arena; +} + +arena_t *arena_new(size_t size) { + arena_t *arena = arena_construct(); + arena->memory = (unsigned char *)rmalloc(size); + arena->size = size; + return arena; +} + +void *arena_alloc(arena_t *arena, size_t size) { + if (arena->pointer + size > arena->size) { + return NULL; + } + void *p = arena->memory + arena->pointer; + arena->pointer += size; + return p; +} + +void arena_free(arena_t *arena) { + // Just constructed and unused arena memory is NULL so no free needed + if (arena->memory) { + rfree(arena->memory); + } + rfree(arena); +} + +void arena_reset(arena_t *arena) { arena->pointer = 0; } +#endif +#ifndef RLIB_RIO +#define RLIB_RIO +#include +#include +#include +#include +#include +#include +#include 
// Returns true when `path` can be stat()ed, i.e. it exists and is reachable.
bool rfile_exists(char *path) {
    struct stat s;
    return stat(path, &s) == 0;
}

// Joins `p1` and `p2` into `output` with exactly one '/' between them.
//
// A single leading '/' on `p2` is dropped. When `p1` is empty no separator is
// added, so the result is just `p2` (previously this case indexed
// output[-1]). `output` must be large enough for both parts plus the
// separator and terminator; the function cannot check that.
void rjoin_path(char *p1, char *p2, char *output) {
    size_t len = strlen(p1);
    strcpy(output, p1);

    if (len > 0 && output[len - 1] != '/') {
        output[len++] = '/';
        output[len] = 0;
    }
    if (p2[0] == '/') {
        p2++;
    }
    strcpy(output + len, p2);
}

// Classifies `path` by its permission bits.
//
// Returns  1 when it is a directory whose mode is exactly rwx for the owner
//            and nothing for group/others,
//          0 when it is a directory with any other mode,
//         -1 when stat() fails (the error is reported with perror),
//         -2 when it exists but is not a directory.
int risprivatedir(const char *path) {
    struct stat statbuf;

    if (stat(path, &statbuf) != 0) {
        perror("stat");
        return -1;
    }

    if (!S_ISDIR(statbuf.st_mode)) {
        return -2;
    }

    if ((statbuf.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) == S_IRWXU) {
        return 1; // Private (owner has all permissions, others have none)
    }

    return 0;
}

// Returns true when `path` is a directory, whatever its permissions.
//
// The previous implementation negated risprivatedir(), which reported
// owner-only (0700) directories as "not a directory".
bool risdir(const char *path) {
    struct stat statbuf;
    return stat(path, &statbuf) == 0 && S_ISDIR(statbuf.st_mode);
}

// Walks `path` recursively and invokes `callback` with the full path of every
// entry (files and directories alike); directories are reported before their
// contents. Does nothing when `path` cannot be opened as a directory.
void rforfile(char *path, void callback(char *)) {
    // opendir() fails for missing paths and for non-directories, so it
    // covers the old existence check and avoids handing NULL to readdir().
    DIR *dir = opendir(path);
    if (!dir) {
        return;
    }
    struct dirent *d;
    while ((d = readdir(dir)) != NULL) {
        // Skip only the self and parent links. The old test skipped every
        // name whose second character was '.', e.g. "a.txt".
        if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) {
            continue;
        }
        char full_path[4096];
        // rjoin_path() is unbounded: leave out entries that would not fit.
        if (strlen(path) + strlen(d->d_name) + 2 > sizeof(full_path)) {
            continue;
        }
        rjoin_path(path, d->d_name, full_path);

        callback(full_path);
        // NOTE(review): stat() follows symlinks, so a link cycle would
        // recurse without end; same as before, confirm whether that matters.
        if (risdir(full_path)) {
            rforfile(full_path, callback);
        }
    }
    closedir(dir);
}

// Waits up to `ms` milliseconds for `fd` to become readable.
// Returns true when data is available, false on timeout, error or when `fd`
// cannot be represented in an fd_set.
bool rfd_wait(int fd, int ms) {
    if (fd < 0 || fd >= FD_SETSIZE) {
        return false;
    }
    if (ms < 0) {
        ms = 0;
    }

    fd_set read_fds;
    struct timeval timeout;

    FD_ZERO(&read_fds);
    FD_SET(fd, &read_fds);

    // Split into seconds and microseconds: tv_usec must stay below one
    // million, which the old `1000 * ms` violated for ms >= 1000.
    timeout.tv_sec = ms / 1000;
    timeout.tv_usec = (ms % 1000) * 1000;

    int ret = select(fd + 1, &read_fds, NULL, NULL, &timeout);
    return ret > 0 && FD_ISSET(fd, &read_fds);
}

// Blocks until `fd` becomes readable by polling rfd_wait() in 10 ms slices.
// Always returns true; it never returns for a descriptor that can never
// become readable.
bool rfd_wait_forever(int fd) {
    while (!rfd_wait(fd, 10)) {
    }
    return true;
}

// Returns the size of `path` in bytes, or 0 when it cannot be stat()ed
// (previously an uninitialised struct was read in that case).
size_t rfile_size(char *path) {
    struct stat s;
    if (stat(path, &s) != 0) {
        return 0;
    }
    return (size_t)s.st_size;
}
// Formats `lnumber` with '.' as thousands separator, e.g. 1234567 becomes
// "1.234.567" and -1234 becomes "-1.234".
//
// Returns a pointer to a static buffer that is overwritten by the next call,
// so the result must be copied if it has to survive and the function is not
// reentrant.
char *rformat_number(long lnumber) {
    static char formatted[1024];

    char number[32];
    int len = snprintf(number, sizeof(number), "%ld", lnumber);

    // The sign is not a digit: counting it used to yield "-.123" for -123.
    int sign = number[0] == '-' ? 1 : 0;
    int digits = len - sign;
    int separators = (digits - 1) / 3;
    int new_len = len + separators;

    formatted[new_len] = '\0';

    // Fill from the right so a separator lands after every third digit.
    int i = len - 1;
    int j = new_len - 1;
    int count = 0;

    while (i >= sign) {
        if (count == 3) {
            formatted[j--] = '.';
            count = 0;
        }
        formatted[j--] = number[i--];
        count++;
    }
    if (sign) {
        formatted[0] = '-';
    }
    return formatted;
}
// Reports whether `s1` ends with the suffix `s2`.
//
// A NULL `s1` only matches a NULL `s2`. For a non-NULL `s1`, a NULL or empty
// `s2` (or `s2` being the very same pointer) always matches.
bool rstrendswith(const char *s1, const char *s2) {
    if (!s1) {
        return !s2;
    }
    if (!s2 || s2 == s1 || s2[0] == '\0') {
        return true;
    }

    const size_t suffix_len = strlen(s2);
    const size_t total_len = strlen(s1);
    if (total_len < suffix_len) {
        return false;
    }

    // Compare the last suffix_len bytes of s1 against the whole suffix.
    const char *tail = s1 + (total_len - suffix_len);
    return memcmp(tail, s2, suffix_len) == 0;
}
// Copies one line of `input`, starting at byte `offset`, into `output`.
//
// A line ends at the first '\n' (which is copied too) or at the end of the
// string. When `strip_nl` is true a trailing '\n' is removed from `output`.
// `output` must be able to hold the longest line plus its terminator.
//
// Returns the offset of the byte following the copied line, to be passed to
// the next call. At or past the end of `input` the output is the empty
// string and `offset` is returned unchanged, so callers stop on an empty
// `output`.
size_t rstrtokline(char *input, char *output, size_t offset, bool strip_nl) {
    size_t len = strlen(input);
    size_t count = 0;

    output[0] = 0;
    if (offset >= len) {
        return offset;
    }

    const char *src = input + offset;
    while (src[count] != 0) {
        output[count] = src[count];
        count++;
        if (output[count - 1] == '\n') {
            break;
        }
    }
    // `count` is the number of bytes copied, so the final character of an
    // unterminated last line is kept (it used to be overwritten here).
    output[count] = 0;

    // Guard count > 0 so an empty result never reads output[-1].
    if (strip_nl && count > 0 && output[count - 1] == '\n') {
        output[count - 1] = 0;
    }
    return offset + count;
}
// Returns true when the first character of `str` is a decimal digit.
// NULL and empty strings yield false.
bool rstartswithnumber(char *str) {
    if (str == NULL) {
        return false;
    }
    // <ctype.h> functions require an unsigned char value (or EOF); a plain
    // char with the high bit set would otherwise be undefined behaviour.
    return isdigit((unsigned char)str[0]) != 0;
}
rstrip_whitespace(r, rstripped); + + double d1, d2; + bool found_d1 = rstrextractdouble(lstripped, &d1); + bool found_d2 = rstrextractdouble(rstripped, &d2); + + if (found_d1 && found_d2) { + double frac_part1; + double int_part1; + frac_part1 = modf(d1, &int_part1); + double frac_part2; + double int_part2; + frac_part2 = modf(d2, &int_part2); + if (d1 == d2) { + return strcmp(lstripped, rstripped); + } else if (frac_part1 && frac_part2) { + return d1 > d2; + } else if (frac_part1 && !frac_part2) { + return 1; + } else if (frac_part2 && !frac_part1) { + return -1; + } else if (!frac_part1 && !frac_part2) { + return d1 > d2; + } + } + return 0; +} + +int rstrsort(char *input, char *output) { + char **lines = (char **)malloc(strlen(input) * 10); + int line_count = rstrsplit(input, lines); + qsort(lines, line_count, sizeof(char *), cmp_line); + rstrjoin(lines, line_count, "", output); + free(lines); + return line_count; +} + +#endif +#ifndef RLIB_TERMINAL_H +#define RLIB_TERMINAL_H + +#include +#include +#include +#include + +char *rfcaptured = NULL; + +void rfcapture(FILE *f, char *buff, size_t size) { + rfcaptured = buff; + setvbuf(f, rfcaptured, _IOFBF, size); +} +void rfstopcapture(FILE *f) { setvbuf(f, 0, _IOFBF, 0); } + +bool _r_disable_stdout_toggle = false; + +FILE *_r_original_stdout = NULL; + +bool rr_enable_stdout() { + if (_r_disable_stdout_toggle) + return false; + if (!_r_original_stdout) { + stdout = fopen("/dev/null", "rb"); + return false; + } + if (_r_original_stdout && _r_original_stdout != stdout) { + fclose(stdout); + } + stdout = _r_original_stdout; + return true; +} +bool rr_disable_stdout() { + if (_r_disable_stdout_toggle) { + return false; + } + if (_r_original_stdout == NULL) { + _r_original_stdout = stdout; + } + if (stdout == _r_original_stdout) { + stdout = fopen("/dev/null", "rb"); + return true; + } + return false; +} +bool rr_toggle_stdout() { + if (!_r_original_stdout) { + rr_disable_stdout(); + return true; + } else if (stdout != 
// State of a textual progress bar.
typedef struct rprogressbar_t {
    unsigned long current_value; // last value passed to rprogressbar_update
    unsigned long min_value;     // value that maps to 0%
    unsigned long max_value;     // value that maps to 100%
    unsigned int length;         // number of filled cells, at most `width`
    bool changed;                // true while a redraw is pending
    double percentage;           // progress as a fraction (1.0 == 100%)
    unsigned int width;          // total number of cells in the bar
    unsigned long draws;         // how many times the bar has been drawn
    FILE *fout;                  // stream the bar is written to
} rprogressbar_t;

// Allocates a progress bar covering [min_value, max_value] that is `width`
// cells wide and draws to `fout` (stdout when `fout` is NULL).
// Returns NULL when the allocation fails; release with rprogressbar_free().
rprogressbar_t *rprogressbar_new(long min_value, long max_value,
                                 unsigned int width, FILE *fout) {
    rprogressbar_t *pbar = (rprogressbar_t *)malloc(sizeof(rprogressbar_t));
    if (!pbar) {
        return NULL;
    }
    pbar->min_value = min_value;
    pbar->max_value = max_value;
    pbar->current_value = min_value;
    pbar->width = width;
    pbar->draws = 0;
    pbar->length = 0;
    pbar->changed = false;
    pbar->percentage = 0; // was left uninitialised
    pbar->fout = fout ? fout : stdout;
    return pbar;
}

// Releases a bar created by rprogressbar_new(). The stream is not closed.
void rprogressbar_free(rprogressbar_t *pbar) { free(pbar); }

// Draws the bar when a redraw is pending; otherwise does nothing.
//
// The filled part shows the draw counter followed by blanks on a coloured
// background, then the percentage. Once the bar is full the line is blanked
// again. Everything is streamed straight to pbar->fout, so no bar width can
// overflow a buffer.
void rprogressbar_draw(rprogressbar_t *pbar) {
    if (!pbar->changed) {
        return;
    }
    pbar->changed = false;
    pbar->draws++;

    char draws_text[32];
    snprintf(draws_text, sizeof(draws_text), "%lu", pbar->draws);
    const char *digit = draws_text;

    // Never paint more cells than the bar has; `width - length` used to
    // wrap around when length exceeded width.
    unsigned int filled =
        pbar->length < pbar->width ? pbar->length : pbar->width;
    FILE *out = pbar->fout;

    // Colour on, carriage return, one-cell left border.
    fputs("\033[43m\r ", out);
    for (unsigned int i = 0; i < filled; i++) {
        if (*digit) {
            fputc(*digit, out);
            digit++;
        } else {
            fputc(' ', out);
        }
    }
    fputs("\033[0m", out);
    for (unsigned int i = filled; i < pbar->width; i++) {
        fputc(' ', out);
    }
    // Right border, colour reset and the percentage label.
    fprintf(out, " \033[0m \033[33m%.2f%%\033[0m ", pbar->percentage * 100);

    if (filled == pbar->width) {
        // Completed: wipe the line so later output starts on a clean row.
        fputc('\r', out);
        for (unsigned long i = 0; i < (unsigned long)pbar->width + 10; i++) {
            fputc(' ', out);
        }
        fputc('\r', out);
    }
    fflush(out);
}

// Records `value` as the current progress and redraws when the number of
// filled cells changes.
//
// The fraction is measured from min_value; values below it count as 0 and an
// empty range (max_value <= min_value) counts as complete. `percentage` is
// not capped, but the number of filled cells is capped at `width`.
// Returns true when the bar was redrawn.
bool rprogressbar_update(rprogressbar_t *pbar, unsigned long value) {
    if (value == pbar->current_value) {
        return false;
    }
    pbar->current_value = value;

    if (pbar->max_value <= pbar->min_value) {
        pbar->percentage = 1.0; // nothing to progress through
    } else if (value <= pbar->min_value) {
        pbar->percentage = 0.0;
    } else {
        pbar->percentage = (double)(value - pbar->min_value) /
                           (double)(pbar->max_value - pbar->min_value);
    }

    unsigned long new_length = (unsigned long)(pbar->percentage * pbar->width);
    if (new_length > pbar->width) {
        new_length = pbar->width;
    }
    pbar->changed = new_length != pbar->length;
    if (pbar->changed) {
        pbar->length = (unsigned int)new_length;
        rprogressbar_draw(pbar);
        return true;
    }
    return false;
}
rshell_keypress_t { + bool pressed; + bool ctrl; + bool shift; + bool escape; + char c; + int ms; + int fd; +} rshell_keypress_t; + +typedef struct rterm_t { + bool show_cursor; + bool show_footer; + rshell_keypress_t key; + void (*before_cursor_move)(struct rterm_t *); + void (*after_cursor_move)(struct rterm_t *); + void (*after_key_press)(struct rterm_t *); + void (*before_key_press)(struct rterm_t *); + void (*before_draw)(struct rterm_t *); + void *session; + unsigned long iterations; + void (*tick)(struct rterm_t *); + char *status_text; + winsize_t size; + struct { + int x; + int y; + int pos; + int available; + } cursor; +} rterm_t; + +typedef void (*rterm_event)(rterm_t *); + +void rterm_init(rterm_t *rterm) { + memset(rterm, 0, sizeof(rterm_t)); + rterm->show_cursor = true; + rterm->show_cursor = true; +} + +void rterm_getwinsize(winsize_t *w) { + // Get the terminal size + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, w) == -1) { + perror("ioctl"); + exit(EXIT_FAILURE); + } +} + +// Terminal setup functions +void enableRawMode(struct termios *orig_termios) { + struct termios raw = *orig_termios; + raw.c_lflag &= ~(ICANON | ECHO); // Disable canonical mode and echoing + raw.c_cc[VMIN] = 0; + raw.c_cc[VTIME] = 1; // Set timeout for read input + + tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); +} + +void disableRawMode(struct termios *orig_termios) { + tcsetattr(STDIN_FILENO, TCSAFLUSH, + orig_termios); // Restore original terminal settings +} + +void rterm_clear_screen() { + printf("\x1b[2J"); // Clear the entire screen + printf("\x1b[H"); // Move cursor to the home position (0,0) +} + +void setBackgroundColor() { + printf("\x1b[44m"); // Set background color to blue +} + +void rterm_move_cursor(int x, int y) { + + printf("\x1b[%d;%dH", y + 1, x + 1); // Move cursor to (x, y) +} + +void cursor_set(rterm_t *rt, int x, int y) { + rt->cursor.x = x; + rt->cursor.y = y; + rt->cursor.pos = y * rt->size.ws_col + x; + rterm_move_cursor(rt->cursor.x, rt->cursor.y); +} +void 
// Paints the status bar on the last terminal row and puts the cursor back.
//
// The row is first blanked with one space per column. Then either
// rt->status_text is printed, or (when it is NULL) a default line with the
// 1-based cursor position, the last key `c` (as character and as code) and
// the iteration counter `i`.
void rterm_print_status_bar(rterm_t *rt, char c, unsigned long i) {
    winsize_t ws = rt->size;
    rterm_move_cursor(0, ws.ws_row - 1);

    // Write straight to stdout instead of assembling the row in fixed
    // 1024/500-byte buffers, which overflowed on wide terminals or long
    // status texts.
    printf("%*s", (int)ws.ws_col, "");

    if (!rt->status_text) {
        printf("\rp:%d:%d | k:%c:%d | i:%lu ", rt->cursor.x + 1,
               rt->cursor.y + 1, c == 0 ? '0' : c, c, i);
    } else {
        printf("\r%s", rt->status_text);
    }
    cursor_restore(rt);
}
1 if shift + arrow + press.c = ch; + if (ch >= '0' && ch <= '9') + ch = getchar(); + press.c = ch; + if (ch == ';') { + ch = getchar(); + press.c = ch; + if (ch == '5') { + press.ctrl = true; + press.c = getchar(); // De arrow + } + } + } else { + press.c = ch; + } + } + return press; +} + +// Main function +void rterm_loop(rterm_t *rt) { + struct termios orig_termios; + tcgetattr(STDIN_FILENO, &orig_termios); // Get current terminal attributes + enableRawMode(&orig_termios); + + int x = 0, y = 0; // Initial cursor position + char ch = 0; + ; + while (1) { + rterm_getwinsize(&rt->size); + rt->cursor.available = rt->size.ws_col * rt->size.ws_row; + if (rt->tick) { + rt->tick(rt); + } + + rterm_hide_cursor(); + // setBackgroundColor(); + rterm_clear_screen(); + if (rt->before_draw) { + rt->before_draw(rt); + } + rterm_print_status_bar(rt, ch, rt->iterations); + if (!rt->iterations || (x != rt->cursor.x || y != rt->cursor.y)) { + if (y == rt->size.ws_row) { + y--; + } + if (y < 0) { + y = 0; + } + rt->cursor.x = x; + rt->cursor.y = y; + if (rt->before_cursor_move) + rt->before_cursor_move(rt); + cursor_set(rt, rt->cursor.x, rt->cursor.y); + if (rt->after_cursor_move) + rt->after_cursor_move(rt); + x = rt->cursor.x; + y = rt->cursor.y; + } + if (rt->show_cursor) + rterm_show_cursor(); + fflush(stdout); + + rt->key = rshell_getkey(); + if (rt->key.pressed && rt->before_key_press) { + rt->before_key_press(rt); + } + rshell_keypress_t key = rt->key; + ch = key.c; + if (ch == 'q') + break; // Press 'q' to quit + + // Escape + if (key.escape) { + switch (key.c) { + case 65: // Move up + if (y > -1) + y--; + break; + case 66: // Move down + if (y < rt->size.ws_row) + y++; + break; + case 68: // Move left + if (x > 0) + x--; + if (key.ctrl) + x -= 4; + break; + case 67: // Move right + if (x < rt->size.ws_col) { + x++; + } + if (key.ctrl) { + x += 4; + } + break; + } + } + if (rt->key.pressed && rt->after_key_press) { + rt->after_key_press(rt); + } + rt->iterations++; + + // 
usleep (1000); + } + + // Cleanup + printf("\x1b[0m"); // Reset colors + rterm_clear_screen(); + disableRawMode(&orig_termios); +} +#endif +#ifndef RTREE_H +#define RTREE_H +#include +#include +#include + +typedef struct rtree_t { + struct rtree_t *next; + struct rtree_t *children; + char c; + void *data; +} rtree_t; + +rtree_t *rtree_new() { + rtree_t *b = (rtree_t *)rmalloc(sizeof(rtree_t)); + b->next = NULL; + b->children = NULL; + b->c = 0; + b->data = NULL; + return b; +} + +rtree_t *rtree_set(rtree_t *b, char *c, void *data) { + while (b) { + if (b->c == 0) { + b->c = *c; + c++; + if (*c == 0) { + b->data = data; + // printf("SET1 %c\n", b->c); + return b; + } + } else if (b->c == *c) { + c++; + if (*c == 0) { + b->data = data; + return b; + } + if (b->children) { + b = b->children; + } else { + b->children = rtree_new(); + b = b->children; + } + } else if (b->next) { + b = b->next; + } else { + b->next = rtree_new(); + b = b->next; + b->c = *c; + c++; + if (*c == 0) { + b->data = data; + return b; + } else { + b->children = rtree_new(); + b = b->children; + } + } + } + return NULL; +} + +rtree_t *rtree_find(rtree_t *b, char *c) { + while (b) { + if (b->c == *c) { + c++; + if (*c == 0) { + return b; + } + b = b->children; + continue; + } + b = b->next; + } + return NULL; +} + +void rtree_free(rtree_t *b) { + if (!b) + return; + rtree_free(b->children); + rtree_free(b->next); + rfree(b); +} + +void *rtree_get(rtree_t *b, char *c) { + rtree_t *t = rtree_find(b, c); + if (t) { + return t->data; + } + return NULL; +} +#endif +#ifndef RLEXER_H +#define RLEXER_H +#include +#include +#include +#include +#include +#include + +#define RTOKEN_VALUE_SIZE 1024 + +typedef enum rtoken_type_t { + RT_UNKNOWN = 0, + RT_SYMBOL, + RT_NUMBER, + RT_STRING, + RT_PUNCT, + RT_OPERATOR, + RT_EOF = 10, + RT_BRACE_OPEN, + RT_CURLY_BRACE_OPEN, + RT_BRACKET_OPEN, + RT_BRACE_CLOSE, + RT_CURLY_BRACE_CLOSE, + RT_BRACKET_CLOSE +} rtoken_type_t; + +typedef struct rtoken_t { + rtoken_type_t 
type; + char value[RTOKEN_VALUE_SIZE]; + unsigned int line; + unsigned int col; +} rtoken_t; + +static char *_content; +static unsigned int _content_ptr; +static unsigned int _content_line; +static unsigned int _content_col; + +static int isgroupingchar(char c) { + return (c == '{' || c == '}' || c == '(' || c == ')' || c == '[' || + c == ']' || c == '"' || c == '\''); +} + +static int isoperator(char c) { + return (c == '+' || c == '-' || c == '/' || c == '*' || c == '=' || + c == '>' || c == '<' || c == '|' || c == '&'); +} + +static rtoken_t rtoken_new() { + rtoken_t token; + memset(&token, 0, sizeof(token)); + token.type = RT_UNKNOWN; + return token; +} + +rtoken_t rlex_number() { + rtoken_t token = rtoken_new(); + token.col = _content_col; + token.line = _content_line; + bool first_char = true; + int dot_count = 0; + char c; + while (isdigit(c = _content[_content_ptr]) || + (first_char && _content[_content_ptr] == '-') || + (dot_count == 0 && _content[_content_ptr] == '.')) { + if (c == '.') + dot_count++; + first_char = false; + char chars[] = {c, 0}; + strcat(token.value, chars); + _content_ptr++; + _content_col++; + } + token.type = RT_NUMBER; + return token; +} + +static rtoken_t rlex_symbol() { + rtoken_t token = rtoken_new(); + + token.col = _content_col; + token.line = _content_line; + char c; + while (isalpha(_content[_content_ptr]) || _content[_content_ptr] == '_') { + c = _content[_content_ptr]; + char chars[] = {c, 0}; + strcat(token.value, chars); + _content_ptr++; + _content_col++; + } + token.type = RT_SYMBOL; + return token; +} + +static rtoken_t rlex_operator() { + + rtoken_t token = rtoken_new(); + + token.col = _content_col; + token.line = _content_line; + char c; + bool is_first = true; + while (isoperator(_content[_content_ptr])) { + if (!is_first) { + if (_content[_content_ptr - 1] == '=' && + _content[_content_ptr] == '-') { + break; + } + } + c = _content[_content_ptr]; + char chars[] = {c, 0}; + strcat(token.value, chars); + 
_content_ptr++; + _content_col++; + is_first = false; + } + token.type = RT_OPERATOR; + return token; +} + +static rtoken_t rlex_punct() { + + rtoken_t token = rtoken_new(); + + token.col = _content_col; + token.line = _content_line; + char c; + bool is_first = true; + while (ispunct(_content[_content_ptr])) { + if (!is_first) { + if (_content[_content_ptr] == '"') { + break; + } + if (_content[_content_ptr] == '\'') { + break; + } + if (isgroupingchar(_content[_content_ptr])) { + break; + } + if (isoperator(_content[_content_ptr])) { + break; + } + } + c = _content[_content_ptr]; + char chars[] = {c, 0}; + strcat(token.value, chars); + _content_ptr++; + _content_col++; + is_first = false; + } + token.type = RT_PUNCT; + return token; +} + +static rtoken_t rlex_string() { + rtoken_t token = rtoken_new(); + char c; + token.col = _content_col; + token.line = _content_line; + char str_chr = _content[_content_ptr]; + _content_ptr++; + while (_content[_content_ptr] != str_chr) { + c = _content[_content_ptr]; + if (c == '\\') { + _content_ptr++; + c = _content[_content_ptr]; + if (c == 'n') { + c = '\n'; + } else if (c == 'r') { + c = '\r'; + } else if (c == 't') { + c = '\t'; + } else if (c == str_chr) { + c = str_chr; + } + + _content_col++; + } + char chars[] = {c, 0}; + strcat(token.value, chars); + _content_ptr++; + _content_col++; + } + _content_ptr++; + token.type = RT_STRING; + return token; +} + +void rlex(char *content) { + _content = content; + _content_ptr = 0; + _content_col = 1; + _content_line = 1; +} + +static void rlex_repeat_str(char *dest, char *src, unsigned int times) { + for (size_t i = 0; i < times; i++) { + strcat(dest, src); + } +} + +rtoken_t rtoken_create(rtoken_type_t type, char *value) { + rtoken_t token = rtoken_new(); + token.type = type; + token.col = _content_col; + token.line = _content_line; + strcpy(token.value, value); + return token; +} + +rtoken_t rlex_next() { + while (true) { + + _content_col++; + + if (_content[_content_ptr] == 0) 
{ + return rtoken_create(RT_EOF, "eof"); + } else if (_content[_content_ptr] == '\n') { + _content_line++; + _content_col = 1; + _content_ptr++; + } else if (isspace(_content[_content_ptr])) { + _content_ptr++; + } else if (isdigit(_content[_content_ptr]) || + (_content[_content_ptr] == '-' && + isdigit(_content[_content_ptr + 1]))) { + return rlex_number(); + } else if (isalpha(_content[_content_ptr]) || + _content[_content_ptr] == '_') { + return rlex_symbol(); + } else if (_content[_content_ptr] == '"' || + _content[_content_ptr] == '\'') { + return rlex_string(); + } else if (isoperator(_content[_content_ptr])) { + return rlex_operator(); + } else if (ispunct(_content[_content_ptr])) { + if (_content[_content_ptr] == '{') { + + _content_ptr++; + return rtoken_create(RT_CURLY_BRACE_OPEN, "{"); + } + if (_content[_content_ptr] == '}') { + + _content_ptr++; + return rtoken_create(RT_CURLY_BRACE_CLOSE, "}"); + } + if (_content[_content_ptr] == '(') { + + _content_ptr++; + return rtoken_create(RT_BRACE_OPEN, "("); + } + if (_content[_content_ptr] == ')') { + + _content_ptr++; + return rtoken_create(RT_BRACE_CLOSE, ")"); + } + if (_content[_content_ptr] == '[') { + + _content_ptr++; + return rtoken_create(RT_BRACKET_OPEN, "["); + } + if (_content[_content_ptr] == ']') { + + _content_ptr++; + return rtoken_create(RT_BRACKET_CLOSE, "]"); + } + return rlex_punct(); + } + } +} + +char *rlex_format(char *content) { + rlex(content); + char *result = (char *)malloc(strlen(content) + 4096); + result[0] = 0; + unsigned int tab_index = 0; + char *tab_chars = " "; + unsigned int col = 0; + rtoken_t token_previous; + token_previous.value[0] = 0; + token_previous.type = RT_UNKNOWN; + while (true) { + rtoken_t token = rlex_next(); + if (token.type == RT_EOF) { + break; + } + + // col = strlen(token.value); + + if (col == 0) { + rlex_repeat_str(result, tab_chars, tab_index); + // col = strlen(token.value);// strlen(tab_chars) * tab_index; + } + + if (token.type == RT_STRING) { + 
strcat(result, "\""); + + char string_with_slashes[strlen(token.value) * 2 + 1]; + rstraddslashes(token.value, string_with_slashes); + strcat(result, string_with_slashes); + + strcat(result, "\""); + // col+= strlen(token.value) + 2; + // printf("\n"); + // printf("<<<%s>>>\n",token.value); + + memcpy(&token_previous, &token, sizeof(token)); + continue; + } + if (!(strcmp(token.value, "{"))) { + if (col != 0) { + strcat(result, "\n"); + rlex_repeat_str(result, " ", tab_index); + } + strcat(result, token.value); + + tab_index++; + + strcat(result, "\n"); + + col = 0; + + memcpy(&token_previous, &token, sizeof(token)); + continue; + } else if (!(strcmp(token.value, "}"))) { + unsigned int tab_indexed = 0; + if (tab_index) + tab_index--; + strcat(result, "\n"); + + rlex_repeat_str(result, tab_chars, tab_index); + tab_indexed++; + + strcat(result, token.value); + strcat(result, "\n"); + col = 0; + + memcpy(&token_previous, &token, sizeof(token)); + continue; + } + if ((token_previous.type == RT_SYMBOL && token.type == RT_NUMBER) || + (token_previous.type == RT_NUMBER && token.type == RT_SYMBOL) || + (token_previous.type == RT_PUNCT && token.type == RT_SYMBOL) || + (token_previous.type == RT_BRACE_CLOSE && + token.type == RT_SYMBOL) || + (token_previous.type == RT_SYMBOL && token.type == RT_SYMBOL)) { + if (token_previous.value[0] != ',' && + token_previous.value[0] != '.') { + if (token.type != RT_OPERATOR && token.value[0] != '.') { + strcat(result, "\n"); + rlex_repeat_str(result, tab_chars, tab_index); + } + } + } + + if (token.type == RT_OPERATOR) { + strcat(result, " "); + } + if (token.type == RT_STRING) { + strcat(result, "\""); + } + strcat(result, token.value); + if (token.type == RT_STRING) { + strcat(result, "\""); + } + + if (token.type == RT_OPERATOR) { + strcat(result, " "); + } + if (!strcmp(token.value, ",")) { + strcat(result, " "); + } + col += strlen(token.value); + memcpy(&token_previous, &token, sizeof(token)); + } + return result; +} +#endif 
+#ifndef RBENCH_H +#define RBENCH_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define RBENCH(times, action) \ + { \ + unsigned long utimes = (unsigned long)times; \ + nsecs_t start = nsecs(); \ + for (unsigned long i = 0; i < utimes; i++) { \ + { \ + action; \ + } \ + } \ + nsecs_t end = nsecs(); \ + printf("%s\n", format_time(end - start)); \ + } + +#define RBENCHP(times, action) \ + { \ + printf("\n"); \ + nsecs_t start = nsecs(); \ + unsigned int prev_percentage = 0; \ + unsigned long utimes = (unsigned long)times; \ + for (unsigned long i = 0; i < utimes; i++) { \ + unsigned int percentage = \ + ((long double)i / (long double)times) * 100; \ + int percentage_changed = percentage != prev_percentage; \ + __attribute__((unused)) int first = i == 0; \ + __attribute__((unused)) int last = i == utimes - 1; \ + { action; }; \ + if (percentage_changed) { \ + printf("\r%d%%", percentage); \ + fflush(stdout); \ + \ + prev_percentage = percentage; \ + } \ + } \ + nsecs_t end = nsecs(); \ + printf("\r%s\n", format_time(end - start)); \ + } + +struct rbench_t; + +typedef struct rbench_function_t { +#ifdef __cplusplus + void (*call)(); +#else + void(*call); +#endif + char name[256]; + char group[256]; + void *arg; + void *data; + bool first; + bool last; + int argc; + unsigned long times_executed; + + nsecs_t average_execution_time; + nsecs_t total_execution_time; +} rbench_function_t; + +typedef struct rbench_t { + unsigned int function_count; + rbench_function_t functions[100]; + rbench_function_t *current; + rprogressbar_t *progress_bar; + bool show_progress; + int winner; + bool stdout; + unsigned long times; + bool silent; + nsecs_t execution_time; +#ifdef __cplusplus + void (*add_function)(struct rbench_t *r, const char *name, + const char *group, void (*)()); +#else + void (*add_function)(struct rbench_t *r, const char *name, + const char *group, void *); +#endif + void (*rbench_reset)(struct rbench_t *r); + struct rbench_t 
*(*execute)(struct rbench_t *r, long times); + struct rbench_t *(*execute1)(struct rbench_t *r, long times, void *arg1); + struct rbench_t *(*execute2)(struct rbench_t *r, long times, void *arg1, + void *arg2); + struct rbench_t *(*execute3)(struct rbench_t *r, long times, void *arg1, + void *arg2, void *arg3); + +} rbench_t; + +FILE *_rbench_stdout = NULL; +FILE *_rbench_stdnull = NULL; + +void rbench_toggle_stdout(rbench_t *r) { + if (!r->stdout) { + if (_rbench_stdout == NULL) { + _rbench_stdout = stdout; + } + if (_rbench_stdnull == NULL) { + _rbench_stdnull = fopen("/dev/null", "wb"); + } + if (stdout == _rbench_stdout) { + stdout = _rbench_stdnull; + } else { + stdout = _rbench_stdout; + } + } +} +void rbench_restore_stdout(rbench_t *r) { + if (r->stdout) + return; + if (_rbench_stdout) { + stdout = _rbench_stdout; + } + if (_rbench_stdnull) { + fclose(_rbench_stdnull); + _rbench_stdnull = NULL; + } +} + +rbench_t *rbench_new(); + +rbench_t *_rbench = NULL; +rbench_function_t *rbf; +rbench_t *rbench() { + if (_rbench == NULL) { + _rbench = rbench_new(); + } + return _rbench; +} + +typedef void *(*rbench_call)(); +typedef void *(*rbench_call1)(void *); +typedef void *(*rbench_call2)(void *, void *); +typedef void *(*rbench_call3)(void *, void *, void *); + +#ifdef __cplusplus +void rbench_add_function(rbench_t *rp, const char *name, const char *group, + void (*call)()) { +#else +void rbench_add_function(rbench_t *rp, const char *name, const char *group, + void *call) { +#endif + rbench_function_t *f = &rp->functions[rp->function_count]; + rp->function_count++; + f->average_execution_time = 0; + f->total_execution_time = 0; + f->times_executed = 0; + f->call = call; + strcpy(f->name, name); + strcpy(f->group, group); +} + +void rbench_reset_function(rbench_function_t *f) { + f->average_execution_time = 0; + f->times_executed = 0; + f->total_execution_time = 0; +} + +void rbench_reset(rbench_t *rp) { + for (unsigned int i = 0; i < rp->function_count; i++) { + 
rbench_reset_function(&rp->functions[i]); + } +} +int rbench_get_winner_index(rbench_t *r) { + int winner = 0; + nsecs_t time = 0; + for (unsigned int i = 0; i < r->function_count; i++) { + if (time == 0 || r->functions[i].total_execution_time < time) { + winner = i; + time = r->functions[i].total_execution_time; + } + } + return winner; +} +bool rbench_was_last_function(rbench_t *r) { + for (unsigned int i = 0; i < r->function_count; i++) { + if (i == r->function_count - 1 && r->current == &r->functions[i]) + return true; + } + return false; +} + +rbench_function_t *rbench_execute_prepare(rbench_t *r, int findex, long times, + int argc) { + rbench_toggle_stdout(r); + if (findex == 0) { + r->execution_time = 0; + } + rbench_function_t *rf = &r->functions[findex]; + rf->argc = argc; + rbf = rf; + r->current = rf; + if (r->show_progress) + r->progress_bar = rprogressbar_new(0, times, 20, stderr); + r->times = times; + // printf(" %s:%s gets executed for %ld times with %d + // arguments.\n",rf->group, rf->name, times,argc); + rbench_reset_function(rf); + + return rf; +} +void rbench_execute_finish(rbench_t *r) { + rbench_toggle_stdout(r); + if (r->progress_bar) { + free(r->progress_bar); + r->progress_bar = NULL; + } + r->current->average_execution_time = + r->current->total_execution_time / r->current->times_executed; + ; + // printf(" %s:%s finished executing in + // %s\n",r->current->group,r->current->name, + // format_time(r->current->total_execution_time)); + // rbench_show_results_function(r->current); + if (rbench_was_last_function(r)) { + rbench_restore_stdout(r); + unsigned int winner_index = rbench_get_winner_index(r); + r->winner = winner_index + 1; + if (!r->silent) + rprintgf(stderr, "Benchmark results:\n"); + nsecs_t total_time = 0; + + for (unsigned int i = 0; i < r->function_count; i++) { + rbf = &r->functions[i]; + total_time += rbf->total_execution_time; + bool is_winner = winner_index == i; + if (is_winner) { + if (!r->silent) + rprintyf(stderr, " > 
%s:%s:%s\n", + format_time(rbf->total_execution_time), rbf->group, + rbf->name); + } else { + if (!r->silent) + rprintbf(stderr, " %s:%s:%s\n", + format_time(rbf->total_execution_time), rbf->group, + rbf->name); + } + } + if (!r->silent) + rprintgf(stderr, "Total execution time: %s\n", + format_time(total_time)); + } + rbench_restore_stdout(r); + rbf = NULL; + r->current = NULL; +} +struct rbench_t *rbench_execute(rbench_t *r, long times) { + + for (unsigned int i = 0; i < r->function_count; i++) { + + rbench_function_t *f = rbench_execute_prepare(r, i, times, 0); + rbench_call c = (rbench_call)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + r->execution_time += f->total_execution_time; + rbench_execute_finish(r); + } + return r; +} + +struct rbench_t *rbench_execute1(rbench_t *r, long times, void *arg1) { + + for (unsigned int i = 0; i < r->function_count; i++) { + rbench_function_t *f = rbench_execute_prepare(r, i, times, 1); + rbench_call1 c = (rbench_call1)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(arg1); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(arg1); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + r->execution_time += f->total_execution_time; + rbench_execute_finish(r); + } + return r; +} + +struct rbench_t *rbench_execute2(rbench_t *r, long times, void *arg1, + void *arg2) { + + for (unsigned int i = 0; i < r->function_count; i++) { + rbench_function_t *f = rbench_execute_prepare(r, i, times, 2); + 
rbench_call2 c = (rbench_call2)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(arg1, arg2); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(arg1, arg2); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + r->execution_time += f->total_execution_time; + rbench_execute_finish(r); + } + return r; +} + +struct rbench_t *rbench_execute3(rbench_t *r, long times, void *arg1, + void *arg2, void *arg3) { + + for (unsigned int i = 0; i < r->function_count; i++) { + rbench_function_t *f = rbench_execute_prepare(r, i, times, 3); + + rbench_call3 c = (rbench_call3)f->call; + nsecs_t start = nsecs(); + f->first = true; + c(arg1, arg2, arg3); + f->first = false; + f->last = false; + f->times_executed++; + for (int j = 1; j < times; j++) { + c(arg1, arg2, arg3); + f->times_executed++; + f->last = f->times_executed == r->times - 1; + if (r->progress_bar) { + rprogressbar_update(r->progress_bar, f->times_executed); + } + } + f->total_execution_time = nsecs() - start; + rbench_execute_finish(r); + } + return r; +} + +rbench_t *rbench_new() { + + rbench_t *r = (rbench_t *)malloc(sizeof(rbench_t)); + memset(r, 0, sizeof(rbench_t)); + r->add_function = rbench_add_function; + r->rbench_reset = rbench_reset; + r->execute1 = rbench_execute1; + r->execute2 = rbench_execute2; + r->execute3 = rbench_execute3; + r->execute = rbench_execute; + r->stdout = true; + r->silent = false; + r->winner = 0; + r->show_progress = true; + return r; +} +void rbench_free(rbench_t *r) { free(r); } + +#endif +// END OF RLIB +#endif + +#include + +void benchmark(int times, char *str, char *expr) { + + regmatch_t matches[10]; + printf("Matching \"%s\" with \"%s\".\n", str, expr); + regex_t regex; + if (regcomp(®ex, expr, REG_EXTENDED)) { + printf("Creg: error in regular expression.\n"); + 
exit(1); + } + printf("creg: "); + RBENCH(times, { + if (regexec(®ex, str, 0, matches, 0)) { + printf("Creg: error executing regular expression.\n"); + } + }) + regfree(®ex); + ; + rrex3_t *rrex = rrex3_compile(NULL, expr); + printf("rrex3 (%s): ", rrex->compiled); + RBENCH(times, { + if (rrex3(rrex, str, NULL)) { + + } else { + printf("Rrex3: error\n"); + exit(0); + } + }); + rrex3_free(rrex); + printf("\n"); +} + +int main() { + rrex3_test(); + int times = 1; + benchmark(times, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\".*\"\".*\"\".*\""); + + benchmark(times, "abcdefghijklmnopqrstuvwxyz", + "abcdefghijklmnopqrstuvwxyz$"); + benchmark(times, "aaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaa$"); + benchmark(times, "abcdefghijklmnopqrstuvwxyz", + "..........................$"); + + // [abcm] failed + benchmark(times, "abcdefghijklmnopqrstuvwxyz", ".*z"); + benchmark(times, "abcde", ".*e"); + benchmark(times, "abcdef", ".*f"); + + benchmark(times, "abcdefghijklmnopqrstuvwxyz", + "[a]b*c+d\\w[f-g][g][h-i][i][^a][abcdefgk][l][m][n][o][p][a-z][r]" + "[s][t][u][v][w].*z$"); + benchmark(times, "zzz", + "[abcdefghijklmnopqrstuvwxyz][abcdefghijklmnopqrstuvwxyz][" + "abcdefghijklmnopqrstuvwxyz]$"); + + benchmark(times, "7245 Sr", "[0-9][0-9][0-9][0-9] ?\\w\\w$"); + + benchmark(times, + "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmn" + "opqrstuvwxyzesting", + "[z-z][e-e]"); + benchmark(times, + "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmn" + "opqrstuvwxyzesting", + "zesting"); + benchmark(times, "\"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.*)\"\"(.*)\"\"(.*)\""); + benchmark(times, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\".+\"\".+\"\".+\""); + benchmark(times, " \"stdio.h\"\"string.h\"\"sys/time.h\"", + "\"(.+)\"\"(.+)\"\"(.+)\""); +} diff --git a/rrex4.c b/rrex4.c new file mode 100644 index 0000000..8158da3 --- /dev/null +++ b/rrex4.c @@ -0,0 +1,371 @@ +#define R4_DEBUG_a + +#include "rrex4.h" +#include "rlib.h" 
+#include + +bool bench_r4(unsigned int times, char *str, char *expr) { + RBENCH(times, { + r4_t *r = r4(str, expr); + + if (r->valid == false) { + + printf("Bench r4 error\n"); + exit(1); + } + + r4_free(r); + }); + return true; +} + +void bench_c(unsigned int times, char *str, char *expr) { + regex_t regex; + if (regcomp(®ex, expr, REG_EXTENDED)) { + printf("Creg: error in regular expression.\n"); + exit(1); + } + RBENCH(times, { + if (regexec(®ex, str, 0, NULL, 0)) { + printf("Creg: error executing regular expression.\n"); + exit(1); + } + }); + + regfree(®ex); +} + +bool bench(unsigned int times, char *str, char *expr) { + printf("%d:(%s)<%s>\n", times, str, expr); + printf("c:"); + bench_c(times, str, expr); + printf("r:"); + bench_r4(times, str, expr); + return true; +} + +void test_r4_next() { + r4_t *r = r4_new(); + char *str = "abcdefghijklmnop"; + char *reg = "(\\w\\w\\w\\w)"; + r = r4(str, reg); + assert(r->valid); + assert(r->match_count == 1); + assert(!strcmp(r->matches[0], "abcd")); + // Again with same regex as parameter + r = r4_next(r, reg); + assert(r->valid); + assert(r->match_count == 1); + assert(!strcmp(r->matches[0], "efgh")); + // Again with same regex as parameter + r = r4_next(r, reg); + assert(r->valid); + assert(r->match_count == 1); + assert(!strcmp(r->matches[0], "ijkl")); + // Reuse expression, NULL parameter + r = r4_next(r, NULL); + assert(r->valid); + assert(r->match_count == 1); + assert(!strcmp(r->matches[0], "mnop")); + // No results using r4_next + r = r4_next(r, NULL); + assert(r->valid); + assert(r->match_count == 0); + // Again no results using r4_next, Shouldn't crash + r = r4_next(r, NULL); + assert(r->valid); + assert(r->match_count == 0); + r4_free(r); +} + +void bench_all(unsigned int times) { + assert(bench(times, "suvw", + "[abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw][" + "abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]")); + assert(bench(times, "ponyyy", "^p+o.*yyy$$$$")); + assert(bench(times, " ponyyzd", 
"p+o.*yyzd$$$$")); + assert(bench(times, "abc", "def|gek|abc")); + assert(bench(times, "abc", "def|a?b?c|def")); + assert(bench(times, "NL18RABO0322309700", + "([A-Z]{2})([0-9]{2})([A-Z]{4}[0-9])([0-9]+)$")); + assert(bench(times, "a 1 b 2 c 3 d 4 ", "([A-Z0-9 ]+)")); +} + +bool r4_match_stats(char *str, char *expr) { + r4_t *r = r4(str, expr); + bool result = r->valid; + printf("%d:(%s)<%s>\n", r->validation_count, r->_str, r->_expr); + if (result) { + printf(" - match(0)\t: \"%s\"\n", r->match); + } + for (unsigned i = 0; i < r->match_count; i++) { + printf(" - match(%d)\t: \"%s\"\n", i + 1, r->matches[i]); + } + r4_free(r); + return result; +} + +void test_r4_bug_check_capture_overflow() { + // This is a former bug in r4. + + // Case one + r4_t *r = r4("test", "(test)+"); + assert(r->match_count == 1); + r4_free(r); + + // Case two + r = r4("tester", "(t\\est\\e\\r)+"); + assert(r->match_count == 1); + printf("%s\n", r->matches[0]); + r4_free(r); + + // Case three + r = r4("test", "(t\\est\\e\\r)+"); + assert(r->match_count == 0); + r4_free(r); +} + +void test_r4_capture_main_group() { + // Case 1 + r4_t *r = r4("testtesttesttest", "(test)+test$"); + // printf("%s\n",r->match); + // assert(!strcmp(r->match,"testtesttesttest")); + assert(r->match_count == 3); + assert(!strcmp(r->matches[0], "test")); + assert(!strcmp(r->matches[1], "test")); + assert(!strcmp(r->matches[2], "test")); + r4_free(r); + // Case 2 (with search) + /* + r = r4(" testtesttesttest","(test)+test$"); + printf("%s\n",r->match); + assert(!strcmp(r->match,"testtesttesttest")); + assert(r->match_count == 3); + assert(!strcmp(r->matches[0], "test")); + assert(!strcmp(r->matches[1], "test")); + assert(!strcmp(r->matches[2], "test")); + r4_free(r); */ +} + +char test_r4_capture_dynamic_amount() { + r4_t *r = r4("testtesttesttest", "(test)+test$"); + assert(r->match_count == 3); + assert(!strcmp(r->matches[0], "test")); + assert(!strcmp(r->matches[1], "test")); + assert(!strcmp(r->matches[2], 
"test")); + r4_free(r); + + return true; + // Some advanced capturing + // Fails + r = r4("testtesttesttest", "([tes]+)+test$"); + printf("%d\n", r->match_count); + assert(r->match_count == 1); + assert(!strcmp(r->matches[0], "testtesttest")); + r4_free(r); +} + +int main(int argc, char *argv[]) { + + for (int i = 0; i < argc; i++) { + if (!strcmp(argv[i], "--debug")) { + r4_enable_debug(); + } + } + + // Has to be fixed + r4_match_stats("r4@r4.net", + "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]*$"); + // r4_match_stats("r4@r4.net", "^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$"); + // exit(0); + + test_r4_capture_main_group(); + assert(r4_match_stats("testtesttesttest", "(test)+test$")); + assert(r4_match_stats("testtest", "test")); + + // Group testing + assert(r4_match_stats("aaadddd", "(a+)(d+)$")); + assert(r4_match_stats("aaa", "(a+)$")); + assert(r4_match_stats("aaadddd", "(d+)$")); + assert(r4_match_stats("aaadddd", "(d+)")); + assert(r4_match_stats("aaa\"dddd\"", "\"(d+)\"")); + assert(r4_match_stats("aaadddd", "(a*)(d+)$")); + assert(r4_match_stats("aaa", "(a*)$")); + assert(r4_match_stats("aaadddd", "(d*)$")); + assert(r4_match_stats("aaadddd", "(d*)")); + assert(r4_match_stats("aaa\"dddd\" ", "\"(d*)\"\\s*")); + + // Words + assert(r4_match_stats("a", "\\w")); + assert(!r4_match_stats("1", "\\w")); + assert(r4_match_stats("1", "\\W")); + assert(!r4_match_stats("a", "\\W")); + assert(r4_match_stats("aa", "\\w{2}")); + assert(r4_match_stats("11", "\\W{2}")); + assert(r4_match_stats("1", "[\\W]")); + + // Digits + assert(r4_match_stats("1", "\\d")); + assert(!r4_match_stats("a", "\\d")); + assert(r4_match_stats("a", "\\D")); + assert(!r4_match_stats("1", "\\D")); + assert(r4_match_stats("11", "\\d{2}$")); + assert(r4_match_stats("aa", "\\D{2}$")); + assert(r4_match_stats("a", "[\\D]")); + + // Whitespace + assert(r4_match_stats(" ", "\\s")); + assert(r4_match_stats(" a", "\\s")); + assert(!r4_match_stats("a", "[\\s]")); + assert(r4_match_stats("a ", "[\\s]")); + 
assert(r4_match_stats("a", "\\S")); + assert(!r4_match_stats(" ", "\\S")); + assert(!r4_match_stats(" ", "[\\S]")); + assert(r4_match_stats("b ", "[\\S]")); + assert(r4_match_stats(" b", "[\\S]")); + + // Boundaries + assert(r4_match_stats("a", "\\b")); + assert(r4_match_stats("a", "\\ba$")); + assert(r4_match_stats("a", "^\\ba$")); + assert(r4_match_stats("aa", "\\b")); + assert(!r4_match_stats("aa", "\\b$")); + assert(r4_match_stats("aa", "[\\b]")); + assert(r4_match_stats("a", "\\B")); + assert(r4_match_stats("a", "\\Ba$")); + assert(r4_match_stats("a", "^\\Ba$")); + assert(r4_match_stats("aa", "\\B")); + assert(!r4_match_stats("aa", "^\\B")); + assert(!r4_match_stats("a1", "a[\\B]$")); + + // Optional + assert(!r4_match_stats("a", "?")); + assert(r4_match_stats("a", "a?")); + assert(r4_match_stats("a", "b?")); + assert(r4_match_stats("a", "^b?")); + assert(r4_match_stats("a", "a?$")); + assert(!r4_match_stats("a", "b?$")); + assert(r4_match_stats("a", "[def]?a$")); + + // Range + + assert(r4_match_stats("a", "a{1}")); + assert(r4_match_stats("ab", "a{1}")); + assert(r4_match_stats("aa", "a{2}")); + assert(!r4_match_stats("aab", "a{3}")); + assert(!r4_match_stats("a1", "a{2}")); + assert(r4_match_stats("ab", "a{1,2}")); + assert(r4_match_stats("aa", "a{2,}")); + + // Group (Custom function set) + r4_match_stats("*?+$^.|\\[{()}]@ ", "[*?+$^.|\\\\[{()}]]+$@\\s"); + + // Miscellaneous tests + bool debug_mode_original = _r4_debug; + _r4_debug = false; + r4_enable_debug(); + assert(_r4_debug); + r4_disable_debug(); + assert(!_r4_debug); + _r4_debug = debug_mode_original; + + assert(r4_match("a", "a")); + assert(!r4_match("b", "a")); + r4_init(NULL); + r4_free(NULL); + r4_free_matches(NULL); + + // Next tests + test_r4_next(); + + // Check if former known bugs are still fixed + test_r4_bug_check_capture_overflow(); + + // Check if capture amount is dynamic + test_r4_capture_dynamic_amount(); + + char *c_function_regex = + 
"(\\w[\\w\\d]*[\\s\\*]*)\\s*\\w[\\w\\d]*\\s*\\((.*)\\)\\s*\\{"; + r4_match_stats("int **main() {}", c_function_regex); + r4_match_stats("int main(int argc, char *argv[],(void *)aaa) {}", + c_function_regex); + + assert(r4_match_stats("NL18RABO0322309700", + "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d{10})")); + + // exit(0); + unsigned int times = 1; + bench_all(times); + + RBENCH(1, { + assert(r4_match_stats("#define DEFINETEST 1", + "#define\\s(+[\\w\\d_]+)\\s+[\\w\\d_]+")); + // assert(r4_match_stats("#define DEFINETEST 1\n", + // s "#define\\s+\\w[\\d\\w_]+\\s+[\\w\\d_]\\s*")); + + assert(!r4_match_stats("aa", "aaaa")); + assert(r4_match_stats("ponyyy", "^p+o.*yyy$$$$")); + assert(!r4_match_stats("ponyyy", "p%+o.*yyy$$$$")); + assert(!r4_match_stats("ponyyyd", "^p+o.*yyz$$$$")); + assert(r4_match_stats("123", "[0-2][2-2][1-3]$")); + assert(r4_match_stats("aaaabC5", "(a)(\\w)a*(a)\\w[A-Z][0-9]$")); + assert(r4_match_stats("abcdeeeeee", "ab(cdeee)e")); + assert(r4_match_stats("1234567", "12(.*)67$")); + assert(r4_match_stats("12111678993", "12(.*)67(.*)3$")); + assert(r4_match_stats("NL18RABO0322309700", "NL(.*)R(.*)0(.*)0(.*)$")); + + assert(r4_match_stats("NL18RABO0322309700", + "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); + assert(r4_match_stats("NL18RABO0322309700garbage", + "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)")); + assert(r4_match_stats("NL18RABO0322309700", + "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); + assert(r4_match_stats(" NL18RABO0322309700", + "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); + assert(r4_match_stats(" NL18RABO0322309700", + "(\\w{2})(\\d{2})(\\w{4}\\d)(\\d+)$")); + assert( + r4_match_stats("NL18RABO0", "(\\w\\w)(\\d\\d)(\\w\\w\\w\\w\\d)$")); + assert(r4_match_stats("q", "\\q$")); + assert(r4_match_stats("ab123", "[a-z0-9]+$")); + assert(r4_match_stats("ppppony", "p*pppony")); + assert(r4_match_stats("aa", "a{2}$")); + assert(r4_match_stats("A23", "[0-2A-z][2-2][1-3]$")); + assert(r4_match_stats("z23", "[0-2A-z][2-2][1-3]$")); + assert(r4_match_stats("r23", 
"[0-2Ar][2-2][1-3]$")); + assert(r4_match_stats("test", "\\w\\w\\w\\w$")); + assert(!r4_match_stats("test", "\\W\\w\\w\\w$")); + assert(r4_match_stats("1est", "\\W\\w\\w\\w$")); + assert(r4_match_stats("1est", "\\d\\w\\w\\w$")); + assert(r4_match_stats("Aest", "\\D\\w\\w\\w$")); + assert(r4_match_stats("abc", "[ab]+")); + assert(!r4_match_stats("abc", "[ab]+$")); + assert(r4_match_stats("abc", "[abc]+$")); + assert(!r4_match_stats("a", "[^ba]")); + assert(!r4_match_stats("a", "[^ab]")); + assert(r4_match_stats(" ponyyzd", "p+o.*yyzd$$$$")); + assert(r4_match_stats("abc", "def|gek|abc")); + assert(!r4_match_stats("abc", "def|gek|abd")); + assert(r4_match_stats("abc", "def|abc|def")); + assert(r4_match_stats( + "suwv", "[abcdesfghijklmnopqrtuvw][abcdefghijklmnopqrstuvw][" + "abcdefghijklmnopqrstuvw][abcdefghijklmnopqrstuvw]")); + + assert(r4_match_stats("123", "(.*)(.*)(.*)")); + assert(r4_match_stats("1234", "(.*)(.*)(.*)")); + + assert(r4_match_stats("#include \"test.c\"", "#include\\s+\"(.*)\"")); + assert(r4_match_stats("#define TEST_JE VALUE", + "#define\\s+([A-Za-z_0-9]+)\\s+([A-Za-z_0-9]+)")); + // + assert(r4_match_stats("bbb", "a*(bbb)")); + + // Tests added for coverage + assert(!r4_match_stats("1", "[\\D]")); + assert(!r4_match_stats("11", "\\D{2}")); + assert(!r4_match_stats("ab", "ba")); + assert(r4_match_stats("2", "[4-2]")); + }); + + return 0; +} \ No newline at end of file diff --git a/rrex4.h b/rrex4.h new file mode 100644 index 0000000..88716ce --- /dev/null +++ b/rrex4.h @@ -0,0 +1,761 @@ +#ifndef RREX4_H +#define RREX4_H +#include +#include +#include +#include +#include +#include + +#define R4_DEBUG_a + +#ifdef R4_DEBUG +static int _r4_debug = 1; +#else +static int _r4_debug = 0; +#endif + +static char *_format_function_name(const char *name) { + static char result[100]; + result[0] = 0; + + char *new_name = (char *)name; + new_name += 11; + if (new_name[0] == '_') + new_name += 1; + if (strlen(new_name) == 0) { + return " -"; + } + 
strcpy(result, new_name); + return result; +} + +#define DEBUG_VALIDATE_FUNCTION \ + if (_r4_debug || r4->debug) \ + printf("DEBUG: %s %s <%s> \"%s\"\n", _format_function_name(__func__), \ + r4->valid ? "valid" : "INVALID", r4->expr, r4->str); + +struct r4_t; + +void r4_enable_debug() { _r4_debug = true; } +void r4_disable_debug() { _r4_debug = false; } + +typedef bool (*r4_function)(struct r4_t *); + +typedef struct r4_t { + bool debug; + bool valid; + bool in_block; + bool is_greedy; + bool in_range; + unsigned int backtracking; + unsigned int loop_count; + unsigned int in_group; + unsigned int match_count; + unsigned int validation_count; + unsigned int start; + unsigned int end; + unsigned int length; + bool (*functions[254])(struct r4_t *); + bool (*slash_functions[254])(struct r4_t *); + char *_str; + char *_expr; + char *match; + char *str; + char *expr; + char *str_previous; + char *expr_previous; + char **matches; +} r4_t; + +static bool v4_initiated = false; +typedef bool (*v4_function_map)(r4_t *); +v4_function_map v4_function_map_global[256]; +v4_function_map v4_function_map_slash[256]; +v4_function_map v4_function_map_block[256]; + +static void r4_free_matches(r4_t *r) { + if (!r) + return; + if (r->match) { + free(r->match); + r->match = NULL; + } + if (!r->match_count) { + return; + } + for (unsigned i = 0; i < r->match_count; i++) { + free(r->matches[i]); + } + free(r->matches); + r->match_count = 0; + r->matches = NULL; +} + +static void r4_free(r4_t *r) { + if (!r) + return; + r4_free_matches(r); + free(r); +} + +static bool r4_backtrack(r4_t *r4); +static bool r4_validate(r4_t *r4); +static void r4_match_add(r4_t *r4, char *extracted); + +static bool r4_validate_literal(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (!r4->valid) + return false; + if (*r4->str != *r4->expr) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + if (r4->in_block || r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} 
+static bool r4_validate_question_mark(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->valid = true; + r4->expr++; + return r4_validate(r4); +} + +static bool r4_validate_plus(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (r4->valid == false) { + return r4_validate(r4); + } + char *expr_left = r4->expr_previous; + char *expr_right = r4->expr; + char *str = r4->str; + char *return_expr = NULL; + if (*expr_right == ')') { + return_expr = expr_right; + expr_right++; + } + r4->is_greedy = false; + r4->expr = expr_left; + while (r4->valid) { + if (*expr_right) { + r4->expr = expr_right; + r4->is_greedy = true; + if (r4_backtrack(r4)) { + + if (return_expr) { + r4->str = str; + r4->expr = return_expr; + } + return r4_validate(r4); + } else { + r4->is_greedy = false; + } + } + r4->valid = true; + r4->expr = expr_left; + r4->str = str; + r4_validate(r4); + str = r4->str; + } + r4->is_greedy = true; + r4->valid = true; + r4->expr = return_expr ? return_expr : expr_right; + return r4_validate(r4); +} + +static bool r4_validate_dollar(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + r4->valid = *r4->str == 0; + return r4_validate(r4); +} + +static bool r4_validate_roof(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (r4->str != r4->_str) { + return false; + } + r4->expr++; + return r4_validate(r4); +} + +static bool r4_validate_dot(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (*r4->str == 0) { + return false; + } + r4->expr++; + r4->valid = *r4->str != '\n'; + r4->str++; + + if (r4->in_block || r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_validate_asterisk(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (r4->valid == false) { + r4->valid = true; + return r4->valid; + // return r4_validate(r4); + } + char *expr_left = r4->expr_previous; + char *expr_right = r4->expr; + char *str = r4->str; + char *return_expr = NULL; + if (*expr_right == ')') { + return_expr = expr_right; + expr_right++; + } + 
r4->is_greedy = false; + r4->expr = expr_left; + while (r4->valid) { + if (*expr_right) { + r4->expr = expr_right; + r4->is_greedy = true; + if (r4_backtrack(r4)) { + + if (return_expr) { + r4->str = str; + r4->expr = return_expr; + } + return r4_validate(r4); + } else { + r4->is_greedy = false; + } + } + r4->valid = true; + r4->expr = expr_left; + r4->str = str; + r4_validate(r4); + str = r4->str; + } + r4->is_greedy = true; + r4->valid = true; + r4->expr = return_expr ? return_expr : expr_right; + return r4_validate(r4); +} + +static bool r4_validate_pipe(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (r4->valid == true) { + return true; + } else { + r4->valid = true; + } + return r4_validate(r4); +} + +static bool r4_validate_digit(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (!isdigit(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + if (r4->in_block || r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_not_digit(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (isdigit(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + + if (r4->in_block || r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_word(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (!isalpha(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + + if (r4->in_block || r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_not_word(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (isalpha(*r4->str)) { + r4->valid = false; + } else { + r4->str++; + } + r4->expr++; + + if (r4->in_block || r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_isrange(char *s) { + if (!isalnum(*s)) { + return false; + } + if (*(s + 1) != '-') { + return false; + } + return isalnum(*(s + 2)); +} + +static bool 
r4_validate_block_open(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + if (r4->valid == false) { + return false; + } + char *expr_self = r4->expr; + r4->expr++; + bool reversed = *r4->expr == '^'; + if (reversed) { + r4->expr++; + } + + bool valid_once = false; + r4->in_block = true; + while (*r4->expr != ']') { + r4->valid = true; + if (r4_isrange(r4->expr)) { + char s = *r4->expr; + char e = *(r4->expr + 2); + r4->expr += 2; + if (s > e) { + char tempc = s; + s = e; + e = tempc; + } + if (*r4->str >= s && *r4->str <= e) { + if (!reversed) { + r4->str++; + } + valid_once = true; + break; + } else { + r4->expr++; + } + } else if (r4_validate(r4)) { + valid_once = true; + if (reversed) + r4->str--; + break; + } + } + char *expr_end = strchr(r4->expr, ']'); + + r4->expr = expr_end ? expr_end : r4->expr; + r4->in_block = false; + r4->valid = expr_end && (!reversed ? valid_once : !valid_once); + r4->expr++; + r4->expr_previous = expr_self; + + if (r4->in_range || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_validate_whitespace(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->valid = strchr("\r\t \n", *r4->str) != NULL; + r4->expr++; + if (r4->valid) { + r4->str++; + } + if (r4->in_range || r4->in_block || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_not_whitespace(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->valid = strchr("\r\t \n", *r4->str) == NULL; + r4->expr++; + if (r4->valid) { + r4->str++; + } + if (r4->in_range || r4->in_block || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} + +static bool r4_validate_range(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION; + if (r4->valid == false) { + r4->expr++; + return false; + } + char *previous = r4->expr_previous; + r4->in_range = true; + r4->expr++; + unsigned int start = 0; + while (isdigit(*r4->expr)) { + start = 10 * start; + start += *r4->expr - '0'; + r4->expr++; + } + if (start != 0) + start--; + + unsigned int end = 0; 
+ bool variable_end_range = false; + if (*r4->expr == ',') { + r4->expr++; + if (!isdigit(*r4->expr)) { + variable_end_range = true; + } + } + while (isdigit(*r4->expr)) { + end = end * 10; + end += *r4->expr - '0'; + r4->expr++; + } + r4->expr++; + + bool valid = true; + char *expr_right = r4->expr; + for (unsigned int i = 0; i < start; i++) { + r4->expr = previous; + valid = r4_validate(r4); + if (!*r4->str) + break; + if (!valid) { + break; + } + } + r4->expr = expr_right; + r4->in_range = false; + if (!r4->valid) + return false; + return r4_validate(r4); + + for (unsigned int i = start; i < end; i++) { + r4->expr = previous; + valid = r4_validate(r4); + if (!valid) { + break; + } + } + + while (variable_end_range) { + r4->in_range = false; + valid = r4_validate(r4); + r4->in_range = true; + if (valid) { + break; + } + r4->in_range = true; + valid = r4_validate(r4); + r4->in_range = false; + if (!valid) { + break; + } + } + r4->valid = valid; + + return r4_validate(r4); +} + +static bool r4_validate_group_close(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + return r4->valid; +} + +static bool r4_validate_group_open(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + char *expr_previous = r4->expr_previous; + r4->expr++; + bool save_match = r4->in_group == 0; + r4->in_group++; + char *str_extract_start = r4->str; + bool valid = r4_validate(r4); + + if (!valid || *r4->expr != ')') { + // this is a valid case if not everything between () matches + r4->in_group--; + if (save_match == false) { + r4->valid = true; + } + + // Not direct return? 
Not sure + return r4_validate(r4); + } + if (save_match) { + char *str_extract_end = r4->str; + unsigned int extracted_length = str_extract_end - str_extract_start; + // strlen(str_extract_start) - strlen(str_extract_end); + char *str_extracted = + (char *)calloc(sizeof(char), extracted_length + 1); + strncpy(str_extracted, str_extract_start, extracted_length); + r4_match_add(r4, str_extracted); + } + assert(*r4->expr == ')'); + r4->expr++; + r4->in_group--; + r4->expr_previous = expr_previous; + return r4_validate(r4); +} + +static bool r4_validate_slash(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + // The handling code for handling slashes is implemented in r4_validate + char *expr_previous = r4->expr_previous; + r4->expr++; + r4_function f = v4_function_map_slash[(int)*r4->expr]; + r4->expr_previous = expr_previous; + return f(r4); +} + +static void r4_match_add(r4_t *r4, char *extracted) { + r4->matches = + (char **)realloc(r4->matches, (r4->match_count + 1) * sizeof(char *)); + r4->matches[r4->match_count] = extracted; + r4->match_count++; +} + +static bool r4_validate_word_boundary_start(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (!r4->valid) { + return r4->valid; + } + r4->valid = + isalpha(*r4->str) && (r4->str == r4->_str || !isalpha(*(r4->str - 1))); + if (r4->in_range || r4->in_block || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} +static bool r4_validate_word_boundary_end(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->expr++; + if (!r4->valid) { + return r4->valid; + } + r4->valid = + isalpha(*r4->str) && (*(r4->str + 1) == 0 || !isalpha(*(r4->str + 1))); + if (r4->in_range || r4->in_block || !r4->is_greedy) { + return r4->valid; + } + return r4_validate(r4); +} + +static void v4_init_function_maps() { + if (v4_initiated) + return; + v4_initiated = true; + for (__uint8_t i = 0; i < 255; i++) { + v4_function_map_global[i] = r4_validate_literal; + v4_function_map_slash[i] = r4_validate_literal; + v4_function_map_block[i] = 
r4_validate_literal; + } + v4_function_map_global['*'] = r4_validate_asterisk; + v4_function_map_global['?'] = r4_validate_question_mark; + v4_function_map_global['+'] = r4_validate_plus; + v4_function_map_global['$'] = r4_validate_dollar; + v4_function_map_global['^'] = r4_validate_roof; + v4_function_map_global['.'] = r4_validate_dot; + v4_function_map_global['|'] = r4_validate_pipe; + v4_function_map_global['\\'] = r4_validate_slash; + v4_function_map_global['['] = r4_validate_block_open; + v4_function_map_global['{'] = r4_validate_range; + v4_function_map_global['('] = r4_validate_group_open; + v4_function_map_global[')'] = r4_validate_group_close; + v4_function_map_slash['b'] = r4_validate_word_boundary_start; + v4_function_map_slash['B'] = r4_validate_word_boundary_end; + v4_function_map_slash['d'] = r4_validate_digit; + v4_function_map_slash['w'] = r4_validate_word; + v4_function_map_slash['D'] = r4_validate_not_digit; + v4_function_map_slash['W'] = r4_validate_not_word; + v4_function_map_slash['s'] = r4_validate_whitespace; + v4_function_map_slash['S'] = r4_validate_not_whitespace; + v4_function_map_block['\\'] = r4_validate_slash; + + v4_function_map_block['{'] = r4_validate_range; +} + +void r4_init(r4_t *r4) { + v4_init_function_maps(); + if (r4 == NULL) + return; + r4->debug = _r4_debug; + r4->valid = true; + r4->validation_count = 0; + r4->match_count = 0; + r4->start = 0; + r4->end = 0; + r4->length = 0; + r4->matches = NULL; +} + +static bool r4_looks_behind(char c) { return strchr("?*+{", c) != NULL; } + +r4_t *r4_new() { + r4_t *r4 = (r4_t *)malloc(sizeof(r4_t)); + + r4_init(r4); + + return r4; +} + +static bool r4_pipe_next(r4_t *r4) { + char *expr = r4->expr; + while (*expr) { + if (*expr == '|') { + r4->expr = expr + 1; + r4->valid = true; + return true; + } + expr++; + } + return false; +} + +static bool r4_backtrack(r4_t *r4) { + if (_r4_debug) + printf("\033[36mDEBUG: backtrack start (%d)\n", r4->backtracking); + r4->backtracking++; + char 
*str = r4->str; + char *expr = r4->expr; + bool result = r4_validate(r4); + r4->backtracking--; + if (result == false) { + r4->expr = expr; + r4->str = str; + } + if (_r4_debug) + printf("DEBUG: backtrack end (%d) result: %d %s\n", r4->backtracking, + result, r4->backtracking == 0 ? "\033[0m" : ""); + return result; +} + +static bool r4_validate(r4_t *r4) { + DEBUG_VALIDATE_FUNCTION + r4->validation_count++; + char c_val = *r4->expr; + if (c_val == 0) { + return r4->valid; + } + if (!r4_looks_behind(c_val)) { + r4->expr_previous = r4->expr; + } else if (r4->expr == r4->_expr) { + // Regex may not start with a look behind ufnction + return false; + } + + if (!r4->valid && !r4_looks_behind(*r4->expr)) { + if (!r4_pipe_next(r4)) { + return false; + } + } + r4_function f; + if (r4->in_block) { + f = v4_function_map_block[(int)c_val]; + } else { + f = v4_function_map_global[(int)c_val]; + } + + r4->valid = f(r4); + return r4->valid; +} + +char *r4_get_match(r4_t *r) { + char *match = (char *)malloc(r->length + 1); + strncpy(match, r->_str + r->start, r->length); + match[r->length] = 0; + return match; +} + +static bool r4_search(r4_t *r) { + bool valid = true; + char *str_next = r->str; + while (*r->str) { + if (!(valid = r4_validate(r))) { + // Move next until we find a match + if (!r->backtracking) { + r->start++; + } + str_next++; + r->str = str_next; + r->expr = r->_expr; + r->valid = true; + } else { + /// HIGH DOUBT + if (!r->backtracking) { + // r->start = 0; + } + break; + } + } + r->valid = valid; + if (r->valid) { + r->end = strlen(r->_str) - strlen(r->str); + r->length = r->end - r->start; + r->match = r4_get_match(r); + } + return r->valid; +} + +r4_t *r4(const char *str, const char *expr) { + r4_t *r = r4_new(); + r->_str = (char *)str; + r->_expr = (char *)expr; + r->match = NULL; + r->str = r->_str; + r->expr = r->_expr; + r->str_previous = r->_str; + r->expr_previous = r->expr; + r->in_block = false; + r->is_greedy = true; + r->in_group = 0; + 
r->loop_count = 0; + r->backtracking = 0; + r->in_range = false; + r4_search(r); + return r; +} + +r4_t *r4_next(r4_t *r, char *expr) { + if (expr) { + r->_expr = expr; + } + r->backtracking = 0; + r->expr = r->_expr; + r->is_greedy = true; + r->in_block = false; + r->in_range = false; + r->in_group = false; + r4_free_matches(r); + r4_search(r); + return r; +} + +bool r4_match(char *str, char *expr) { + r4_t *r = r4(str, expr); + bool result = r->valid; + r4_free(r); + return result; +} +#endif \ No newline at end of file