#include "rlib.h"
#include "rrex.h"
/* Standard headers for the libc facilities used directly in this file. */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* State carried across successive compile_one() calls for one pattern. */
typedef struct rrex_compiler_t {
    int previous_method;         /* only ever set to 0 in the code visible here */
    char *previous_method_start; /* start, in the pattern text, of the element compiled last */
    char *bdata;                 /* byte-code buffer handed to rexx_init_compiler() */
    char *rdata;                 /* pattern text handed to rexx_init_compiler() */
} rrex_compiler_t;

void compile_one(rrex_compiler_t *compiler, char **content, char **compiled, int *indexp);
void rrex_compile(char *content, char *compiled);
int test_compiler();
int convert_bt(size_t i);
char *format_bc(char *code);
void print_bc(char *code);
int test_compile(char *s, char *r);

/*
 * Reset a compiler state object.
 *
 * c      state to initialise (fully zeroed first)
 * rdata  pattern text; also becomes the initial previous_method_start
 * bdata  byte-code output buffer
 */
void rexx_init_compiler(rrex_compiler_t *c, char *rdata, char *bdata)
{
    memset(c, 0, sizeof(rrex_compiler_t));
    c->rdata = rdata;
    c->bdata = bdata;
    c->previous_method = 0;
    c->previous_method_start = rdata;
}

/* Byte-code opcodes. Values are 1-based and must stay in step with the
 * mnemonic table in convert_bt(). */
typedef enum reg_new_t {
    RN_LITERAL = 1,
    RN_DRANGE = 2,
    RN_ARANGE = 3,
    RN_IGNORE = 4,
    RN_REPEAT = 5,
    RN_FUNCTION = 6,
    RN_DOT,
    RN_ROOF,
    RN_CHOICE_START,
    RN_CHOICE_END,
    RN_WHITESPACE,
    RN_SLASH_CD,
    RN_SLASH_CW,
    RN_PLUS,
    RN_DOLLAR,
    RN_ASTERISK,
    RN_GROUP_START,
    RN_GROUP_END,
    RN_PIPE,
    RN_QUESTION,
    RN_DIGIT,
    RN_ALPHA
} reg_new_t;

/*
 * Map an opcode to its one-character mnemonic.
 *
 * Returns the mnemonic for opcodes 1..22; any other value (including 0,
 * which used to index one element before the table) is returned unchanged.
 */
int convert_bt(size_t i)
{
    static const char mnemonics[] = "lRRirf.^[]wDW+$*()|?da";

    /* Opcodes are 1-based; sizeof counts the terminating NUL. */
    if (i >= 1 && i < sizeof mnemonics) {
        return mnemonics[i - 1];
    }
    return (int)i;
}

/*
 * Render NUL-terminated byte code as human-readable text.
 *
 * Each byte is printed according to the byte before it:
 *   - after RN_LITERAL: the raw character wrapped in parentheses,
 *   - after RN_REPEAT:  its value as a decimal number,
 *   - otherwise:        its mnemonic via convert_bt().
 * (compile_one() in this file emits neither RN_LITERAL nor RN_REPEAT.)
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call. Output that would not fit in the buffer is dropped at a chunk
 * boundary rather than written past the end.
 */
char *format_bc(char *code)
{
    static char result[50000];
    size_t used = 0;
    char previous = 0; /* nothing precedes the first byte */

    result[0] = 0;
    if (code == NULL) {
        return result;
    }

    for (size_t i = 0; code[i] != 0; i++) {
        char value = code[i];
        size_t room = sizeof result - used;
        int written;

        if (previous == RN_LITERAL) {
            written = snprintf(result + used, room, "(%c)", value);
        } else if (previous == RN_REPEAT) {
            written = snprintf(result + used, room, "%d", value);
        } else {
            written = snprintf(result + used, room, "%c",
                               convert_bt((unsigned char)value));
        }

        if (written < 0 || (size_t)written >= room) {
            result[used] = 0; /* discard the partially written chunk */
            break;
        }
        used += (size_t)written;
        previous = value;
    }
    return result;
}

/* Write the human-readable form of `code` to stdout (no trailing newline). */
void print_bc(char *code)
{
    fputs(format_bc(code), stdout);
}

/*
 * Compile exactly one pattern element.
 *
 * compiler  shared state; previous_method_start is updated so that a
 *           following '?' or '{n}' knows which source text to re-compile
 * content   in/out: read position in the pattern, advanced past the element
 * compiled  in/out: byte-code buffer (the pointer itself is left unchanged)
 * indexp    in/out: write position inside *compiled
 *
 * The output is NUL-terminated at the new write position on return.
 * The size of *compiled is not known here, so the caller must supply a
 * buffer large enough for the expanded pattern.
 */
void compile_one(rrex_compiler_t *compiler, char **content, char **compiled, int *indexp)
{
    char *r = *content;
    char *c = *compiled;
    int index = *indexp;

    if (*r == '*') {
        compiler->previous_method_start = r;
        c[index++] = RN_ASTERISK;
        r++;
    } else if (*r == '\\') {
        r++;
        compiler->previous_method_start = r - 1;
        switch (*r) {
        case 'd':
            c[index++] = RN_DIGIT;
            r++;
            break;
        case 'w':
            c[index++] = RN_ALPHA;
            r++;
            break;
        case 's':
            c[index++] = RN_WHITESPACE;
            r++;
            break;
        case 'D':
            c[index++] = RN_SLASH_CD;
            r++;
            break;
        case 'W':
            c[index++] = RN_SLASH_CW;
            r++;
            break;
        case 0:
            /* Trailing backslash: keep it as a literal and stay on the
             * terminator instead of stepping past the end of the pattern. */
            c[index++] = '\\';
            break;
        default:
            /* Any other escaped character stands for itself. */
            c[index++] = *r;
            r++;
            break;
        }
    } else if (*r == '$') {
        compiler->previous_method_start = r;
        c[index++] = RN_DOLLAR;
        r++;
    } else if (*r == '(') {
        char *group_start = r;

        r++;
        c[index++] = RN_GROUP_START;
        /* Stop at the terminator too, so an unclosed group cannot run off
         * the end of the pattern. */
        while (*r != 0 && *r != ')') {
            compile_one(compiler, &r, &c, &index);
        }
        compiler->previous_method_start = group_start;
        c[index++] = RN_GROUP_END;
        if (*r == ')') {
            r++;
        }
    } else if (*r == '|') {
        compiler->previous_method_start = r;
        c[index++] = RN_PIPE;
        r++;
    } else if (*r == '?') {
        r++;
        if (index) {
            /* Re-compile the previous element into scratch space to learn
             * how many byte-code bytes it produced, then rewrite the tail
             * of the output as RN_QUESTION followed by those bytes. */
            char source[1024] = {0};
            char scratch[1024] = {0};
            char *source_pos = source;
            char *scratch_out = scratch;
            char *emitted = scratch;
            int scratch_index = 0;
            size_t copied = 0;

            /* NOTE(review): an element longer than the scratch buffer is
             * truncated here (the original overflowed the stack instead);
             * the resulting byte code is then not meaningful. */
            for (char *from = compiler->previous_method_start;
                 from != r - 1 && copied + 1 < sizeof source; from++) {
                source[copied++] = *from;
            }
            source[copied] = 0;

            /* NOTE(review): `scratch` is not size-checked because
             * compile_one() has no output-size parameter. */
            compile_one(compiler, &source_pos, &scratch_out, &scratch_index);

            index -= (int)strlen(emitted);
            c[index++] = RN_QUESTION;
            while (*emitted) {
                c[index++] = *emitted++;
            }
            /* The recursive call left a pointer into `source`; point back
             * into the real pattern (at the '?'). */
            compiler->previous_method_start = r - 1;
        }
    } else if (isalpharange(r) || isdigitrange(r)) {
        /* Three source characters "x-y" become: opcode, x, y. */
        compiler->previous_method_start = r;
        c[index++] = isalpha((unsigned char)*r) ? RN_ARANGE : RN_DRANGE;
        c[index++] = *r;
        r += 2;
        c[index++] = *r;
        r++;
    } else if (*r == '.') {
        compiler->previous_method_start = r;
        c[index++] = RN_DOT;
        r++;
    } else if (*r == '^') {
        compiler->previous_method_start = r;
        c[index++] = RN_ROOF;
        r++;
    } else if (*r == '[') {
        char *choice_start = r;

        r++;
        c[index++] = RN_CHOICE_START;
        /* Same guard as for groups: never scan past the terminator. */
        while (*r != 0 && *r != ']') {
            compile_one(compiler, &r, &c, &index);
        }
        compiler->previous_method_start = choice_start;
        c[index++] = RN_CHOICE_END;
        if (*r == ']') {
            r++;
        }
    } else if (*r == '+') {
        compiler->previous_method_start = r;
        c[index++] = RN_PLUS;
        r++;
    } else if (*r == '{') {
        /* "{n}" is expanded at compile time: the previous element is already
         * in the output once, so its source text is compiled n - 1 more
         * times. NOTE(review): "{0}" therefore still leaves one copy. */
        char *brace = r;
        char *repeat_from = compiler->previous_method_start;
        int times = 0;

        r++;
        compiler->previous_method_start = r;

        /* Only real digits contribute to the count. */
        while (isdigit((unsigned char)*r)) {
            times = times * 10 + (*r - '0');
            r++;
        }

        /* Skip the expansion when nothing precedes the brace; this also
         * avoids reading the byte before the start of the pattern. */
        if (repeat_from < brace) {
            char *repeat_to = brace - 1;

            if (isgrouping(repeat_from)) {
                char opener = groupcreverse(*repeat_to);
                while (*repeat_from != opener) {
                    repeat_from--;
                }
            }
            for (int pass = 1; pass < times; pass++) {
                char *cursor = repeat_from;
                while (cursor <= repeat_to) {
                    compile_one(compiler, &cursor, &c, &index);
                }
            }
        }

        /* Consume the character that closes the count (normally '}'), but
         * never the pattern terminator. */
        if (*r != 0) {
            r++;
        }
    } else {
        /* Plain character: emitted as-is. */
        compiler->previous_method_start = r;
        c[index++] = *r;
        r++;
    }

    c[index] = 0;
    *indexp = index;
    *content = r;
    *compiled = c;
}

/*
 * Compile a whole pattern.
 *
 * content   NUL-terminated pattern text
 * compiled  output buffer; receives NUL-terminated byte code. Its size is
 *           not checked, so it must be large enough for the expanded pattern.
 */
void rrex_compile(char *content, char *compiled)
{
    rrex_compiler_t compiler;
    char *r = content;
    int index = 0;

    rexx_init_compiler(&compiler, content, compiled);
    while (*r) {
        compile_one(&compiler, &r, &compiled, &index);
    }
    compiled[index] = 0;
}

/*
 * Compile pattern `s` and check that its human-readable byte code equals `r`.
 * The outcome is passed to rassert() and also returned (non-zero on match).
 */
int test_compile(char *s, char *r)
{
    char compiled[50000] = {0};

    rrex_compile(s, compiled);
    char *human_format = format_bc(compiled);
    bool result = strcmp(r, human_format) == 0;
    rassert(result);
    return result;
}

/* Self-tests for the compiler: escapes, ranges, anchors and "{n}" expansion. */
void rrex_compiler_tests()
{
    rtest_banner("rrex compiler");
    test_compile("\\W\\w\\d\\D", "WadD");
    test_compile("0-9", "R09");
    test_compile("a-z", "Raz");
    test_compile("0-9a-z", "R09Raz");
    test_compile("0-9A-Z", "R09RAZ");
    test_compile("^12^3", "^12^3");
    test_compile("3{1}", "3");
    test_compile("3{2}", "33");
    test_compile("[123]{1}", "[123]");
    test_compile("[123]{2}", "[123][123]");
    test_compile("[123]{3}$", "[123][123][123]$");
    test_compile("(123){3}$", "(123)(123)(123)$");
}