/*
 * Mini C Interpreter
 *
 * A concise recursive-descent interpreter for a small C subset. The source is
 * tokenized once and then executed directly from the token stream (there is no
 * AST or bytecode): "parsing" an expression evaluates it.
 *
 * Supported: int / char declarations (scalars, pointers, arrays), if/else,
 * while, user functions, printf with %d %s %c %%, string escapes, and a small
 * set of native socket helpers.
 *
 * Memory model: every interpreted value is one `long` cell in `memory[]`.
 * Interpreted addresses are indices into that array. Values outside the
 * virtual range are treated as raw host pointers (string literals point into
 * the loaded source buffer). This requires sizeof(long) >= sizeof(void *).
 */
#include <arpa/inet.h>
#include <ctype.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#define MAX_SRC 100000
#define MAX_TOK 10000
#define VAR_MAX 500
#define MEM_SIZE 10000
#define NAME_LEN 32         /* identifier storage, including the terminator */
#define MAX_FUNCS 100
#define MAX_ARGS 10
#define MAX_CALL_DEPTH 1000 /* keeps runaway recursion off the host stack */
#define IO_BUF_SIZE 8192

// --- Token Types ---
/* Single-character tokens use their character code; these start above ASCII. */
enum {
    Num = 128, Str, Id, Int, Char, Else, If, While, Return, Printf,
    Assign, Eq, Ne, Lt, Gt, Le, Ge, Or, And
};

// --- Structures ---
typedef struct {
    int type;
    long val;   /* Num: value; Id / Str: length of text; long so it can hold pointers */
    char *text; /* points into the source buffer (not NUL-terminated for Id) */
} Token;

typedef struct {
    char name[NAME_LEN];
    int type;     /* reserved; never read */
    int addr;     /* index of the first cell in memory[] */
    int is_array; /* arrays evaluate to their address, scalars to their value */
} Symbol;

typedef struct {
    char name[NAME_LEN];
    int entry_point; /* token index of the body's '{' */
    int param_count;
    int param_start; /* token index of the first token after '(' */
} Func;

typedef long (*NativeFunc)(long *, int);

typedef struct {
    char name[NAME_LEN];
    NativeFunc func;
} NativeFuncDef;

// --- Globals ---
Token tokens[MAX_TOK];
int tk_idx = 0;
int pc = 0;             /* index of the token being executed */
long memory[MEM_SIZE];
int sp = 0;             /* next free cell in memory[] */
int bp = 0;             /* base cell of the current call frame */
Symbol locals[VAR_MAX];
int loc_cnt = 0;
Func funcs[MAX_FUNCS];
int func_cnt = 0;
NativeFuncDef native_funcs[MAX_FUNCS];
int native_func_cnt = 0;
char *src_code;
long ax = 0;            /* value of the most recent `return` */

/* Set by `return`, cleared by the call that consumes it. Kept separate from
 * `ax` so that no return value can be mistaken for the "returning" signal. */
static int returning = 0;
static int call_depth = 0;

void error(const char *msg);
long expression(void);
void statement(void);
static long call_function(int f_idx, const long *args, int argc);

// --- Tokenizer ---

/* Classifies an identifier-shaped word as a keyword token type or Id. */
static int keyword_type(const char *word, size_t len)
{
    static const struct { const char *text; int type; } keywords[] = {
        {"int", Int}, {"char", Char}, {"if", If}, {"else", Else},
        {"while", While}, {"return", Return}, {"printf", Printf},
    };

    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strlen(keywords[i].text) == len &&
            memcmp(keywords[i].text, word, len) == 0) {
            return keywords[i].type;
        }
    }
    return Id;
}

/* Returns the character denoted by the escape `\c`, or -1 if unrecognized. */
static int escape_value(char c)
{
    switch (c) {
    case 'n':  return '\n';
    case 't':  return '\t';
    case 'r':  return '\r';
    case '0':  return '\0';
    case '\\': return '\\';
    case '"':  return '"';
    default:   return -1;
    }
}

/*
 * Splits `src` into tokens[] and appends a terminator token of type 0.
 *
 * The buffer is modified in place: string literals are unescaped and
 * NUL-terminated where they sit, so `src` must be writable and must outlive
 * the tokens.
 */
void tokenize(char *src)
{
    static const struct { const char *text; int type; } two_char_ops[] = {
        {"==", Eq}, {"!=", Ne}, {"<=", Le}, {">=", Ge}, {"&&", And}, {"||", Or},
    };
    char *s = src;

    while (*s) {
        if (isspace((unsigned char)*s)) {
            s++;
            continue;
        }

        // Comments
        if (s[0] == '/' && s[1] == '/') {
            while (*s && *s != '\n') s++;
            continue;
        }
        if (s[0] == '/' && s[1] == '*') {
            s += 2;
            while (*s && !(s[0] == '*' && s[1] == '/')) s++;
            if (*s) s += 2;
            continue;
        }

        /* Leave room for the terminator plus one token of lookahead past it
         * (scan_functions peeks two tokens ahead). */
        if (tk_idx >= MAX_TOK - 2) {
            printf("Error: too many tokens\n");
            exit(1);
        }
        Token *t = &tokens[tk_idx++];
        t->text = s;
        t->val = 0;

        // Keywords & Identifiers
        if (isalpha((unsigned char)*s) || *s == '_') {
            size_t len = 0;
            while (isalnum((unsigned char)s[len]) || s[len] == '_') len++;
            t->type = keyword_type(s, len);
            t->val = (long)len;
            s += len;
            continue;
        }

        // Numbers
        if (isdigit((unsigned char)*s)) {
            t->type = Num;
            t->val = strtol(s, &s, 10);
            continue;
        }

        // Strings
        if (*s == '"') {
            s++;
            t->type = Str;
            t->text = s;
            char *d = s; /* write cursor; never ahead of the read cursor */
            while (*s && *s != '"') {
                if (*s == '\\' && s[1] != '\0') {
                    int esc = escape_value(s[1]);
                    if (esc >= 0) {
                        *d++ = (char)esc;
                        s += 2;
                    } else {
                        /* Unknown escape: keep both characters verbatim. */
                        *d++ = *s++;
                        *d++ = *s++;
                    }
                } else {
                    *d++ = *s++;
                }
            }
            if (*s == '"') s++;
            *d = '\0';
            t->val = (long)(d - t->text);
            continue;
        }

        // Operators
        int matched = 0;
        for (size_t i = 0; i < sizeof two_char_ops / sizeof two_char_ops[0]; i++) {
            if (!strncmp(s, two_char_ops[i].text, 2)) {
                t->type = two_char_ops[i].type;
                s += 2;
                matched = 1;
                break;
            }
        }
        if (matched) continue;

        if (*s == '<') { t->type = Lt; s++; continue; }
        if (*s == '>') { t->type = Gt; s++; continue; }
        t->type = *s++;
    }
    tokens[tk_idx].type = 0;
}

// --- Helpers ---

/* Reports `msg` with the current token position and terminates the process. */
void error(const char *msg)
{
    int type = tokens[pc].type;
    /* Multi-character token types are not printable characters. */
    int shown = (type > 0 && type < 128 && isprint(type)) ? type : '?';

    printf("Error at token %d ('%c'): %s\n", pc, shown, msg);
    exit(1);
}

/* Consumes the current token if it has the given type; otherwise fails. */
void match(int type)
{
    if (tokens[pc].type == type) pc++;
    else error("Unexpected token");
}

/* True when the NUL-terminated `stored` name equals the first `len` bytes of `name`. */
static int name_equals(const char *stored, const char *name, int len)
{
    return len >= 0 && len < NAME_LEN &&
           strncmp(stored, name, (size_t)len) == 0 && stored[len] == '\0';
}

/* Copies an identifier token into a NAME_LEN buffer; fails if it does not fit. */
static void copy_token_name(char *dst, const Token *tok)
{
    if (tok->val >= NAME_LEN) error("Identifier too long");
    memcpy(dst, tok->text, (size_t)tok->val);
    dst[tok->val] = '\0';
}

/* Returns the index of the innermost visible variable named `name`, or -1. */
int find_local(const char *name, int len)
{
    for (int i = loc_cnt - 1; i >= 0; i--) {
        if (name_equals(locals[i].name, name, len)) return i;
    }
    return -1;
}

/* Returns the index of the user function named `name`, or -1. */
int find_func(const char *name, int len)
{
    for (int i = 0; i < func_cnt; i++) {
        if (name_equals(funcs[i].name, name, len)) return i;
    }
    return -1;
}

/* Returns the index of the native function named `name`, or -1. */
int find_native_func(const char *name, int len)
{
    for (int i = 0; i < native_func_cnt; i++) {
        if (name_equals(native_funcs[i].name, name, len)) return i;
    }
    return -1;
}

/* Makes `func` callable from interpreted code as `name` (truncated to fit). */
void register_native_func(const char *name, NativeFunc func)
{
    if (native_func_cnt >= MAX_FUNCS) {
        printf("Error: too many native functions\n");
        exit(1);
    }
    NativeFuncDef *nf = &native_funcs[native_func_cnt++];
    snprintf(nf->name, sizeof nf->name, "%s", name);
    nf->func = func;
}

// --- Virtual memory ---

/* Heuristic inherited from the original design: anything far outside the
 * virtual range is assumed to be a raw host pointer. */
static int is_host_pointer(long addr)
{
    return addr > (long)MEM_SIZE * 8 || addr < 0;
}

/* Reads a cell of interpreter memory, or one char through a host pointer. */
static long load_cell(long addr)
{
    if (addr >= 0 && addr < MEM_SIZE) return memory[addr];
    if (is_host_pointer(addr)) return *(char *)addr;
    error("Invalid memory read");
    return 0;
}

/* Writes a cell of interpreter memory; out-of-range addresses are fatal. */
static void store_cell(long addr, long val)
{
    if (addr < 0 || addr >= MEM_SIZE) error("Invalid memory write");
    memory[addr] = val;
}

/* Claims `count` zeroed cells at the top of the stack; returns the first index. */
static int reserve_cells(long count)
{
    if (count < 0 || count > MEM_SIZE - sp) error("Out of interpreter memory");
    int base = sp;
    for (long i = 0; i < count; i++) memory[base + i] = 0;
    sp += (int)count;
    return base;
}

/* Pushes one value onto the interpreter stack. */
static void push(long value)
{
    int slot = reserve_cells(1);
    memory[slot] = value;
}

/* Registers a new variable at the current stack top (no cells reserved yet). */
static Symbol *declare_symbol(const Token *name)
{
    if (loc_cnt >= VAR_MAX) error("Too many variables");
    Symbol *sym = &locals[loc_cnt];
    copy_token_name(sym->name, name);
    sym->type = 0;
    sym->addr = sp;
    sym->is_array = 0;
    loc_cnt++;
    return sym;
}

/* Base address used by `name[index]`: an array indexes its own storage, any
 * other variable is treated as a pointer and indexes what it points to. */
static long element_base(const Symbol *sym)
{
    return sym->is_array ? sym->addr : memory[sym->addr];
}

// --- Parser ---

/* Parses `arg, arg, ...)` after the opening parenthesis; returns the count. */
static int parse_call_args(long *args)
{
    int argc = 0;

    if (tokens[pc].type != ')') {
        for (;;) {
            if (argc >= MAX_ARGS) error("Too many arguments");
            args[argc++] = expression();
            if (tokens[pc].type != ',') break;
            pc++;
        }
    }
    match(')');
    return argc;
}

/*
 * Runs user function `f_idx` with the given arguments and returns its result
 * (0 when the body finishes without `return`).
 *
 * Frame layout: [saved bp][return pc][param 0]...[locals]. On exit the stack,
 * the symbol table and pc are restored to their state at the call site.
 */
static long call_function(int f_idx, const long *args, int argc)
{
    const Func *fn = &funcs[f_idx];

    if (argc != fn->param_count) error("Argument count mismatch");
    if (++call_depth > MAX_CALL_DEPTH) error("Call stack overflow");

    int ret_pc = pc;
    int old_bp = bp;
    int frame_base = sp;
    int saved_loc_cnt = loc_cnt;

    push(bp);
    bp = frame_base;
    push(ret_pc);

    /* Bind arguments by walking the parameter list exactly the way
     * scan_functions counted it. */
    int arg = 0;
    int i = fn->param_start;
    while (tokens[i].type != ')' && tokens[i].type != 0) {
        if (tokens[i].type != Int && tokens[i].type != Char) {
            i++;
            continue;
        }
        i++;
        while (tokens[i].type == '*') i++;
        long value = args[arg++];
        if (tokens[i].type == Id) {
            Symbol *param = declare_symbol(&tokens[i]);
            reserve_cells(1);
            memory[param->addr] = value;
            i++;
        } else {
            push(value); /* unnamed parameter still occupies its slot */
        }
    }

    pc = fn->entry_point;
    returning = 0;
    statement();
    long result = returning ? ax : 0;
    returning = 0;

    sp = frame_base;
    bp = old_bp;
    loc_cnt = saved_loc_cnt;
    pc = ret_pc;
    call_depth--;
    return result;
}

/* factor := Num | Str | '(' expression ')' | Id '(' args ')' | Id '[' expression ']' | Id */
long factor(void)
{
    Token *t = &tokens[pc];

    if (t->type == Num) {
        pc++;
        return t->val;
    }
    if (t->type == Str) {
        pc++;
        return (long)t->text;
    }
    if (t->type == '(') {
        pc++;
        long val = expression();
        match(')');
        return val;
    }
    if (t->type == Id) {
        if (tokens[pc + 1].type == '(') {
            /* Native functions take precedence over user functions. */
            int nf_idx = find_native_func(t->text, (int)t->val);
            int f_idx = -1;
            if (nf_idx == -1) {
                f_idx = find_func(t->text, (int)t->val);
                if (f_idx == -1) error("Unknown function");
            }
            pc += 2;
            long args[MAX_ARGS] = {0};
            int argc = parse_call_args(args);
            if (nf_idx != -1) return native_funcs[nf_idx].func(args, argc);
            return call_function(f_idx, args, argc);
        }

        int idx = find_local(t->text, (int)t->val);
        if (idx == -1) error("Undefined variable");
        pc++;
        Symbol *sym = &locals[idx];
        if (tokens[pc].type == '[') {
            pc++;
            long index = expression();
            match(']');
            return load_cell(element_base(sym) + index);
        }
        if (sym->is_array) return sym->addr;
        return memory[sym->addr];
    }

    error("Expected expression");
    return 0;
}

/* unary := '*' unary | '&' Id | '-' unary | factor */
long unary(void)
{
    int type = tokens[pc].type;

    if (type == '*') {
        pc++;
        /* Must stay `long`: the operand may be a 64-bit host pointer. All
         * cells are `long`, so int* and char* into interpreter memory both
         * read a whole cell; only host pointers read a single byte. */
        long addr = unary();
        return load_cell(addr);
    }
    if (type == '&') {
        pc++;
        Token *t = &tokens[pc];
        if (t->type != Id) error("Expected identifier after &");
        int idx = find_local(t->text, (int)t->val);
        if (idx == -1) error("Undefined variable");
        pc++;
        return locals[idx].addr;
    }
    if (type == '-') {
        pc++;
        return -unary();
    }
    return factor();
}

/* term := unary (('*' | '/') unary)* */
long term(void)
{
    long val = unary();

    while (tokens[pc].type == '*' || tokens[pc].type == '/') {
        int op = tokens[pc++].type;
        long rhs = unary();
        if (op == '*') {
            val = val * rhs;
        } else {
            if (rhs == 0) error("Division by zero");
            val = val / rhs;
        }
    }
    return val;
}

/* add := term (('+' | '-') term)* */
long add(void)
{
    long val = term();

    while (tokens[pc].type == '+' || tokens[pc].type == '-') {
        int op = tokens[pc++].type;
        long rhs = term();
        val = (op == '+') ? val + rhs : val - rhs;
    }
    return val;
}

/* relational := add (('==' | '!=' | '<' | '>' | '<=' | '>=') add)*
 * Equality and ordering share one precedence level, left to right. */
long relational(void)
{
    long val = add();

    while (tokens[pc].type >= Eq && tokens[pc].type <= Ge) {
        int op = tokens[pc++].type;
        long rhs = add();
        switch (op) {
        case Eq: val = val == rhs; break;
        case Ne: val = val != rhs; break;
        case Lt: val = val < rhs;  break;
        case Gt: val = val > rhs;  break;
        case Le: val = val <= rhs; break;
        case Ge: val = val >= rhs; break;
        default: break;
        }
    }
    return val;
}

/* logical_and := relational ('&&' relational)*
 * Both operands are always evaluated: executing straight from tokens leaves
 * no cheap way to skip an operand, so there is no short-circuiting. */
static long logical_and(void)
{
    long val = relational();

    while (tokens[pc].type == And) {
        pc++;
        long rhs = relational();
        val = (val != 0) && (rhs != 0);
    }
    return val;
}

/* logical_or := logical_and ('||' logical_and)*   (no short-circuiting) */
static long logical_or(void)
{
    long val = logical_and();

    while (tokens[pc].type == Or) {
        pc++;
        long rhs = logical_and();
        val = (val != 0) || (rhs != 0);
    }
    return val;
}

/* Returns the token index of the ']' matching the '[' at `open`, or the index
 * of the terminator token if it is unbalanced. */
static int matching_bracket(int open)
{
    int depth = 0;
    int i = open;

    while (tokens[i].type != 0) {
        if (tokens[i].type == '[') depth++;
        else if (tokens[i].type == ']' && --depth == 0) return i;
        i++;
    }
    return i;
}

/*
 * expression := '*' unary '=' expression
 *             | Id '[' expression ']' '=' expression
 *             | Id '=' expression
 *             | logical_or
 */
long expression(void)
{
    if (tokens[pc].type == '*') {
        /* Evaluate the dereference once just to find where it ends; if an
         * '=' follows, rewind and evaluate the operand again as an address.
         * The operand therefore runs twice for `*p = v`. */
        int save_pc = pc;
        unary();
        if (tokens[pc].type == '=') {
            pc = save_pc + 1;
            long addr = unary();
            match('=');
            long val = expression();
            /* Stores outside interpreter memory (e.g. through a pointer to a
             * string literal) are deliberately ignored. */
            if (addr >= 0 && addr < MEM_SIZE) memory[addr] = val;
            return val;
        }
        pc = save_pc;
    }

    if (tokens[pc].type == Id) {
        if (tokens[pc + 1].type == '[') {
            /* Only take the assignment path when '=' really follows the
             * closing bracket; a plain read such as `a[i] + 1` must go
             * through the normal operator chain. */
            int close = matching_bracket(pc + 1);
            if (tokens[close].type == ']' && tokens[close + 1].type == '=') {
                int idx = find_local(tokens[pc].text, (int)tokens[pc].val);
                if (idx == -1) error("Assign to unknown var");
                pc += 2;
                long index = expression();
                match(']');
                match('=');
                long base = element_base(&locals[idx]);
                long val = expression();
                store_cell(base + index, val);
                return val;
            }
        } else if (tokens[pc + 1].type == '=') {
            int idx = find_local(tokens[pc].text, (int)tokens[pc].val);
            if (idx == -1) error("Assign to unknown var");
            pc += 2;
            long val = expression();
            memory[locals[idx].addr] = val;
            return val;
        }
    }
    return logical_or();
}

/* Consumes `open` and everything up to and including its matching `close`. */
static void skip_group(int open, int close)
{
    match(open);
    int depth = 1;
    while (depth > 0 && tokens[pc].type != 0) {
        if (tokens[pc].type == open) depth++;
        else if (tokens[pc].type == close) depth--;
        pc++;
    }
}

/* Skips one complete statement without executing it: a braced block, a whole
 * if/else chain, a while loop, or a simple statement through its ';'. */
void skip_block(void)
{
    switch (tokens[pc].type) {
    case '{':
        skip_group('{', '}');
        break;
    case If:
        pc++;
        skip_group('(', ')');
        skip_block();
        if (tokens[pc].type == Else) {
            pc++;
            skip_block();
        }
        break;
    case While:
        pc++;
        skip_group('(', ')');
        skip_block();
        break;
    default:
        while (tokens[pc].type != ';' && tokens[pc].type != 0) pc++;
        if (tokens[pc].type == ';') pc++;
        break;
    }
}

/* Executes `{ ... }`. Variables declared inside are released on exit, so a
 * loop body that declares locals does not grow the stack every iteration. */
static void run_block(void)
{
    int saved_loc_cnt = loc_cnt;
    int saved_sp = sp;

    match('{');
    while (tokens[pc].type != '}' && tokens[pc].type != 0) {
        statement();
        if (returning) break;
    }
    loc_cnt = saved_loc_cnt;
    sp = saved_sp;
    /* While returning, pc is mid-block; the call site resets it. */
    if (!returning) match('}');
}

/* Executes `int|char ['*'...] name ['[' size ']'] ['=' init] {',' ...} ';'`. */
static void run_declaration(void)
{
    pc++; /* type keyword; both types occupy one cell */
    while (tokens[pc].type != ';') {
        while (tokens[pc].type == '*') pc++;
        Token *name = &tokens[pc];
        match(Id);
        Symbol *sym = declare_symbol(name);
        if (tokens[pc].type == '[') {
            pc++;
            long size = expression();
            match(']');
            if (size <= 0) error("Invalid array size");
            sym->is_array = 1;
            reserve_cells(size);
        } else {
            reserve_cells(1);
        }
        if (tokens[pc].type == '=') {
            pc++;
            memory[sym->addr] = expression();
        }
        if (tokens[pc].type == ',') pc++;
    }
    match(';');
}

/* Executes `if (cond) stmt [else stmt]`, skipping the branch not taken. */
static void run_if(void)
{
    pc++;
    match('(');
    long cond = expression();
    match(')');

    if (cond) {
        statement();
        if (returning) return;
        if (tokens[pc].type == Else) {
            pc++;
            skip_block();
        }
    } else {
        skip_block();
        if (tokens[pc].type == Else) {
            pc++;
            statement();
        }
    }
}

/* Executes `while (cond) stmt` by rewinding pc to the condition each pass. */
static void run_while(void)
{
    pc++;
    int cond_pc = pc;

    for (;;) {
        pc = cond_pc;
        match('(');
        long cond = expression();
        match(')');
        if (!cond) {
            skip_block(); /* step over the body exactly once */
            return;
        }
        statement();
        if (returning) return;
    }
}

/* Executes `return [expr];`, leaving the value in `ax`. */
static void run_return(void)
{
    pc++;
    ax = (tokens[pc].type != ';') ? expression() : 0;
    match(';');
    returning = 1;
}

/* Prints a %s argument: a host C string, or a run of interpreter cells
 * (one character per cell) ending at a zero cell. */
static void print_string(long val)
{
    if (val >= 0 && val < MEM_SIZE) {
        for (long a = val; a < MEM_SIZE && memory[a] != 0; a++) {
            putchar((int)memory[a]);
        }
    } else if (is_host_pointer(val)) {
        printf("%s", (char *)val);
    } else {
        error("Invalid string address");
    }
}

/* Executes `printf("fmt", args...);`. Supports %d, %s, %c and %%; any other
 * character, including an unrecognized '%', is printed as-is. */
static void run_printf(void)
{
    pc++;
    match('(');
    char *fmt = tokens[pc].text; /* already unescaped by the tokenizer */
    match(Str);

    char *p = fmt;
    while (*p) {
        if (*p == '%' && (p[1] == 'd' || p[1] == 's' || p[1] == 'c')) {
            match(',');
            long val = expression();
            if (p[1] == 'd') printf("%ld", val);
            else if (p[1] == 's') print_string(val);
            else putchar((int)val);
            p += 2;
        } else if (*p == '%' && p[1] == '%') {
            putchar('%');
            p += 2;
        } else {
            putchar(*p++);
        }
    }
    match(')');
    match(';');
}

/* Executes the statement at pc. After a `return` has run, `returning` is set
 * and callers must unwind without executing anything further. */
void statement(void)
{
    switch (tokens[pc].type) {
    case '{':
        run_block();
        break;
    case Int:
    case Char:
        run_declaration();
        break;
    case If:
        run_if();
        break;
    case While:
        run_while();
        break;
    case Return:
        run_return();
        break;
    case Printf:
        run_printf();
        break;
    case ';': /* empty statement */
        pc++;
        break;
    default:
        expression();
        match(';');
        break;
    }
}

/* Records every top-level `int|char name(params) { ... }` definition in
 * funcs[]. Prototypes (no body) are ignored so they cannot shadow the real
 * definition. */
void scan_functions(void)
{
    int i = 0;

    while (tokens[i].type != 0) {
        int is_type = tokens[i].type == Int || tokens[i].type == Char;
        if (!(is_type && tokens[i + 1].type == Id && tokens[i + 2].type == '(')) {
            i++;
            continue;
        }

        int name_idx = i + 1;
        int param_start = i + 3;
        int params = 0;

        i = param_start;
        while (tokens[i].type != ')' && tokens[i].type != 0) {
            if (tokens[i].type == Int || tokens[i].type == Char) {
                params++;
                i++; /* type */
                while (tokens[i].type == '*') i++;
                if (tokens[i].type == Id) i++;
            } else {
                i++;
            }
        }
        if (tokens[i].type == ')') i++;
        if (tokens[i].type != '{') continue;

        /* Point pc at the offending name so error() reports a useful spot. */
        if (func_cnt >= MAX_FUNCS) { pc = name_idx; error("Too many functions"); }
        if (params > MAX_ARGS) { pc = name_idx; error("Too many parameters"); }
        if (tokens[name_idx].val >= NAME_LEN) { pc = name_idx; error("Identifier too long"); }

        Func *f = &funcs[func_cnt++];
        copy_token_name(f->name, &tokens[name_idx]);
        f->param_start = param_start;
        f->param_count = params;
        f->entry_point = i;

        /* Step over the body so nested declarations are not scanned. */
        int brace = 0;
        do {
            if (tokens[i].type == '{') brace++;
            if (tokens[i].type == '}') brace--;
            i++;
        } while (brace > 0 && tokens[i].type != 0);
    }
}

// --- Native functions ---
/* Each native receives the evaluated arguments and their count. Missing
 * arguments read as 0 instead of whatever lies past the end of the array. */

static long arg_at(const long *args, int argc, int i)
{
    return i < argc ? args[i] : 0;
}

/* True when [addr, addr + len) lies inside interpreter memory. */
static int vm_range_ok(long addr, long len)
{
    return addr >= 0 && len >= 0 && addr <= MEM_SIZE && len <= MEM_SIZE - addr;
}

/* socket(domain, type, protocol) */
long native_socket(long *args, int argc)
{
    int domain = (int)arg_at(args, argc, 0);
    int type = (int)arg_at(args, argc, 1);
    int protocol = (int)arg_at(args, argc, 2);

    return socket(domain, type, protocol);
}

/* bind(sockfd, port): binds to INADDR_ANY on the given TCP/UDP port. */
long native_bind(long *args, int argc)
{
    int sockfd = (int)arg_at(args, argc, 0);
    int port = (int)arg_at(args, argc, 1);
    struct sockaddr_in addr;

    memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons((unsigned short)port);
    return bind(sockfd, (struct sockaddr *)&addr, sizeof addr);
}

/* listen(sockfd, backlog) */
long native_listen(long *args, int argc)
{
    int sockfd = (int)arg_at(args, argc, 0);
    int backlog = (int)arg_at(args, argc, 1);

    return listen(sockfd, backlog);
}

/* accept(sockfd): the peer address is discarded. */
long native_accept(long *args, int argc)
{
    int sockfd = (int)arg_at(args, argc, 0);

    return accept(sockfd, NULL, NULL);
}

/* recv(sockfd, buf, len, flags): `buf` is an interpreter address; each byte
 * received is stored in its own cell. Returns recv()'s result, or -1 when the
 * destination range is invalid. At most IO_BUF_SIZE bytes are read per call. */
long native_recv(long *args, int argc)
{
    int sockfd = (int)arg_at(args, argc, 0);
    long addr = arg_at(args, argc, 1);
    long len = arg_at(args, argc, 2);
    int flags = (int)arg_at(args, argc, 3);
    char temp_buf[IO_BUF_SIZE];

    if (len > IO_BUF_SIZE) len = IO_BUF_SIZE;
    if (!vm_range_ok(addr, len)) return -1;

    long result = (long)recv(sockfd, temp_buf, (size_t)len, flags);
    for (long i = 0; i < result; i++) {
        memory[addr + i] = temp_buf[i];
    }
    return result;
}

/* send(sockfd, buf, len, flags): `buf` is either a host pointer (string
 * literal), sent as-is, or an interpreter address whose cells are packed to
 * bytes first (at most IO_BUF_SIZE per call). Returns -1 on an invalid range. */
long native_send(long *args, int argc)
{
    int sockfd = (int)arg_at(args, argc, 0);
    long buf_arg = arg_at(args, argc, 1);
    long len = arg_at(args, argc, 2);
    int flags = (int)arg_at(args, argc, 3);

    if (len < 0) return -1;
    if (is_host_pointer(buf_arg)) {
        return (long)send(sockfd, (char *)buf_arg, (size_t)len, flags);
    }

    char temp_buf[IO_BUF_SIZE];
    if (len > IO_BUF_SIZE) len = IO_BUF_SIZE;
    if (!vm_range_ok(buf_arg, len)) return -1;
    for (long i = 0; i < len; i++) {
        temp_buf[i] = (char)memory[buf_arg + i];
    }
    return (long)send(sockfd, temp_buf, (size_t)len, flags);
}

/* close(fd) */
long native_close(long *args, int argc)
{
    int fd = (int)arg_at(args, argc, 0);

    return close(fd);
}

/* strlen(s): works on host strings and on zero-terminated runs of cells. */
long native_strlen(long *args, int argc)
{
    long addr = arg_at(args, argc, 0);

    if (is_host_pointer(addr)) return (long)strlen((char *)addr);
    if (addr < 0 || addr >= MEM_SIZE) return 0;

    long n = 0;
    while (addr + n < MEM_SIZE && memory[addr + n] != 0) n++;
    return n;
}

/* AF_INET(): constants are exposed as zero-argument functions. */
long native_AF_INET(long *args, int argc)
{
    (void)args;
    (void)argc;
    return AF_INET;
}

/* SOCK_STREAM() */
long native_SOCK_STREAM(long *args, int argc)
{
    (void)args;
    (void)argc;
    return SOCK_STREAM;
}

/* Installs the native function table. */
void register_native_functions(void)
{
    register_native_func("socket", native_socket);
    register_native_func("bind", native_bind);
    register_native_func("listen", native_listen);
    register_native_func("accept", native_accept);
    register_native_func("recv", native_recv);
    register_native_func("send", native_send);
    register_native_func("close", native_close);
    register_native_func("strlen", native_strlen);
    register_native_func("AF_INET", native_AF_INET);
    register_native_func("SOCK_STREAM", native_SOCK_STREAM);
}

/* Loads the script named by argv[1] (at most MAX_SRC bytes) and runs its
 * `main`. Returns 1 on a usage or setup error, 0 otherwise; the script's own
 * return value is not propagated. */
int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("Usage: ./mini_c file.c\n");
        return 1;
    }

    FILE *f = fopen(argv[1], "rb");
    if (!f) {
        printf("Could not open file.\n");
        return 1;
    }

    src_code = malloc(MAX_SRC + 1); /* +1 for the terminator on a full read */
    if (!src_code) {
        fclose(f);
        printf("Out of memory.\n");
        return 1;
    }
    size_t n = fread(src_code, 1, MAX_SRC, f);
    src_code[n] = 0;
    fclose(f);

    register_native_functions();
    tokenize(src_code);
    scan_functions();

    int main_idx = find_func("main", 4);
    if (main_idx == -1) {
        printf("No main function found.\n");
        free(src_code);
        return 1;
    }

    /* Any parameters declared by the script's main are bound to zero. */
    long no_args[MAX_ARGS] = {0};
    call_function(main_idx, no_args, funcs[main_idx].param_count);

    free(src_code);
    return 0;
}