/*
* Mini C Interpreter
* A concise recursive-descent interpreter in C.
* Supports: int, char*, pointers, if/else, while, functions, printf.
* Updates: Pointer declarations, improved printf, escape sequences.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <arpa/inet.h>
// --- Limits ---
// Size in bytes of the buffer main() allocates for the source file.
#define MAX_SRC 100000
// Capacity of the tokens[] array.
#define MAX_TOK 10000
// Capacity of the locals[] symbol table.
#define VAR_MAX 500
// Number of long cells in the interpreter's memory[] array.
#define MEM_SIZE 10000
// --- Token Types ---
// Named token types start at 128; single-character tokens are stored with
// their own character code as the type (see the end of tokenize()).
// Type 0 marks the end of the token stream.
// relational() relies on Eq..Ge being contiguous and in this order.
// Assign, Or and And are never consumed by the visible parser, and Assign is
// never produced by the visible tokenizer ('=' is stored as its character code).
enum {
Num = 128, Str, Id, Int, Char, Else, If, While, Return, Printf,
Assign, Eq, Ne, Lt, Gt, Le, Ge, Or, And
};
// --- Structures ---
// One lexical token produced by tokenize().
typedef struct {
int type; // enum constant above, or the raw character code of a one-character token
long val; // long so it can hold pointers on 64-bit. Num: numeric value; identifiers/keywords: name length; Str: string length
char *text; // points into the source buffer at the token start (for Str: first character after the opening quote)
} Token;
// A declared variable; entries live in locals[].
typedef struct {
char name[32]; // NUL-terminated identifier
int type; // not assigned or read anywhere in the visible code
int addr; // index of the variable's first cell in memory[]
int is_array; // 1 when declared with [size], else 0; an array name evaluates to addr itself
} Symbol;
// A user-defined function recorded by scan_functions(); entries live in funcs[].
typedef struct {
char name[32]; // NUL-terminated function name
int entry_point; // index in tokens[] of the token right after the ')' of the parameter list
int param_count; // number of int/char parameters counted by scan_functions()
} Func;
// Signature of a host (native) function callable from interpreted code:
// receives the evaluated argument values and their count, returns the value
// of the call expression.
typedef long (*NativeFunc)(long*, int);
// Name-to-implementation binding; entries live in native_funcs[].
typedef struct {
char name[32]; // name used to call the function from interpreted code
NativeFunc func; // host implementation
} NativeFuncDef;
// --- Globals ---
Token tokens[MAX_TOK]; // token stream filled by tokenize()
int tk_idx = 0; // number of tokens stored so far
int pc = 0; // index in tokens[] of the token currently being interpreted
long memory[MEM_SIZE]; // interpreter memory, one long per cell (long so a cell can hold a host pointer)
int sp = 0; // index of the next free cell in memory[]
int bp = 0; // base of the current call frame in memory[] (maintained by factor())
Symbol locals[VAR_MAX]; // symbol table of declared variables
int loc_cnt = 0; // number of used entries in locals[]
Func funcs[100]; // user-defined functions found by scan_functions()
int func_cnt = 0; // number of used entries in funcs[]
NativeFuncDef native_funcs[100]; // native functions added by register_native_func()
int native_func_cnt = 0; // number of used entries in native_funcs[]
char *src_code; // heap buffer holding the source text (allocated in main())
// --- Tokenizer ---
void tokenize(char *src) {
char *s = src;
while (*s) {
if (isspace(*s)) { s++; continue; }
Token *t = &tokens[tk_idx++];
t->text = s;
// Comments
if (*s == '/' && *(s+1) == '/') {
while (*s && *s != '\n') s++;
tk_idx--;
continue;
}
// Keywords & Identifiers
if (isalpha(*s)) {
int len = 0;
while (isalnum(s[len]) || s[len] == '_') len++;
char buf[32];
strncpy(buf, s, len); buf[len] = 0;
if (!strcmp(buf, "int")) t->type = Int;
else if (!strcmp(buf, "char")) t->type = Char;
else if (!strcmp(buf, "if")) t->type = If;
else if (!strcmp(buf, "else")) t->type = Else;
else if (!strcmp(buf, "while")) t->type = While;
else if (!strcmp(buf, "return")) t->type = Return;
else if (!strcmp(buf, "printf")) t->type = Printf;
else t->type = Id;
t->val = len;
s += len;
continue;
}
// Numbers
if (isdigit(*s)) {
t->type = Num;
t->val = strtol(s, &s, 10);
continue;
}
// Strings
if (*s == '"') {
s++;
t->type = Str;
t->text = s;
char *d = s;
while (*s && *s != '"') {
if (*s == '\\' && *(s+1) == 'n') {
*d++ = '\n'; s+=2;
} else {
*d++ = *s++;
}
}
if (*s == '"') s++;
*d = 0;
t->val = (long)(d - t->text);
continue;
}
// Operators
if (!strncmp(s, "==", 2)) { t->type = Eq; s += 2; continue; }
if (!strncmp(s, "!=", 2)) { t->type = Ne; s += 2; continue; }
if (!strncmp(s, "<=", 2)) { t->type = Le; s += 2; continue; }
if (!strncmp(s, ">=", 2)) { t->type = Ge; s += 2; continue; }
if (!strncmp(s, "&&", 2)) { t->type = And; s += 2; continue; }
if (!strncmp(s, "||", 2)) { t->type = Or; s += 2; continue; }
if (*s == '<') { t->type = Lt; s++; continue; }
if (*s == '>') { t->type = Gt; s++; continue; }
t->type = *s++;
}
tokens[tk_idx].type = 0;
}
// --- Helpers ---
// Report a fatal interpreter error and terminate the process with status 1.
// The message names the current token index (pc). Tokens whose type is a
// printable character are shown as that character; keyword/operator enum
// values and the end-of-stream marker are shown numerically, because printing
// them with %c would emit a non-printable byte.
void error(char *msg) {
    int type = tokens[pc].type;
    if (type > 0 && type < 128 && isprint(type))
        printf("Error at token %d ('%c'): %s\n", pc, type, msg);
    else
        printf("Error at token %d (type %d): %s\n", pc, type, msg);
    exit(1);
}
void match(int type) {
if (tokens[pc].type == type) pc++;
else error("Unexpected token");
}
// Look up a variable by spelling.
//   name: start of the identifier (need not be NUL-terminated)
//   len:  number of characters in the identifier
// Returns the index in locals[] of the most recently added match, or -1.
// Stored names hold at most sizeof(name) - 1 characters, so longer spellings
// are compared on that prefix only; this also keeps stored[len] in bounds.
int find_local(char *name, int len) {
    const int max_len = (int)sizeof locals[0].name - 1;
    if (len < 0) return -1;
    if (len > max_len) len = max_len;
    // Search newest-first so later declarations shadow earlier ones.
    for (int i = loc_cnt - 1; i >= 0; i--) {
        const char *stored = locals[i].name;
        if (strncmp(stored, name, (size_t)len) == 0 && stored[len] == '\0')
            return i;
    }
    return -1;
}
// Look up a user-defined function by spelling.
//   name: start of the identifier (need not be NUL-terminated)
//   len:  number of characters in the identifier
// Returns the index in funcs[] of the first match, or -1.
// Stored names hold at most sizeof(name) - 1 characters, so longer spellings
// are compared on that prefix only; this also keeps stored[len] in bounds.
int find_func(char *name, int len) {
    const int max_len = (int)sizeof funcs[0].name - 1;
    if (len < 0) return -1;
    if (len > max_len) len = max_len;
    for (int i = 0; i < func_cnt; i++) {
        const char *stored = funcs[i].name;
        if (strncmp(stored, name, (size_t)len) == 0 && stored[len] == '\0')
            return i;
    }
    return -1;
}
// Look up a registered native function by spelling.
//   name: start of the identifier (need not be NUL-terminated)
//   len:  number of characters in the identifier
// Returns the index in native_funcs[] of the first match, or -1.
// Stored names hold at most sizeof(name) - 1 characters, so longer spellings
// are compared on that prefix only; this also keeps stored[len] in bounds.
int find_native_func(char *name, int len) {
    const int max_len = (int)sizeof native_funcs[0].name - 1;
    if (len < 0) return -1;
    if (len > max_len) len = max_len;
    for (int i = 0; i < native_func_cnt; i++) {
        const char *stored = native_funcs[i].name;
        if (strncmp(stored, name, (size_t)len) == 0 && stored[len] == '\0')
            return i;
    }
    return -1;
}
// Append a native function to native_funcs[].
//   name: NUL-terminated name used from interpreted code; truncated to fit
//         the fixed-size name field
//   func: host implementation
// Exits with status 1 if the table is already full.
void register_native_func(char *name, NativeFunc func) {
    const int capacity = (int)(sizeof native_funcs / sizeof native_funcs[0]);
    if (native_func_cnt >= capacity) {
        printf("Error: too many native functions (limit %d)\n", capacity);
        exit(1);
    }
    NativeFuncDef *nf = &native_funcs[native_func_cnt++];
    strncpy(nf->name, name, sizeof nf->name - 1);
    nf->name[sizeof nf->name - 1] = 0; // strncpy does not terminate on truncation
    nf->func = func;
}
// --- Parser ---
// Forward declarations: factor() calls both expression() and statement()
// before either of them is defined.
long expression();
void statement();
long factor() {
Token *t = &tokens[pc];
long val = 0;
if (t->type == Num) {
pc++;
return t->val;
}
else if (t->type == Str) {
pc++;
return (long)t->text;
}
else if (t->type == '(') {
pc++;
val = expression();
match(')');
return val;
}
else if (t->type == Id) {
if (tokens[pc + 1].type == '(') {
int nf_idx = find_native_func(t->text, t->val);
if (nf_idx != -1) {
pc += 2;
long args[10];
int argc = 0;
if (tokens[pc].type != ')') {
do {
args[argc++] = expression();
} while (tokens[pc].type == ',' && pc++);
}
match(')');
return native_funcs[nf_idx].func(args, argc);
}
int f_idx = find_func(t->text, t->val);
if (f_idx == -1) error("Unknown function");
pc += 2;
int old_bp = bp;
long args[10];
int argc = 0;
if (tokens[pc].type != ')') {
do {
args[argc++] = expression();
} while (tokens[pc].type == ',' && pc++);
}
match(')');
int ret_pc = pc;
memory[sp] = bp; bp = sp++;
memory[sp++] = ret_pc;
for(int i=0; i<argc; i++) memory[sp++] = args[i];
pc = funcs[f_idx].entry_point;
statement();
extern long ax;
val = ax;
sp = bp;
bp = memory[sp];
pc = ret_pc;
return val;
}
else {
int idx = find_local(t->text, t->val);
if (idx == -1) error("Undefined variable");
pc++;
Symbol *sym = &locals[idx];
if (tokens[pc].type == '[') {
pc++;
long index = expression();
match(']');
return memory[sym->addr + index];
}
if (sym->is_array) {
return sym->addr;
}
return memory[sym->addr];
}
}
return 0;
}
// Accumulator shared by call handling: statement() writes it while executing
// `return`, and factor() reads it after a called function's body has run.
long ax = 0;
long unary() {
if (tokens[pc].type == '*') {
pc++;
int addr = unary();
// CAUTION: Simplified access.
// In real C, dereferencing char* reads a byte, int* reads int.
// Here, everything is 'long' cell in memory.
// This works for the specific request but is not byte-perfect.
// Since 'b' holds an address to 'src_code' (char*), reading memory[addr] is invalid
// if addr points to 'src_code'. 'memory' array is only for stack variables.
// To support reading chars from string literals:
// Hack for reading string characters:
// If addr is outside virtual memory range, assume it's a pointer to raw C memory (string literal)
// This is a bit unsafe but needed for "char *b" where b points to tokens.
if (addr > MEM_SIZE * 8 || addr < 0) { // Rough heuristic for external pointer
return *(char*)addr;
}
return memory[addr];
}
else if (tokens[pc].type == '&') {
pc++;
Token *t = &tokens[pc];
if (t->type != Id) error("Expected identifier after &");
int idx = find_local(t->text, t->val);
if (idx == -1) error("Undefined variable");
pc++;
return locals[idx].addr;
}
else if (tokens[pc].type == '-') {
pc++;
return -unary();
}
return factor();
}
long term() {
long val = unary();
while (tokens[pc].type == '*' || tokens[pc].type == '/') {
int op = tokens[pc++].type;
long val2 = unary();
if (op == '*') val = val * val2;
else val = val / val2;
}
return val;
}
// Parse and evaluate a left-associative chain of '+' and '-' over term()
// operands, returning the accumulated result.
long add() {
    long acc = term();
    for (;;) {
        const int op = tokens[pc].type;
        if (op != '+' && op != '-') {
            return acc;
        }
        pc++;
        const long rhs = term();
        acc = (op == '+') ? acc + rhs : acc - rhs;
    }
}
long relational() {
long val = add();
while (tokens[pc].type >= Eq && tokens[pc].type <= Ge) {
int op = tokens[pc++].type;
long val2 = add();
if (op == Eq) val = val == val2;
if (op == Ne) val = val != val2;
if (op == Lt) val = val < val2;
if (op == Gt) val = val > val2;
}
return val;
}
long expression() {
// Check for pointer assignment: *ptr = val (Not fully robust for *b = 'x', but ok for int*)
if (tokens[pc].type == '*') {
int save_pc = pc;
unary();
if (tokens[pc].type == '=') {
pc = save_pc;
pc++;
long addr = unary();
match('=');
long val = expression();
if (addr >= 0 && addr < MEM_SIZE) memory[addr] = val;
return val;
}
pc = save_pc;
}
if (tokens[pc].type == Id) {
if (tokens[pc+1].type == '[') {
int idx = find_local(tokens[pc].text, tokens[pc].val);
if (idx == -1) error("Assign to unknown var");
pc += 2;
long index = expression();
match(']');
int addr = locals[idx].addr;
if (tokens[pc].type == '=') {
pc++;
long val = expression();
memory[addr + index] = val;
return val;
}
return memory[addr + index];
} else if (tokens[pc+1].type == '=') {
int idx = find_local(tokens[pc].text, tokens[pc].val);
if (idx == -1) error("Assign to unknown var");
pc += 2;
long val = expression();
memory[locals[idx].addr] = val;
return val;
}
}
return relational();
}
void skip_block() {
int brace = 0;
do {
if (tokens[pc].type == '{') brace++;
if (tokens[pc].type == '}') brace--;
pc++;
} while (brace > 0 && tokens[pc].type != 0);
}
void statement() {
if (tokens[pc].type == '{') {
pc++;
while (tokens[pc].type != '}' && tokens[pc].type != 0) {
statement();
if (ax == -999) break;
}
match('}');
}
else if (tokens[pc].type == Int || tokens[pc].type == Char) {
pc++;
while (tokens[pc].type != ';') {
while (tokens[pc].type == '*') pc++;
Token *t = &tokens[pc];
match(Id);
int addr = sp;
Symbol *s = &locals[loc_cnt++];
strncpy(s->name, t->text, t->val); s->name[t->val] = 0;
s->addr = addr;
s->is_array = 0;
if (tokens[pc].type == '[') {
pc++;
int size = (int)expression();
match(']');
s->is_array = 1;
sp += size;
} else {
sp++;
}
if (tokens[pc].type == '=') {
pc++;
memory[addr] = expression();
}
if (tokens[pc].type == ',') pc++;
}
match(';');
}
else if (tokens[pc].type == If) {
pc++;
match('(');
long cond = expression();
match(')');
if (cond) {
statement();
if (ax == -999) return;
if (tokens[pc].type == Else) { pc++; skip_block(); }
} else {
skip_block();
if (tokens[pc].type == Else) {
pc++;
statement();
if (ax == -999) return;
}
}
}
else if (tokens[pc].type == While) {
pc++;
int loop_start = pc;
match('(');
long cond = expression();
match(')');
while (cond) {
statement();
if (ax == -999) return;
int save_pc = pc;
pc = loop_start;
match('(');
cond = expression();
match(')');
if (!cond) { pc = save_pc; break; }
}
if (!cond) skip_block();
}
else if (tokens[pc].type == Return) {
pc++;
if (tokens[pc].type != ';') ax = expression();
else ax = 0;
match(';');
ax = -999;
}
else if (tokens[pc].type == Printf) {
pc++;
match('(');
char *fmt = tokens[pc].text; // Get raw format string
match(Str);
char *p = fmt;
while (*p) {
if (*p == '%' && (p[1] == 'd' || p[1] == 's')) {
p++; // skip %
match(',');
long val = expression();
if (*p == 'd') printf("%ld", val);
else if (*p == 's') printf("%s", (char*)val);
p++; // skip d or s
} else {
putchar(*p++);
}
}
match(')');
match(';');
}
else {
expression();
match(';');
}
}
// Scan the whole token stream for function definitions of the form
//   int|char [*...] name ( params ) { body }
// and record each one in funcs[] (name, parameter count, entry point). The
// entry point is the token right after the ')' of the parameter list, i.e. the
// opening '{' of the body. Bodies are skipped, so nothing inside them is
// scanned. Prototypes (no '{' after the parameter list) are not recorded.
// Exits with status 1 when funcs[] is full.
void scan_functions() {
    const int capacity = (int)(sizeof funcs / sizeof funcs[0]);
    int i = 0;
    while (tokens[i].type != 0) {
        if (tokens[i].type != Int && tokens[i].type != Char) { i++; continue; }
        // Simple scan: Type [*...] Id ( ...
        int j = i + 1;
        while (tokens[j].type == '*') j++;
        if (tokens[j].type != Id || tokens[j + 1].type != '(') { i++; continue; }
        Token *name = &tokens[j];
        i = j + 2; // first token inside the parentheses
        int params = 0;
        while (tokens[i].type != ')' && tokens[i].type != 0) {
            if (tokens[i].type == Int || tokens[i].type == Char) {
                params++;
                // Skip 'char * name' or 'int name'
                i++; // Type
                while (tokens[i].type == '*') i++;
                if (tokens[i].type == Id) i++;
            } else i++;
        }
        if (tokens[i].type == 0) break; // unterminated parameter list
        i++; // ')'
        if (tokens[i].type != '{') continue; // prototype or stray text: no body to run
        if (func_cnt >= capacity) {
            printf("Error: too many functions (limit %d)\n", capacity);
            exit(1);
        }
        Func *f = &funcs[func_cnt++];
        size_t n = (size_t)name->val;
        if (n > sizeof f->name - 1) n = sizeof f->name - 1;
        memcpy(f->name, name->text, n);
        f->name[n] = 0;
        f->param_count = params;
        f->entry_point = i;
        // Skip the body brace-to-brace.
        int brace = 0;
        do {
            if (tokens[i].type == '{') brace++;
            if (tokens[i].type == '}') brace--;
            i++;
        } while (brace > 0 && tokens[i].type != 0);
    }
}
// Native socket(domain, type[, protocol]): create a socket.
// protocol defaults to 0 when omitted. Returns the descriptor from socket(),
// or -1 when fewer than two arguments are supplied.
long native_socket(long *args, int argc) {
    if (argc < 2) return -1;
    int domain = (int)args[0];
    int type = (int)args[1];
    int protocol = argc > 2 ? (int)args[2] : 0;
    return socket(domain, type, protocol);
}
// Native bind(sockfd, port): bind an IPv4 socket to `port` on all local
// interfaces (INADDR_ANY). Returns the result of bind(), or -1 when fewer than
// two arguments are supplied.
long native_bind(long *args, int argc) {
    if (argc < 2) return -1;
    int sockfd = (int)args[0];
    int port = (int)args[1];
    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons((uint16_t)port); // network byte order
    return bind(sockfd, (struct sockaddr*)&addr, sizeof(addr));
}
// Native listen(sockfd, backlog). Returns the result of listen(), or -1 when
// fewer than two arguments are supplied.
long native_listen(long *args, int argc) {
    if (argc < 2) return -1;
    int sockfd = (int)args[0];
    int backlog = (int)args[1];
    return listen(sockfd, backlog);
}
// Native accept(sockfd): accept one connection, discarding the peer address.
// Returns the new descriptor from accept(), or -1 when no argument is supplied.
long native_accept(long *args, int argc) {
    if (argc < 1) return -1;
    int sockfd = (int)args[0];
    return accept(sockfd, NULL, NULL);
}
long native_recv(long *args, int argc) {
int sockfd = (int)args[0];
int addr = (int)args[1];
int len = (int)args[2];
int flags = (int)args[3];
char temp_buf[8192];
if (len > 8192) len = 8192;
int result = recv(sockfd, temp_buf, len, flags);
if (result > 0) {
for (int i = 0; i < result; i++) {
memory[addr + i] = temp_buf[i];
}
}
return result;
}
// Native send(sockfd, buf, len[, flags]).
//   buf   either a host pointer (e.g. a string literal) or a memory[] index of
//         an interpreter buffer holding one byte per cell; values that are
//         negative or above MEM_SIZE * 8 are taken to be host pointers
//   len   byte count; for interpreter buffers it is clamped to the 8192-byte
//         staging buffer and to the cells remaining after buf
//   flags defaults to 0 when omitted
// Returns the byte count from send(), or -1 when arguments are missing, len is
// negative, or buf is neither a host pointer nor inside memory[].
long native_send(long *args, int argc) {
    if (argc < 3) return -1;
    int sockfd = (int)args[0];
    long buf_arg = args[1];
    long len = args[2];
    int flags = argc > 3 ? (int)args[3] : 0;
    // A negative length would become a huge size_t.
    if (len < 0) return -1;
    if (buf_arg > MEM_SIZE * 8 || buf_arg < 0) {
        return send(sockfd, (char*)buf_arg, (size_t)len, flags);
    }
    if (buf_arg >= MEM_SIZE) return -1;
    char temp_buf[8192];
    if (len > (long)sizeof temp_buf) len = (long)sizeof temp_buf;
    if (len > MEM_SIZE - buf_arg) len = MEM_SIZE - buf_arg;
    for (long i = 0; i < len; i++) {
        temp_buf[i] = (char)memory[buf_arg + i];
    }
    return send(sockfd, temp_buf, (size_t)len, flags);
}
// Native close(fd). Returns the result of close(), or -1 when no argument is
// supplied.
long native_close(long *args, int argc) {
    if (argc < 1) return -1;
    int fd = (int)args[0];
    return close(fd);
}
long native_strlen(long *args, int argc) {
char *str = (char*)args[0];
return strlen(str);
}
// Native AF_INET(): exposes the host's AF_INET constant to interpreted code.
// Both parameters are ignored.
long native_AF_INET(long *args, int argc) {
    (void)args;
    (void)argc;
    const long family = AF_INET;
    return family;
}
// Native SOCK_STREAM(): exposes the host's SOCK_STREAM constant to interpreted
// code. Both parameters are ignored.
long native_SOCK_STREAM(long *args, int argc) {
    (void)args;
    (void)argc;
    const long kind = SOCK_STREAM;
    return kind;
}
void register_native_functions() {
register_native_func("socket", native_socket);
register_native_func("bind", native_bind);
register_native_func("listen", native_listen);
register_native_func("accept", native_accept);
register_native_func("recv", native_recv);
register_native_func("send", native_send);
register_native_func("close", native_close);
register_native_func("strlen", native_strlen);
register_native_func("AF_INET", native_AF_INET);
register_native_func("SOCK_STREAM", native_SOCK_STREAM);
}
// Entry point: load the source file named by argv[1], tokenize it, locate the
// function definitions and run the interpreted program's `main`.
// Returns 1 on usage errors, unreadable or oversized input, allocation
// failure, or a missing `main`; 0 after the program has run.
int main(int argc, char **argv) {
    if (argc < 2) { printf("Usage: ./mini_c file.c\n"); return 1; }
    FILE *f = fopen(argv[1], "rb");
    if (!f) { printf("Could not open file.\n"); return 1; }
    // One extra byte so the terminator fits even when the file fills the buffer.
    src_code = malloc(MAX_SRC + 1);
    if (!src_code) { printf("Out of memory.\n"); fclose(f); return 1; }
    size_t n = fread(src_code, 1, MAX_SRC, f);
    // Refuse to run a silently truncated program.
    int too_large = (n == MAX_SRC && fgetc(f) != EOF);
    fclose(f);
    if (too_large) {
        printf("Source file too large (limit %d bytes).\n", MAX_SRC);
        return 1;
    }
    src_code[n] = 0;
    register_native_functions();
    tokenize(src_code);
    scan_functions();
    int main_idx = find_func("main", 4);
    if (main_idx == -1) { printf("No main function found.\n"); return 1; }
    pc = funcs[main_idx].entry_point;
    // Two zero cells stand in for the saved bp and return position that a
    // call frame built by factor() would start with.
    memory[sp++] = 0;
    memory[sp++] = 0;
    ax = 0;
    statement();
    return 0;
}