2025-11-22 16:53:39 +01:00
|
|
|
/*
 * Mini C Interpreter
 * A concise recursive-descent interpreter written in C.
 * Supports: int, char*, pointers, if/else, while, functions, printf.
 * Updates: pointer declarations, improved printf, escape sequences.
 */
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <ctype.h>
|
2025-11-22 22:22:43 +01:00
|
|
|
#include <math.h>
|
2025-11-22 16:53:39 +01:00
|
|
|
#include <sys/socket.h>
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
|
|
|
|
|
|
/* Capacity limits for the interpreter's fixed-size buffers. */
#define MAX_SRC        100000  /* bytes of program text read by main() */
#define MAX_TOK        10000   /* entries in tokens[] */
#define VAR_MAX        500     /* entries in locals[] */
#define MEM_SIZE       10000   /* cells in memory[]; also used by the pointer heuristics */
#define STR_POOL_SIZE  100000  /* bytes in str_pool[] */
|
2025-11-22 16:53:39 +01:00
|
|
|
|
|
|
|
|
// --- Token Types ---
|
|
|
|
|
/*
 * Token codes. Single-character tokens use their character code, so the
 * named codes start at 128.
 */
enum {
    /* literals and names */
    Num = 128,
    Str,
    Id,
    /* keywords */
    Int,
    Char,
    Else,
    If,
    While,
    Return,
    Printf,
    /* multi-character operators (plus '<' and '>') */
    Assign,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
    Or,
    And
};
|
|
|
|
|
|
|
|
|
|
// --- Structures ---
|
|
|
|
|
/* One lexical token produced by tokenize(). */
typedef struct {
    int   type;  /* character code, or one of the named token codes */
    long  val;   /* Num: value; Id/keyword: name length; Str: decoded length (long so it can hold pointers on 64-bit) */
    char *text;  /* start of the token inside the source buffer (for Str: first character after the quote) */
} Token;
|
|
|
|
|
|
|
|
|
|
/* A declared variable. */
typedef struct {
    char name[32];  /* NUL-terminated identifier */
    int  type;      /* not assigned by the code visible in this file */
    int  addr;      /* index of the variable's first cell in memory[] */
    int  is_array;  /* 1 when declared with [size], otherwise 0 */
} Symbol;
|
|
|
|
|
|
|
|
|
|
/* A function found by scan_functions(). */
typedef struct {
    char name[32];     /* NUL-terminated function name */
    int  entry_point;  /* token index just after the parameter list's ')' */
    int  param_count;  /* number of parameters counted in the declaration */
} Func;
|
|
|
|
|
|
|
|
|
|
/* Host function callable from a script: receives the evaluated arguments and their count. */
typedef long (*NativeFunc)(long*, int);

/* Name-to-implementation entry of the native function table. */
typedef struct {
    char       name[32];  /* NUL-terminated name used in scripts */
    NativeFunc func;      /* implementation to call */
} NativeFuncDef;
|
|
|
|
|
|
|
|
|
|
// --- Globals ---
|
|
|
|
|
Token tokens[MAX_TOK];
|
|
|
|
|
int tk_idx = 0;
|
|
|
|
|
int pc = 0;
|
|
|
|
|
long memory[MEM_SIZE]; // Changed to long
|
|
|
|
|
int sp = 0;
|
|
|
|
|
int bp = 0;
|
|
|
|
|
|
|
|
|
|
Symbol locals[VAR_MAX];
|
|
|
|
|
int loc_cnt = 0;
|
|
|
|
|
|
|
|
|
|
Func funcs[100];
|
|
|
|
|
int func_cnt = 0;
|
|
|
|
|
|
|
|
|
|
NativeFuncDef native_funcs[100];
|
|
|
|
|
int native_func_cnt = 0;
|
|
|
|
|
|
|
|
|
|
char *src_code;
|
|
|
|
|
|
2025-11-22 22:22:43 +01:00
|
|
|
char str_pool[STR_POOL_SIZE];
|
|
|
|
|
int str_pool_idx = 0;
|
|
|
|
|
|
2025-11-22 16:53:39 +01:00
|
|
|
// --- Tokenizer ---
|
|
|
|
|
void tokenize(char *src) {
|
|
|
|
|
char *s = src;
|
|
|
|
|
while (*s) {
|
|
|
|
|
if (isspace(*s)) { s++; continue; }
|
|
|
|
|
|
|
|
|
|
Token *t = &tokens[tk_idx++];
|
|
|
|
|
t->text = s;
|
|
|
|
|
|
|
|
|
|
// Comments
|
|
|
|
|
if (*s == '/' && *(s+1) == '/') {
|
|
|
|
|
while (*s && *s != '\n') s++;
|
|
|
|
|
tk_idx--;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Keywords & Identifiers
|
|
|
|
|
if (isalpha(*s)) {
|
|
|
|
|
int len = 0;
|
|
|
|
|
while (isalnum(s[len]) || s[len] == '_') len++;
|
|
|
|
|
|
|
|
|
|
char buf[32];
|
|
|
|
|
strncpy(buf, s, len); buf[len] = 0;
|
|
|
|
|
|
|
|
|
|
if (!strcmp(buf, "int")) t->type = Int;
|
|
|
|
|
else if (!strcmp(buf, "char")) t->type = Char;
|
|
|
|
|
else if (!strcmp(buf, "if")) t->type = If;
|
|
|
|
|
else if (!strcmp(buf, "else")) t->type = Else;
|
|
|
|
|
else if (!strcmp(buf, "while")) t->type = While;
|
|
|
|
|
else if (!strcmp(buf, "return")) t->type = Return;
|
|
|
|
|
else if (!strcmp(buf, "printf")) t->type = Printf;
|
|
|
|
|
else t->type = Id;
|
|
|
|
|
|
|
|
|
|
t->val = len;
|
|
|
|
|
s += len;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Numbers
|
|
|
|
|
if (isdigit(*s)) {
|
|
|
|
|
t->type = Num;
|
|
|
|
|
t->val = strtol(s, &s, 10);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Strings
|
|
|
|
|
if (*s == '"') {
|
|
|
|
|
s++;
|
|
|
|
|
t->type = Str;
|
|
|
|
|
t->text = s;
|
|
|
|
|
|
|
|
|
|
char *d = s;
|
|
|
|
|
while (*s && *s != '"') {
|
|
|
|
|
if (*s == '\\' && *(s+1) == 'n') {
|
|
|
|
|
*d++ = '\n'; s+=2;
|
|
|
|
|
} else {
|
|
|
|
|
*d++ = *s++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (*s == '"') s++;
|
|
|
|
|
*d = 0;
|
|
|
|
|
t->val = (long)(d - t->text);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Operators
|
|
|
|
|
if (!strncmp(s, "==", 2)) { t->type = Eq; s += 2; continue; }
|
|
|
|
|
if (!strncmp(s, "!=", 2)) { t->type = Ne; s += 2; continue; }
|
|
|
|
|
if (!strncmp(s, "<=", 2)) { t->type = Le; s += 2; continue; }
|
|
|
|
|
if (!strncmp(s, ">=", 2)) { t->type = Ge; s += 2; continue; }
|
|
|
|
|
if (!strncmp(s, "&&", 2)) { t->type = And; s += 2; continue; }
|
|
|
|
|
if (!strncmp(s, "||", 2)) { t->type = Or; s += 2; continue; }
|
|
|
|
|
|
|
|
|
|
if (*s == '<') { t->type = Lt; s++; continue; }
|
|
|
|
|
if (*s == '>') { t->type = Gt; s++; continue; }
|
|
|
|
|
|
|
|
|
|
t->type = *s++;
|
|
|
|
|
}
|
|
|
|
|
tokens[tk_idx].type = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// --- Helpers ---
|
|
|
|
|
void error(char *msg) {
|
|
|
|
|
printf("Error at token %d ('%c'): %s\n", pc, tokens[pc].type, msg);
|
|
|
|
|
exit(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void match(int type) {
|
|
|
|
|
if (tokens[pc].type == type) pc++;
|
|
|
|
|
else error("Unexpected token");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int find_local(char *name, int len) {
|
|
|
|
|
for (int i = loc_cnt - 1; i >= 0; i--) {
|
|
|
|
|
if (!strncmp(locals[i].name, name, len) && locals[i].name[len] == 0)
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int find_func(char *name, int len) {
|
|
|
|
|
for (int i = 0; i < func_cnt; i++) {
|
|
|
|
|
if (!strncmp(funcs[i].name, name, len) && funcs[i].name[len] == 0)
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int find_native_func(char *name, int len) {
|
|
|
|
|
for (int i = 0; i < native_func_cnt; i++) {
|
|
|
|
|
if (!strncmp(native_funcs[i].name, name, len) && native_funcs[i].name[len] == 0)
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Add a host function to the native function table under `name`.
 * Names longer than 31 characters are truncated. Exits with a message on
 * stderr if the table is already full.
 */
void register_native_func(char *name, NativeFunc func) {
    const int capacity = (int)(sizeof native_funcs / sizeof native_funcs[0]);

    if (native_func_cnt >= capacity) {
        fprintf(stderr, "Error: native function table full (limit %d), cannot register '%s'\n",
                capacity, name);
        exit(1);
    }

    NativeFuncDef *nf = &native_funcs[native_func_cnt++];
    snprintf(nf->name, sizeof nf->name, "%s", name);  /* always NUL-terminated */
    nf->func = func;
}
|
|
|
|
|
|
2025-11-22 22:22:43 +01:00
|
|
|
// --- Forward Declarations ---
|
|
|
|
|
/* String helpers defined after the parser functions that call them. */
int  is_string_ptr(long val);
long slice_string(long str_ptr, int start, int end);
|
|
|
|
|
|
2025-11-22 16:53:39 +01:00
|
|
|
// --- Parser ---
|
|
|
|
|
/* Mutually recursive parser entry points, declared before first use. */
long expression();
void statement();
|
|
|
|
|
|
|
|
|
|
long factor() {
|
|
|
|
|
Token *t = &tokens[pc];
|
|
|
|
|
long val = 0;
|
|
|
|
|
|
|
|
|
|
if (t->type == Num) {
|
|
|
|
|
pc++;
|
|
|
|
|
return t->val;
|
|
|
|
|
}
|
|
|
|
|
else if (t->type == Str) {
|
|
|
|
|
pc++;
|
|
|
|
|
return (long)t->text;
|
|
|
|
|
}
|
|
|
|
|
else if (t->type == '(') {
|
|
|
|
|
pc++;
|
|
|
|
|
val = expression();
|
|
|
|
|
match(')');
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
else if (t->type == Id) {
|
|
|
|
|
if (tokens[pc + 1].type == '(') {
|
|
|
|
|
int nf_idx = find_native_func(t->text, t->val);
|
|
|
|
|
|
|
|
|
|
if (nf_idx != -1) {
|
|
|
|
|
pc += 2;
|
|
|
|
|
long args[10];
|
|
|
|
|
int argc = 0;
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type != ')') {
|
|
|
|
|
do {
|
|
|
|
|
args[argc++] = expression();
|
|
|
|
|
} while (tokens[pc].type == ',' && pc++);
|
|
|
|
|
}
|
|
|
|
|
match(')');
|
|
|
|
|
|
|
|
|
|
return native_funcs[nf_idx].func(args, argc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int f_idx = find_func(t->text, t->val);
|
|
|
|
|
if (f_idx == -1) error("Unknown function");
|
|
|
|
|
pc += 2;
|
|
|
|
|
|
|
|
|
|
int old_bp = bp;
|
|
|
|
|
long args[10];
|
|
|
|
|
int argc = 0;
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type != ')') {
|
|
|
|
|
do {
|
|
|
|
|
args[argc++] = expression();
|
|
|
|
|
} while (tokens[pc].type == ',' && pc++);
|
|
|
|
|
}
|
|
|
|
|
match(')');
|
|
|
|
|
|
|
|
|
|
int ret_pc = pc;
|
|
|
|
|
memory[sp] = bp; bp = sp++;
|
|
|
|
|
memory[sp++] = ret_pc;
|
|
|
|
|
for(int i=0; i<argc; i++) memory[sp++] = args[i];
|
|
|
|
|
|
|
|
|
|
pc = funcs[f_idx].entry_point;
|
|
|
|
|
statement();
|
|
|
|
|
|
|
|
|
|
extern long ax;
|
|
|
|
|
val = ax;
|
|
|
|
|
|
|
|
|
|
sp = bp;
|
|
|
|
|
bp = memory[sp];
|
|
|
|
|
pc = ret_pc;
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
int idx = find_local(t->text, t->val);
|
|
|
|
|
if (idx == -1) error("Undefined variable");
|
|
|
|
|
pc++;
|
|
|
|
|
|
|
|
|
|
Symbol *sym = &locals[idx];
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type == '[') {
|
|
|
|
|
pc++;
|
2025-11-22 22:22:43 +01:00
|
|
|
long start_or_index = expression();
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type == ':') {
|
|
|
|
|
pc++;
|
|
|
|
|
long end = expression();
|
|
|
|
|
match(']');
|
|
|
|
|
|
|
|
|
|
long val = memory[sym->addr];
|
|
|
|
|
if (is_string_ptr(val)) {
|
|
|
|
|
return slice_string(val, start_or_index, end);
|
|
|
|
|
} else {
|
|
|
|
|
error("Slicing only works on strings");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
match(']');
|
|
|
|
|
return memory[sym->addr + start_or_index];
|
|
|
|
|
}
|
2025-11-22 16:53:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (sym->is_array) {
|
|
|
|
|
return sym->addr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return memory[sym->addr];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Result register: written by statement()'s `return` handling, read by
 * factor() after a script function body has run, reset by main(). */
long ax = 0;
|
|
|
|
|
|
2025-11-22 22:22:43 +01:00
|
|
|
int is_string_ptr(long val) {
|
|
|
|
|
return val > MEM_SIZE * 8;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long concat_strings(long ptr1, long ptr2) {
|
|
|
|
|
char *s1 = (char*)ptr1;
|
|
|
|
|
char *s2 = (char*)ptr2;
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
|
|
|
|
|
int len1 = strlen(s1);
|
|
|
|
|
int len2 = strlen(s2);
|
|
|
|
|
|
|
|
|
|
if (str_pool_idx + len1 + len2 + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
strcpy(result, s1);
|
|
|
|
|
strcat(result, s2);
|
|
|
|
|
|
|
|
|
|
str_pool_idx += len1 + len2 + 1;
|
|
|
|
|
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long slice_string(long str_ptr, int start, int end) {
|
|
|
|
|
char *str = (char*)str_ptr;
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
int str_len = strlen(str);
|
|
|
|
|
|
|
|
|
|
if (start < 0) start = 0;
|
|
|
|
|
if (end < 0) end = str_len;
|
|
|
|
|
if (end > str_len) end = str_len;
|
|
|
|
|
if (start > end) start = end;
|
|
|
|
|
|
|
|
|
|
int length = end - start;
|
|
|
|
|
|
|
|
|
|
if (str_pool_idx + length + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
strncpy(result, str + start, length);
|
|
|
|
|
result[length] = 0;
|
|
|
|
|
|
|
|
|
|
str_pool_idx += length + 1;
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-22 16:53:39 +01:00
|
|
|
long unary() {
|
|
|
|
|
if (tokens[pc].type == '*') {
|
|
|
|
|
pc++;
|
|
|
|
|
int addr = unary();
|
|
|
|
|
// CAUTION: Simplified access.
|
|
|
|
|
// In real C, dereferencing char* reads a byte, int* reads int.
|
|
|
|
|
// Here, everything is 'long' cell in memory.
|
|
|
|
|
// This works for the specific request but is not byte-perfect.
|
|
|
|
|
// Since 'b' holds an address to 'src_code' (char*), reading memory[addr] is invalid
|
|
|
|
|
// if addr points to 'src_code'. 'memory' array is only for stack variables.
|
|
|
|
|
// To support reading chars from string literals:
|
|
|
|
|
|
|
|
|
|
// Hack for reading string characters:
|
|
|
|
|
// If addr is outside virtual memory range, assume it's a pointer to raw C memory (string literal)
|
|
|
|
|
// This is a bit unsafe but needed for "char *b" where b points to tokens.
|
|
|
|
|
if (addr > MEM_SIZE * 8 || addr < 0) { // Rough heuristic for external pointer
|
|
|
|
|
return *(char*)addr;
|
|
|
|
|
}
|
|
|
|
|
return memory[addr];
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == '&') {
|
|
|
|
|
pc++;
|
|
|
|
|
Token *t = &tokens[pc];
|
|
|
|
|
if (t->type != Id) error("Expected identifier after &");
|
|
|
|
|
int idx = find_local(t->text, t->val);
|
|
|
|
|
if (idx == -1) error("Undefined variable");
|
|
|
|
|
pc++;
|
|
|
|
|
return locals[idx].addr;
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == '-') {
|
|
|
|
|
pc++;
|
|
|
|
|
return -unary();
|
|
|
|
|
}
|
|
|
|
|
return factor();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long term() {
|
|
|
|
|
long val = unary();
|
|
|
|
|
while (tokens[pc].type == '*' || tokens[pc].type == '/') {
|
|
|
|
|
int op = tokens[pc++].type;
|
|
|
|
|
long val2 = unary();
|
|
|
|
|
if (op == '*') val = val * val2;
|
|
|
|
|
else val = val / val2;
|
|
|
|
|
}
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long add() {
|
|
|
|
|
long val = term();
|
|
|
|
|
while (tokens[pc].type == '+' || tokens[pc].type == '-') {
|
|
|
|
|
int op = tokens[pc++].type;
|
|
|
|
|
long val2 = term();
|
2025-11-22 22:22:43 +01:00
|
|
|
if (op == '+') {
|
|
|
|
|
if (is_string_ptr(val) && is_string_ptr(val2)) {
|
|
|
|
|
val = concat_strings(val, val2);
|
|
|
|
|
} else {
|
|
|
|
|
val = val + val2;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
val = val - val2;
|
|
|
|
|
}
|
2025-11-22 16:53:39 +01:00
|
|
|
}
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long relational() {
|
|
|
|
|
long val = add();
|
|
|
|
|
while (tokens[pc].type >= Eq && tokens[pc].type <= Ge) {
|
|
|
|
|
int op = tokens[pc++].type;
|
|
|
|
|
long val2 = add();
|
|
|
|
|
if (op == Eq) val = val == val2;
|
|
|
|
|
if (op == Ne) val = val != val2;
|
|
|
|
|
if (op == Lt) val = val < val2;
|
|
|
|
|
if (op == Gt) val = val > val2;
|
|
|
|
|
}
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long expression() {
|
|
|
|
|
// Check for pointer assignment: *ptr = val (Not fully robust for *b = 'x', but ok for int*)
|
|
|
|
|
if (tokens[pc].type == '*') {
|
|
|
|
|
int save_pc = pc;
|
|
|
|
|
unary();
|
|
|
|
|
if (tokens[pc].type == '=') {
|
|
|
|
|
pc = save_pc;
|
|
|
|
|
pc++;
|
|
|
|
|
long addr = unary();
|
|
|
|
|
match('=');
|
|
|
|
|
long val = expression();
|
|
|
|
|
if (addr >= 0 && addr < MEM_SIZE) memory[addr] = val;
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
pc = save_pc;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type == Id) {
|
|
|
|
|
if (tokens[pc+1].type == '[') {
|
|
|
|
|
int idx = find_local(tokens[pc].text, tokens[pc].val);
|
|
|
|
|
if (idx == -1) error("Assign to unknown var");
|
|
|
|
|
pc += 2;
|
2025-11-22 22:22:43 +01:00
|
|
|
long start_or_index = expression();
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type == ':') {
|
|
|
|
|
pc++;
|
|
|
|
|
long end = expression();
|
|
|
|
|
match(']');
|
|
|
|
|
|
|
|
|
|
long val = memory[locals[idx].addr];
|
|
|
|
|
if (is_string_ptr(val)) {
|
|
|
|
|
return slice_string(val, start_or_index, end);
|
|
|
|
|
} else {
|
|
|
|
|
error("Slicing only works on strings");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-22 16:53:39 +01:00
|
|
|
match(']');
|
|
|
|
|
int addr = locals[idx].addr;
|
|
|
|
|
if (tokens[pc].type == '=') {
|
|
|
|
|
pc++;
|
|
|
|
|
long val = expression();
|
2025-11-22 22:22:43 +01:00
|
|
|
memory[addr + start_or_index] = val;
|
2025-11-22 16:53:39 +01:00
|
|
|
return val;
|
|
|
|
|
}
|
2025-11-22 22:22:43 +01:00
|
|
|
return memory[addr + start_or_index];
|
2025-11-22 16:53:39 +01:00
|
|
|
} else if (tokens[pc+1].type == '=') {
|
|
|
|
|
int idx = find_local(tokens[pc].text, tokens[pc].val);
|
|
|
|
|
if (idx == -1) error("Assign to unknown var");
|
|
|
|
|
pc += 2;
|
|
|
|
|
long val = expression();
|
|
|
|
|
memory[locals[idx].addr] = val;
|
|
|
|
|
return val;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return relational();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void skip_block() {
|
|
|
|
|
int brace = 0;
|
|
|
|
|
do {
|
|
|
|
|
if (tokens[pc].type == '{') brace++;
|
|
|
|
|
if (tokens[pc].type == '}') brace--;
|
|
|
|
|
pc++;
|
|
|
|
|
} while (brace > 0 && tokens[pc].type != 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void statement() {
|
|
|
|
|
if (tokens[pc].type == '{') {
|
|
|
|
|
pc++;
|
|
|
|
|
while (tokens[pc].type != '}' && tokens[pc].type != 0) {
|
|
|
|
|
statement();
|
|
|
|
|
if (ax == -999) break;
|
|
|
|
|
}
|
|
|
|
|
match('}');
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == Int || tokens[pc].type == Char) {
|
|
|
|
|
pc++;
|
|
|
|
|
while (tokens[pc].type != ';') {
|
|
|
|
|
while (tokens[pc].type == '*') pc++;
|
|
|
|
|
Token *t = &tokens[pc];
|
|
|
|
|
match(Id);
|
|
|
|
|
|
|
|
|
|
int addr = sp;
|
|
|
|
|
Symbol *s = &locals[loc_cnt++];
|
|
|
|
|
strncpy(s->name, t->text, t->val); s->name[t->val] = 0;
|
|
|
|
|
s->addr = addr;
|
|
|
|
|
s->is_array = 0;
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type == '[') {
|
|
|
|
|
pc++;
|
|
|
|
|
int size = (int)expression();
|
|
|
|
|
match(']');
|
|
|
|
|
s->is_array = 1;
|
|
|
|
|
sp += size;
|
|
|
|
|
} else {
|
|
|
|
|
sp++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tokens[pc].type == '=') {
|
|
|
|
|
pc++;
|
|
|
|
|
memory[addr] = expression();
|
|
|
|
|
}
|
|
|
|
|
if (tokens[pc].type == ',') pc++;
|
|
|
|
|
}
|
|
|
|
|
match(';');
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == If) {
|
|
|
|
|
pc++;
|
|
|
|
|
match('(');
|
|
|
|
|
long cond = expression();
|
|
|
|
|
match(')');
|
|
|
|
|
if (cond) {
|
|
|
|
|
statement();
|
|
|
|
|
if (ax == -999) return;
|
|
|
|
|
if (tokens[pc].type == Else) { pc++; skip_block(); }
|
|
|
|
|
} else {
|
|
|
|
|
skip_block();
|
|
|
|
|
if (tokens[pc].type == Else) {
|
|
|
|
|
pc++;
|
|
|
|
|
statement();
|
|
|
|
|
if (ax == -999) return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == While) {
|
|
|
|
|
pc++;
|
|
|
|
|
int loop_start = pc;
|
|
|
|
|
match('(');
|
|
|
|
|
long cond = expression();
|
|
|
|
|
match(')');
|
2025-11-22 22:22:43 +01:00
|
|
|
if (!cond) {
|
|
|
|
|
skip_block();
|
|
|
|
|
} else {
|
|
|
|
|
while (1) {
|
|
|
|
|
statement();
|
|
|
|
|
if (ax == -999) return;
|
|
|
|
|
int save_pc = pc;
|
|
|
|
|
pc = loop_start;
|
|
|
|
|
match('(');
|
|
|
|
|
cond = expression();
|
|
|
|
|
match(')');
|
|
|
|
|
if (!cond) { pc = save_pc; break; }
|
|
|
|
|
}
|
2025-11-22 16:53:39 +01:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == Return) {
|
|
|
|
|
pc++;
|
|
|
|
|
if (tokens[pc].type != ';') ax = expression();
|
|
|
|
|
else ax = 0;
|
|
|
|
|
match(';');
|
|
|
|
|
ax = -999;
|
|
|
|
|
}
|
|
|
|
|
else if (tokens[pc].type == Printf) {
|
|
|
|
|
pc++;
|
|
|
|
|
match('(');
|
|
|
|
|
char *fmt = tokens[pc].text; // Get raw format string
|
|
|
|
|
match(Str);
|
|
|
|
|
|
|
|
|
|
char *p = fmt;
|
|
|
|
|
while (*p) {
|
|
|
|
|
if (*p == '%' && (p[1] == 'd' || p[1] == 's')) {
|
|
|
|
|
p++; // skip %
|
|
|
|
|
match(',');
|
|
|
|
|
long val = expression();
|
|
|
|
|
if (*p == 'd') printf("%ld", val);
|
|
|
|
|
else if (*p == 's') printf("%s", (char*)val);
|
|
|
|
|
p++; // skip d or s
|
|
|
|
|
} else {
|
|
|
|
|
putchar(*p++);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
match(')');
|
|
|
|
|
match(';');
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
expression();
|
|
|
|
|
match(';');
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void scan_functions() {
|
|
|
|
|
int i = 0;
|
|
|
|
|
while (tokens[i].type != 0) {
|
|
|
|
|
// Simple scan: Type Id ( ...
|
|
|
|
|
if ((tokens[i].type == Int || tokens[i].type == Char) &&
|
|
|
|
|
tokens[i+1].type == Id && tokens[i+2].type == '(') {
|
|
|
|
|
|
|
|
|
|
Func *f = &funcs[func_cnt++];
|
|
|
|
|
Token *name = &tokens[i+1];
|
|
|
|
|
strncpy(f->name, name->text, name->val); f->name[name->val] = 0;
|
|
|
|
|
|
|
|
|
|
i += 3; // Type Id (
|
|
|
|
|
int params = 0;
|
|
|
|
|
while(tokens[i].type != ')') {
|
|
|
|
|
if (tokens[i].type == Int || tokens[i].type == Char) {
|
|
|
|
|
params++;
|
|
|
|
|
// Skip 'char * name' or 'int name'
|
|
|
|
|
i++; // Type
|
|
|
|
|
while (tokens[i].type == '*') i++;
|
|
|
|
|
if (tokens[i].type == Id) i++;
|
|
|
|
|
} else i++;
|
|
|
|
|
}
|
|
|
|
|
f->param_count = params;
|
|
|
|
|
i++;
|
|
|
|
|
f->entry_point = i;
|
|
|
|
|
|
|
|
|
|
int brace = 0;
|
|
|
|
|
do {
|
|
|
|
|
if (tokens[i].type == '{') brace++;
|
|
|
|
|
if (tokens[i].type == '}') brace--;
|
|
|
|
|
i++;
|
|
|
|
|
} while (brace > 0 && tokens[i].type != 0);
|
|
|
|
|
} else {
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * socket(domain, type, protocol): create a socket.
 * Returns the descriptor, or -1 on failure or when fewer than three
 * arguments are supplied.
 */
long native_socket(long *args, int argc) {
    if (argc < 3) return -1;
    return socket((int)args[0], (int)args[1], (int)args[2]);
}
|
|
|
|
|
|
|
|
|
|
/*
 * bind(sockfd, port): bind an IPv4 socket to `port` on all interfaces.
 * Returns the result of bind(), or -1 when an argument is missing or the
 * port is outside 0..65535.
 */
long native_bind(long *args, int argc) {
    if (argc < 2) return -1;

    int sockfd = (int)args[0];
    long port = args[1];
    if (port < 0 || port > 65535) return -1;

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons((unsigned short)port);

    return bind(sockfd, (struct sockaddr*)&addr, sizeof(addr));
}
|
|
|
|
|
|
|
|
|
|
/*
 * listen(sockfd, backlog).
 * Returns the result of listen(), or -1 when an argument is missing.
 */
long native_listen(long *args, int argc) {
    if (argc < 2) return -1;
    return listen((int)args[0], (int)args[1]);
}
|
|
|
|
|
|
|
|
|
|
/*
 * accept(sockfd): accept a connection; the peer address is discarded.
 * Returns the new descriptor, or -1 on failure or when the argument is missing.
 */
long native_accept(long *args, int argc) {
    if (argc < 1) return -1;
    return accept((int)args[0], NULL, NULL);
}
|
|
|
|
|
|
|
|
|
|
long native_recv(long *args, int argc) {
|
|
|
|
|
int sockfd = (int)args[0];
|
|
|
|
|
int addr = (int)args[1];
|
|
|
|
|
int len = (int)args[2];
|
|
|
|
|
int flags = (int)args[3];
|
|
|
|
|
|
|
|
|
|
char temp_buf[8192];
|
|
|
|
|
if (len > 8192) len = 8192;
|
|
|
|
|
|
|
|
|
|
int result = recv(sockfd, temp_buf, len, flags);
|
|
|
|
|
if (result > 0) {
|
|
|
|
|
for (int i = 0; i < result; i++) {
|
|
|
|
|
memory[addr + i] = temp_buf[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_send(long *args, int argc) {
|
|
|
|
|
int sockfd = (int)args[0];
|
|
|
|
|
long buf_arg = args[1];
|
|
|
|
|
int len = (int)args[2];
|
|
|
|
|
int flags = (int)args[3];
|
|
|
|
|
|
|
|
|
|
if (buf_arg > MEM_SIZE * 8 || buf_arg < 0) {
|
|
|
|
|
return send(sockfd, (char*)buf_arg, len, flags);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char temp_buf[8192];
|
|
|
|
|
if (len > 8192) len = 8192;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < len; i++) {
|
|
|
|
|
temp_buf[i] = (char)memory[buf_arg + i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return send(sockfd, temp_buf, len, flags);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * close(fd).
 * Returns the result of close(), or -1 when the argument is missing.
 */
long native_close(long *args, int argc) {
    if (argc < 1) return -1;
    return close((int)args[0]);
}
|
|
|
|
|
|
|
|
|
|
/*
 * strlen(str): length of the host C string passed as a pointer value.
 * Returns 0 when the argument is missing or null.
 */
long native_strlen(long *args, int argc) {
    if (argc < 1 || args[0] == 0) return 0;
    const char *str = (const char *)args[0];
    return (long)strlen(str);
}
|
|
|
|
|
|
|
|
|
|
/* AF_INET(): exposes the host's AF_INET constant to scripts; arguments are ignored. */
long native_AF_INET(long *args, int argc) {
    (void)args;
    (void)argc;
    const long family = AF_INET;
    return family;
}
|
|
|
|
|
|
|
|
|
|
/* SOCK_STREAM(): exposes the host's SOCK_STREAM constant to scripts; arguments are ignored. */
long native_SOCK_STREAM(long *args, int argc) {
    (void)args;
    (void)argc;
    const long kind = SOCK_STREAM;
    return kind;
}
|
|
|
|
|
|
2025-11-22 22:22:43 +01:00
|
|
|
long native_sqrt(long *args, int argc) {
|
|
|
|
|
return (long)sqrt((double)args[0]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_pow(long *args, int argc) {
|
|
|
|
|
return (long)pow((double)args[0], (double)args[1]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_sin(long *args, int argc) {
|
|
|
|
|
return (long)(sin((double)args[0]) * 1000000);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_cos(long *args, int argc) {
|
|
|
|
|
return (long)(cos((double)args[0]) * 1000000);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_tan(long *args, int argc) {
|
|
|
|
|
return (long)(tan((double)args[0]) * 1000000);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * abs(x): absolute value over the full range of long (the argument is no
 * longer narrowed to int first).
 * Returns 0 when the argument is missing.
 */
long native_abs(long *args, int argc) {
    if (argc < 1) return 0;
    return labs(args[0]);
}
|
|
|
|
|
|
|
|
|
|
/*
 * floor(x): arguments are integers, so the floor is the value itself.
 * Returning it directly avoids the precision loss of a round trip through
 * double for values beyond 2^53.
 * Returns 0 when the argument is missing.
 */
long native_floor(long *args, int argc) {
    if (argc < 1) return 0;
    return args[0];
}
|
|
|
|
|
|
|
|
|
|
/*
 * ceil(x): arguments are integers, so the ceiling is the value itself.
 * Returning it directly avoids the precision loss of a round trip through
 * double for values beyond 2^53.
 * Returns 0 when the argument is missing.
 */
long native_ceil(long *args, int argc) {
    if (argc < 1) return 0;
    return args[0];
}
|
|
|
|
|
|
|
|
|
|
/*
 * strpos(haystack, needle): offset of the first occurrence of `needle`.
 * Returns -1 when it does not occur, or when an argument is missing or null.
 */
long native_strpos(long *args, int argc) {
    if (argc < 2 || args[0] == 0 || args[1] == 0) return -1;

    const char *haystack = (const char *)args[0];
    const char *needle = (const char *)args[1];
    const char *pos = strstr(haystack, needle);

    return pos ? (long)(pos - haystack) : -1;
}
|
|
|
|
|
|
|
|
|
|
long native_substr(long *args, int argc) {
|
|
|
|
|
char *str = (char*)args[0];
|
|
|
|
|
int start = (int)args[1];
|
|
|
|
|
int length = (int)args[2];
|
|
|
|
|
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
int str_len = strlen(str);
|
|
|
|
|
|
|
|
|
|
if (start < 0) start = 0;
|
|
|
|
|
if (start >= str_len) return (long)"";
|
|
|
|
|
if (start + length > str_len) length = str_len - start;
|
|
|
|
|
if (length < 0) return (long)"";
|
|
|
|
|
|
|
|
|
|
if (str_pool_idx + length + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
strncpy(result, str + start, length);
|
|
|
|
|
result[length] = 0;
|
|
|
|
|
|
|
|
|
|
str_pool_idx += length + 1;
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_upper(long *args, int argc) {
|
|
|
|
|
char *str = (char*)args[0];
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
int len = strlen(str);
|
|
|
|
|
|
|
|
|
|
if (str_pool_idx + len + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i <= len; i++) {
|
|
|
|
|
result[i] = toupper(str[i]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
str_pool_idx += len + 1;
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_lower(long *args, int argc) {
|
|
|
|
|
char *str = (char*)args[0];
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
int len = strlen(str);
|
|
|
|
|
|
|
|
|
|
if (str_pool_idx + len + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i <= len; i++) {
|
|
|
|
|
result[i] = tolower(str[i]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
str_pool_idx += len + 1;
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_strip(long *args, int argc) {
|
|
|
|
|
char *str = (char*)args[0];
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
|
|
|
|
|
while (*str && isspace(*str)) str++;
|
|
|
|
|
|
|
|
|
|
int len = strlen(str);
|
|
|
|
|
while (len > 0 && isspace(str[len - 1])) len--;
|
|
|
|
|
|
|
|
|
|
if (str_pool_idx + len + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
strncpy(result, str, len);
|
|
|
|
|
result[len] = 0;
|
|
|
|
|
|
|
|
|
|
str_pool_idx += len + 1;
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long native_replace(long *args, int argc) {
|
|
|
|
|
char *str = (char*)args[0];
|
|
|
|
|
char *old_str = (char*)args[1];
|
|
|
|
|
char *new_str = (char*)args[2];
|
|
|
|
|
|
|
|
|
|
char *result = &str_pool[str_pool_idx];
|
|
|
|
|
char *src = str;
|
|
|
|
|
char *dst = result;
|
|
|
|
|
int old_len = strlen(old_str);
|
|
|
|
|
int new_len = strlen(new_str);
|
|
|
|
|
|
|
|
|
|
while (*src) {
|
|
|
|
|
if (strncmp(src, old_str, old_len) == 0) {
|
|
|
|
|
if (str_pool_idx + (dst - result) + new_len + strlen(src + old_len) + 1 >= STR_POOL_SIZE) {
|
|
|
|
|
error("String pool overflow");
|
|
|
|
|
}
|
|
|
|
|
strcpy(dst, new_str);
|
|
|
|
|
dst += new_len;
|
|
|
|
|
src += old_len;
|
|
|
|
|
} else {
|
|
|
|
|
*dst++ = *src++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*dst = 0;
|
|
|
|
|
|
|
|
|
|
int total_len = dst - result;
|
|
|
|
|
str_pool_idx += total_len + 1;
|
|
|
|
|
return (long)result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * startswith(str, prefix): 1 when `str` begins with `prefix`, otherwise 0.
 * Returns 0 when an argument is missing or null.
 */
long native_startswith(long *args, int argc) {
    if (argc < 2 || args[0] == 0 || args[1] == 0) return 0;

    const char *str = (const char *)args[0];
    const char *prefix = (const char *)args[1];

    return strncmp(str, prefix, strlen(prefix)) == 0;
}
|
|
|
|
|
|
|
|
|
|
/*
 * endswith(str, suffix): 1 when `str` ends with `suffix`, otherwise 0.
 * Returns 0 when an argument is missing or null.
 */
long native_endswith(long *args, int argc) {
    if (argc < 2 || args[0] == 0 || args[1] == 0) return 0;

    const char *str = (const char *)args[0];
    const char *suffix = (const char *)args[1];
    size_t str_len = strlen(str);
    size_t suffix_len = strlen(suffix);

    if (suffix_len > str_len) return 0;
    return strcmp(str + (str_len - suffix_len), suffix) == 0;
}
|
|
|
|
|
|
2025-11-22 16:53:39 +01:00
|
|
|
void register_native_functions() {
|
|
|
|
|
register_native_func("socket", native_socket);
|
|
|
|
|
register_native_func("bind", native_bind);
|
|
|
|
|
register_native_func("listen", native_listen);
|
|
|
|
|
register_native_func("accept", native_accept);
|
|
|
|
|
register_native_func("recv", native_recv);
|
|
|
|
|
register_native_func("send", native_send);
|
|
|
|
|
register_native_func("close", native_close);
|
|
|
|
|
register_native_func("strlen", native_strlen);
|
|
|
|
|
register_native_func("AF_INET", native_AF_INET);
|
|
|
|
|
register_native_func("SOCK_STREAM", native_SOCK_STREAM);
|
2025-11-22 22:22:43 +01:00
|
|
|
register_native_func("sqrt", native_sqrt);
|
|
|
|
|
register_native_func("pow", native_pow);
|
|
|
|
|
register_native_func("sin", native_sin);
|
|
|
|
|
register_native_func("cos", native_cos);
|
|
|
|
|
register_native_func("tan", native_tan);
|
|
|
|
|
register_native_func("abs", native_abs);
|
|
|
|
|
register_native_func("floor", native_floor);
|
|
|
|
|
register_native_func("ceil", native_ceil);
|
|
|
|
|
register_native_func("strpos", native_strpos);
|
|
|
|
|
register_native_func("substr", native_substr);
|
|
|
|
|
register_native_func("upper", native_upper);
|
|
|
|
|
register_native_func("lower", native_lower);
|
|
|
|
|
register_native_func("strip", native_strip);
|
|
|
|
|
register_native_func("replace", native_replace);
|
|
|
|
|
register_native_func("startswith", native_startswith);
|
|
|
|
|
register_native_func("endswith", native_endswith);
|
2025-11-22 16:53:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int main(int argc, char **argv) {
|
|
|
|
|
if (argc < 2) { printf("Usage: ./mini_c file.c\n"); return 1; }
|
|
|
|
|
|
|
|
|
|
FILE *f = fopen(argv[1], "rb");
|
|
|
|
|
if (!f) { printf("Could not open file.\n"); return 1; }
|
|
|
|
|
|
|
|
|
|
src_code = malloc(MAX_SRC);
|
|
|
|
|
size_t n = fread(src_code, 1, MAX_SRC, f);
|
|
|
|
|
src_code[n] = 0;
|
|
|
|
|
fclose(f);
|
|
|
|
|
|
|
|
|
|
register_native_functions();
|
|
|
|
|
|
|
|
|
|
tokenize(src_code);
|
|
|
|
|
scan_functions();
|
|
|
|
|
|
|
|
|
|
int main_idx = find_func("main", 4);
|
|
|
|
|
if (main_idx == -1) { printf("No main function found.\n"); return 1; }
|
|
|
|
|
|
|
|
|
|
pc = funcs[main_idx].entry_point;
|
|
|
|
|
memory[sp++] = 0;
|
|
|
|
|
memory[sp++] = 0;
|
|
|
|
|
|
|
|
|
|
ax = 0;
|
|
|
|
|
statement();
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|