diff --git a/Makefile b/Makefile index 49e0b92..e3b9037 100644 --- a/Makefile +++ b/Makefile @@ -65,4 +65,11 @@ docker_run: docker run -v .:/app --rm -it r build_deb: - dpkg-deb --build r_package \ No newline at end of file + dpkg-deb --build r_package + +# --- RAG Side Project --- +rag_test: rag_test.c rag.c db_utils.c rag.h db_utils.h + $(CC) -o rag_test rag_test.c rag.c db_utils.c -lsqlite3 -ljson-c + +run_rag_test: rag_test + ./rag_test diff --git a/main.c b/main.c index ed0bc46..e43045b 100644 --- a/main.c +++ b/main.c @@ -1,12 +1,4 @@ #include "r.h" -#include -#include -#include -#include -#include -#include -#include - #include "db_utils.h" #include "line.h" #include "markdown.h" @@ -14,255 +6,231 @@ #include "tools.h" #include "utils.h" +#include +#include +#include +#include +#include +#include +#include +#include + volatile sig_atomic_t sigint_count = 0; time_t first_sigint_time = 0; bool SYNTAX_HIGHLIGHT_ENABLED = true; bool API_MODE = false; -void help(); -void render(const char *); -bool openai_include(const char *); -char *strreplace(const char *, const char *, const char *); +static void render(const char *content); +static bool openai_include(const char *path); +static char *get_prompt_from_stdin(char *prompt); +static char *get_prompt_from_args(int argc, char **argv); +static bool try_prompt(int argc, char *argv[]); +static void repl(void); +static void init(void); +static void handle_sigint(int sig); -char *get_prompt_from_stdin(char *prompt) { - int index = 0; - char c; - while ((c = getchar()) != EOF) { - prompt[index++] = c; - } - prompt[index] = '\0'; - return prompt; +static char *get_prompt_from_stdin(char *prompt) { + int index = 0; + int c; + while ((c = getchar()) != EOF) { + prompt[index++] = (char)c; + } + prompt[index] = '\0'; + return prompt; } -char *get_prompt_from_args(int argc, char **argv) { - char *prompt = malloc(10 * 1024 * 1024 + 1); - char *system = malloc(1024 * 1024); - if (!prompt || !system) { - fprintf(stderr, "Error: Memory allocation failed.\n"); - free(prompt); - free(system); - return NULL; - } - - bool get_from_std_in = false; - - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "--stdin") == 0) { - fprintf(stderr, "Reading from stdin.\n"); - get_from_std_in = true; - } else if (strcmp(argv[i], "--verbose") == 0) { - is_verbose = true; - } else if (strcmp(argv[i], "--py") == 0 && i + 1 < argc) { - char *py_file_path = expand_home_directory(argv[++i]); - fprintf(stderr, "Including \"%s\".\n", py_file_path); - openai_include(py_file_path); - free(py_file_path); - } else if (strcmp(argv[i], "--free") == 0) { - auth_free(); - } else if (strcmp(argv[i], "--context") == 0 && i + 1 < argc) { - char *context_file_path = argv[++i]; - fprintf(stderr, "Including \"%s\".\n", context_file_path); - openai_include(context_file_path); - } else if (strcmp(argv[i], "--api") == 0) { - API_MODE = true; - } else if (strcmp(argv[i], "--nh") == 0) { - SYNTAX_HIGHLIGHT_ENABLED = false; - fprintf(stderr, "Syntax highlighting disabled.\n"); - } else { - strcat(system, argv[i]); - strcat(system, (i < argc - 1) ? " " : "."); +static char *get_prompt_from_args(int argc, char **argv) { + char *prompt = malloc(10 * 1024 * 1024 + 1); + char *system = malloc(1024 * 1024); + if (!prompt || !system) { + fprintf(stderr, "Error: Memory allocation failed.\n"); + free(prompt); + free(system); + return NULL; } - } - if (get_from_std_in) { - if (*system) - openai_system(system); - prompt = get_prompt_from_stdin(prompt); - } else { - free(prompt); - prompt = system; - } + bool get_from_std_in = false; - if (!*prompt) { - free(prompt); - return NULL; - } - return prompt; -} - -bool try_prompt(int argc, char *argv[]) { - char *prompt = get_prompt_from_args(argc, argv); - if (prompt) { - char *response = openai_chat("user", prompt); - if (!response) { - printf("Could not get response from server\n"); - free(prompt); - return false; - } - render(response); - free(response); - free(prompt); - return true; - } - return false; -} - -char **get_parameters(const char *content, const char *delimiter) { - char *start = NULL; - char **parameters = NULL; - int count = 0; - - while ((start = strstr(content, delimiter)) != NULL) { - start += 3; - char *end = strstr(start, delimiter); - char *parameter = malloc(end - start + 1); - - memcpy(parameter, start, end - start); - parameter[end - start] = '\0'; - - content = end + 3; - count++; - parameters = realloc(parameters, sizeof(char *) * (count + 1)); - parameters[count - 1] = parameter; - parameters[count] = NULL; - } - - return parameters; -} - -void render(const char *content) { - if (SYNTAX_HIGHLIGHT_ENABLED) { - parse_markdown_to_ansi(content); - } else { - printf("%s", content); - } -} - -void repl() { - line_init(); - char *line = NULL; - - while (true) { - line = line_read("> "); - if (!line || !*line) - continue; - - if (!strncmp(line, "!dump", 5)) { - printf("%s\n", message_json()); - continue; - } - if (!strncmp(line, "!verbose", 8)) { - is_verbose = !is_verbose; - fprintf(stderr, "%s\n", - is_verbose ? "Verbose mode enabled" : "Verbose mode disabled"); - continue; - } - if (line && *line != '\n') - line_add_history(line); - if (!strncmp(line, "!tools", 6)) { - printf("Available tools: %s\n", - json_object_to_json_string(tools_descriptions())); - continue; - } - if (!strncmp(line, "!models", 7)) { - printf("Current model: %s\n", openai_fetch_models()); - continue; - } - if (!strncmp(line, "!model", 6)) { - if (line[6] == ' ') { - set_prompt_model(line + 7); - } - printf("Current model: %s\n", get_prompt_model()); - continue; - } - if (!strncmp(line, "exit", 4)) - exit(0); - - while (line && *line != '\n') { - char *response = openai_chat("user", line); - if (response) { - render(response); - printf("\n"); - if (strstr(response, "_STEP_")) { - line = "continue"; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--stdin") == 0) { + fprintf(stderr, "Reading from stdin.\n"); + get_from_std_in = true; + } else if (strcmp(argv[i], "--verbose") == 0) { + is_verbose = true; + } else if (strcmp(argv[i], "--py") == 0 && i + 1 < argc) { + char *py_file_path = expand_home_directory(argv[++i]); + fprintf(stderr, "Including \"%s\".\n", py_file_path); + openai_include(py_file_path); + free(py_file_path); + } else if (strcmp(argv[i], "--free") == 0) { + auth_free(); + } else if (strcmp(argv[i], "--context") == 0 && i + 1 < argc) { + char *context_file_path = argv[++i]; + fprintf(stderr, "Including \"%s\".\n", context_file_path); + openai_include(context_file_path); + } else if (strcmp(argv[i], "--api") == 0) { + API_MODE = true; + } else if (strcmp(argv[i], "--nh") == 0) { + SYNTAX_HIGHLIGHT_ENABLED = false; + fprintf(stderr, "Syntax highlighting disabled.\n"); } else { - line = NULL; + strcat(system, argv[i]); + strcat(system, (i < argc - 1) ? " " : "."); } - free(response); - } else { - exit(0); - } } - } -} -char *strreplace(const char *content, const char *what, const char *with) { - char *pos = strstr(content, what); - if (!pos) - return strdup(content); - - size_t result_size = strlen(content) + strlen(with) - strlen(what) + 1; - char *result = malloc(result_size); - snprintf(result, result_size, "%.*s%s%s", (int)(pos - content), content, with, - pos + strlen(what)); - return result; -} - -bool openai_include(const char *path) { - char *file_content = read_file(path); - if (!file_content) - return false; - - openai_system(file_content); - free(file_content); - return true; -} - -void init() { - setbuf(stdout, NULL); - line_init(); - auth_init(); - db_initialize(); - char *schema = db_get_schema(); - char payload[1024 * 1024] = {0}; - snprintf(payload, sizeof(payload), - "Your have a database that you can mutate using the query tool and " - "the get and set tool. This is the schema in json format: %s. " - "Dialect is sqlite.", - schema); - free(schema); - - fprintf(stderr, "Loading... 4e6"); - openai_system(payload); - if (!openai_include(".rcontext.txt")) { - openai_include("~/.rcontext.txt"); - } - fprintf(stderr, "\r \r"); -} - -void handle_sigint(int sig) { - time_t current_time = time(NULL); - printf("\n"); - if (sigint_count == 0) { - first_sigint_time = current_time; - sigint_count++; - } else { - if (difftime(current_time, first_sigint_time) <= 1) { - exit(0); + if (get_from_std_in) { + if (*system) + openai_system(system); + prompt = get_prompt_from_stdin(prompt); } else { - sigint_count = 1; - first_sigint_time = current_time; + free(prompt); + prompt = system; + } + + if (!*prompt) { + free(prompt); + return NULL; + } + return prompt; +} + +static bool try_prompt(int argc, char *argv[]) { + char *prompt = get_prompt_from_args(argc, argv); + if (prompt) { + char *response = openai_chat("user", prompt); + if (!response) { + printf("Could not get response from server\n"); + free(prompt); + return false; + } + render(response); + free(response); + free(prompt); + return true; + } + return false; +} + +static bool openai_include(const char *path) { + char *file_content = read_file(path); + if (!file_content) + return false; + + openai_system(file_content); + free(file_content); + return true; +} + +static void render(const char *content) { + if (SYNTAX_HIGHLIGHT_ENABLED) { + parse_markdown_to_ansi(content); + } else { + printf("%s", content); + } +} + +static void repl(void) { + line_init(); + char *line = NULL; + + while (true) { + line = line_read("> "); + if (!line || !*line) + continue; + + if (!strncmp(line, "!dump", 5)) { + printf("%s\n", message_json()); + continue; + } + if (!strncmp(line, "!verbose", 8)) { + is_verbose = !is_verbose; + fprintf(stderr, "%s\n", is_verbose ? "Verbose mode enabled" : "Verbose mode disabled"); + continue; + } + if (line && *line != '\n') + line_add_history(line); + if (!strncmp(line, "!tools", 6)) { + printf("Available tools: %s\n", json_object_to_json_string(tools_descriptions())); + continue; + } + if (!strncmp(line, "!models", 7)) { + printf("Current model: %s\n", openai_fetch_models()); + continue; + } + if (!strncmp(line, "!model", 6)) { + if (line[6] == ' ') { + set_prompt_model(line + 7); + } + printf("Current model: %s\n", get_prompt_model()); + continue; + } + if (!strncmp(line, "exit", 4)) + exit(0); + + while (line && *line != '\n') { + char *response = openai_chat("user", line); + if (response) { + render(response); + printf("\n"); + if (strstr(response, "_STEP_")) { + line = "continue"; + } else { + line = NULL; + } + free(response); + } else { + exit(0); + } + } + } +} + +static void init(void) { + setbuf(stdout, NULL); + line_init(); + auth_init(); + db_initialize(); + char *schema = db_get_schema(); + char payload[1024 * 1024] = {0}; + snprintf(payload, sizeof(payload), + "Your have a database that you can mutate using the query tool and " + "the get and set tool. This is the schema in json format: %s. " + "Dialect is sqlite.", + schema); + free(schema); + + fprintf(stderr, "Loading... 📨"); + openai_system(payload); + if (!openai_include(".rcontext.txt")) { + openai_include("~/.rcontext.txt"); + } + fprintf(stderr, "\r \r"); +} + +static void handle_sigint(int sig) { + time_t current_time = time(NULL); + printf("\n"); + if (sigint_count == 0) { + first_sigint_time = current_time; + sigint_count++; + } else { + if (difftime(current_time, first_sigint_time) <= 1) { + exit(0); + } else { + sigint_count = 1; + first_sigint_time = current_time; + } } - } } int main(int argc, char *argv[]) { - signal(SIGINT, handle_sigint); + signal(SIGINT, handle_sigint); - init(); - if (try_prompt(argc, argv)) + init(); + if (try_prompt(argc, argv)) + return 0; + + repl(); return 0; - - repl(); - return 0; } diff --git a/rpylib.so b/rpylib.so index 99c3529..eedf29d 100755 Binary files a/rpylib.so and b/rpylib.so differ diff --git a/tools.h b/tools.h index 96c4b1c..62fbc21 100644 --- a/tools.h +++ b/tools.h @@ -49,6 +49,15 @@ struct json_object *tool_description_db_get(); struct json_object *tool_description_web_search_news(); struct json_object *tool_description_web_search(); struct json_object *tool_description_mkdir(); +struct json_object *tool_description_python_execute(); + +struct json_object *tool_description_rag_search(); +struct json_object *tool_description_rag_chunk(); +char *tool_function_rag_search(char *query, int top_k); +char *tool_function_rag_chunk(char *file_path); + + +char *tool_function_python_execute(char *source_code); struct json_object *tools_descriptions() { struct json_object *root = json_object_new_array(); @@ -68,6 +77,8 @@ struct json_object *tools_descriptions() { json_object_array_add(root, tool_description_web_search_news()); json_object_array_add(root, tool_description_web_search()); json_object_array_add(root, tool_description_mkdir()); + json_object_array_add(root, tool_description_python_execute()); + return root; } @@ -411,6 +422,86 @@ char *tool_function_linux_terminal_interactive(char *command) { return result; } +// ---- PYTHON EXECUTE TOOL ---- +char *tool_function_python_execute(char *source_code) { + char tmp_file[] = "/tmp/r_python_tool_XXXXXX.py"; + int fd = mkstemps(tmp_file, 3); // 3 for ".py" + if (fd == -1) { + return strdup("Failed to create temporary file for Python code."); + } + FILE *fp = fdopen(fd, "w"); + if (!fp) { + close(fd); + return strdup("Failed to open temporary file for writing."); + } + fwrite(source_code, 1, strlen(source_code), fp); + fclose(fp); + + char command[4096]; + snprintf(command, sizeof(command), "python3 '%s' 2>&1", tmp_file); + + FILE *proc = popen(command, "r"); + if (!proc) { + unlink(tmp_file); + return strdup("Failed to execute python3."); + } + char buffer[1024]; + size_t total = 0; + char *output = NULL; + while (fgets(buffer, sizeof(buffer), proc)) { + size_t len = strlen(buffer); + char *new_output = realloc(output, total + len + 1); + if (!new_output) { + free(output); + pclose(proc); + unlink(tmp_file); + return strdup("Memory allocation failed."); + } + output = new_output; + strcpy(output + total, buffer); + total += len; + } + if (output) + output[total] = 0; + else + output = strdup(""); + pclose(proc); + unlink(tmp_file); + return output; +} + +struct json_object *tool_description_python_execute() { + struct json_object *root = json_object_new_object(); + json_object_object_add(root, "type", json_object_new_string("function")); + + struct json_object *function = json_object_new_object(); + json_object_object_add(function, "name", json_object_new_string("python_execute")); + json_object_object_add(function, "description", json_object_new_string("Executes Python source code using the python3 interpreter and returns stdout/stderr.")); + + struct json_object *parameters = json_object_new_object(); + json_object_object_add(parameters, "type", json_object_new_string("object")); + + struct json_object *properties = json_object_new_object(); + struct json_object *source = json_object_new_object(); + json_object_object_add(source, "type", json_object_new_string("string")); + json_object_object_add(source, "description", json_object_new_string("Python source code to execute.")); + json_object_object_add(properties, "source", source); + + json_object_object_add(parameters, "properties", properties); + + struct json_object *required = json_object_new_array(); + json_object_array_add(required, json_object_new_string("source")); + json_object_object_add(parameters, "required", required); + + json_object_object_add(parameters, "additionalProperties", json_object_new_boolean(0)); + json_object_object_add(function, "parameters", parameters); + + json_object_object_add(root, "function", function); + + return root; +} +// ---- END PYTHON EXECUTE TOOL ---- + char *tool_function_getpwd() { char *cwd = (char *)malloc(PATH_MAX); if (cwd == NULL) { @@ -555,10 +646,9 @@ struct json_object *tool_description_linux_terminal_interactive() { struct json_object *properties = json_object_new_object(); struct json_object *path = json_object_new_object(); json_object_object_add(path, "type", json_object_new_string("string")); - json_object_object_add( - path, "description", - json_object_new_string( - "Executable with parameters to execute interactively.")); + json_object_object_add(path, "description", + json_object_new_string( + "Executable with parameters to execute interactively.")); json_object_object_add(properties, "command", path); json_object_object_add(parameters, "properties", properties); @@ -725,7 +815,10 @@ struct json_object *tool_description_write_file() { } char *tool_function_index_source_directory(char *path) { - return index_directory(path); + char * result = index_directory(path); + if(!result) + return strdup("Failed to index directory!"); + return result; } char *tool_function_mkdir(char *path); @@ -1113,29 +1206,33 @@ struct json_object *tool_description_linux_terminal() { } char *tool_function_mkdir(char *path) { - char temp[2048]; - char *p = NULL; - size_t len; + char temp[2048]; + char *p = NULL; + size_t len; - snprintf(temp, sizeof(temp), "%s", path); - len = strlen(temp); + snprintf(temp, sizeof(temp), "%s", path); + len = strlen(temp); - if (temp[len - 1] == '/') { - temp[len - 1] = '\0'; - } - - for (p = temp + 1; *p; p++) { - if (*p == '/') { - *p = '\0'; - if (mkdir(temp, 0777) != 0 && errno != EEXIST) { - return strdup("Failed to create directory!"); - } - *p = '/'; + if (temp[len - 1] == '/') { + temp[len - 1] = '\0'; } - } - return strdup("Directory successfully created."); + + for (p = temp + 1; *p; p++) { + if (*p == '/') { + *p = '\0'; + if (mkdir(temp, 0777) != 0 && errno != EEXIST) { + return strdup("Failed to create directory!"); + } + *p = '/'; + } + } + if (mkdir(temp, 0777) != 0 && errno != EEXIST) { + return strdup("Failed to create directory!"); + } + return strdup("Directory successfully created."); } + struct json_object *tool_description_mkdir() { struct json_object *root = json_object_new_object(); json_object_object_add(root, "type", json_object_new_string("function")); @@ -1462,6 +1559,18 @@ struct json_object *tools_execute(struct json_object *tools_array) { free(mkdir_result); } } + } else if (!strcmp(function_name, "python_execute")) { + struct json_object *arguments_obj; + if (json_object_object_get_ex(function_obj, "arguments", &arguments_obj)) { + struct json_object *arguments = json_tokener_parse(json_object_get_string(arguments_obj)); + struct json_object *source_obj; + if (json_object_object_get_ex(arguments, "source", &source_obj)) { + char *source = (char *)json_object_get_string(source_obj); + char *result = tool_function_python_execute(source); + json_object_object_add(tool_result, "content", json_object_new_string(result)); + free(result); + } + } } else { fprintf(stderr, "Unknown function: %s\n", function_name); json_object_object_add(