Update indexer.h

This commit is contained in:
retoor 2025-03-28 23:22:57 +01:00
parent 619b231e8d
commit e0c4d650c6
4 changed files with 268 additions and 442 deletions

201
indexer.h
View File

@ -7,151 +7,98 @@
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#define MAX_FILES 20000
#define MAX_PATH 4096
static const char *extensions[] = {
".c", ".cpp", ".h", ".py", ".java", ".js", ".mk", ".html",
"Makefile", ".css", ".json", ".cs", ".csproj", ".sln", ".toml", ".rs"};
static size_t ext_count =
sizeof(extensions) /
sizeof(
extensions[0]); // Updated count to reflect the new number of extensions
".c", ".cpp", ".h", ".py", ".java", ".js", ".mk", ".html",
"Makefile", ".css", ".json", ".cs", ".csproj", ".sln", ".toml", ".rs"
};
static const size_t ext_count = sizeof(extensions) / sizeof(extensions[0]);
typedef struct {
char name[MAX_PATH];
char modification_date[20];
char creation_date[20];
char type[10];
size_t size_bytes;
char name[MAX_PATH];
char modification_date[20];
char creation_date[20];
char type[10];
size_t size_bytes;
} FileInfo;
FileInfo file_list[MAX_FILES];
size_t file_count = 0;
static FileInfo file_list[MAX_FILES];
static size_t file_count = 0;
int is_valid_extension(const char *filename, const char *extensions[],
size_t ext_count) {
const char *dot = strrchr(filename, '.');
if (!dot) {
dot = filename;
}
for (size_t i = 0; i < ext_count; i++) {
if (strcmp(dot, extensions[i]) == 0) {
return 1;
static int is_valid_extension(const char *filename) {
const char *dot = strrchr(filename, '.');
if (!dot) dot = filename;
for (size_t i = 0; i < ext_count; i++) {
if (strcmp(dot, extensions[i]) == 0) return 1;
}
}
return 0;
return 0;
}
int is_ignored_directory(const char *dir_name) {
const char *ignored_dirs[] = {"env", ".venv", "node_modules", "venv",
"virtualenv"};
for (size_t i = 0; i < sizeof(ignored_dirs) / sizeof(ignored_dirs[0]); i++) {
if (strcmp(dir_name, ignored_dirs[i]) == 0) {
return 1;
static int is_ignored_directory(const char *dir_name) {
const char *ignored_dirs[] = {"env", ".venv", "node_modules", "venv", "virtualenv"};
for (size_t i = 0; i < sizeof(ignored_dirs) / sizeof(ignored_dirs[0]); i++) {
if (strcmp(dir_name, ignored_dirs[i]) == 0) return 1;
}
}
return 0;
return 0;
}
void get_file_info(const char *path) {
struct stat file_stat;
if (stat(path, &file_stat) == 0) {
FileInfo info;
strncpy(info.name, path,
MAX_PATH -
1); // Copy with one less to leave space for null terminator
info.name[MAX_PATH - 1] = '\0'; // Ensure null termination
strftime(info.modification_date, sizeof(info.modification_date),
"%Y-%m-%d %H:%M:%S", localtime(&file_stat.st_mtime));
strftime(info.creation_date, sizeof(info.creation_date),
"%Y-%m-%d %H:%M:%S", localtime(&file_stat.st_ctime));
strncpy(info.type, S_ISDIR(file_stat.st_mode) ? "directory" : "file", 10);
info.type[9] = '\0'; // Ensure null termination
info.size_bytes = file_stat.st_size;
file_list[file_count++] = info;
}
static void get_file_info(const char *path) {
struct stat file_stat;
if (stat(path, &file_stat) == 0) {
FileInfo info;
strncpy(info.name, path, MAX_PATH - 1);
info.name[MAX_PATH - 1] = '\0';
strftime(info.modification_date, sizeof(info.modification_date), "%Y-%m-%d %H:%M:%S", localtime(&file_stat.st_mtime));
strftime(info.creation_date, sizeof(info.creation_date), "%Y-%m-%d %H:%M:%S", localtime(&file_stat.st_ctime));
strncpy(info.type, S_ISDIR(file_stat.st_mode) ? "directory" : "file", sizeof(info.type) - 1);
info.type[sizeof(info.type) - 1] = '\0';
info.size_bytes = file_stat.st_size;
file_list[file_count++] = info;
}
}
char *index_directory(const char *dir_path) {
DIR *dir = opendir(dir_path);
struct dirent *entry;
if (dir == NULL) {
perror("Failed to open directory");
return NULL;
}
json_object *jarray = json_object_new_array();
while ((entry = readdir(dir)) != NULL) {
if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
if (entry->d_name[0] == '.' || is_ignored_directory(entry->d_name)) {
continue;
}
char full_path[MAX_PATH];
snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, entry->d_name);
if (entry->d_type == DT_DIR) {
char *subdir_json = index_directory(full_path);
if (subdir_json) {
json_object *jsubdir = json_object_new_string(subdir_json);
json_object_array_add(jarray, jsubdir);
free(subdir_json);
}
} else if (is_valid_extension(entry->d_name, extensions, ext_count)) {
get_file_info(full_path);
json_object *jfile = json_object_new_object();
json_object_object_add(
jfile, "file_name",
json_object_new_string(file_list[file_count - 1].name));
json_object_object_add(
jfile, "modification_date",
json_object_new_string(
file_list[file_count - 1].modification_date));
json_object_object_add(
jfile, "creation_date",
json_object_new_string(file_list[file_count - 1].creation_date));
json_object_object_add(
jfile, "type",
json_object_new_string(file_list[file_count - 1].type));
json_object_object_add(
jfile, "size_bytes",
json_object_new_int64(file_list[file_count - 1].size_bytes));
// Read the file contents
FILE *fp = fopen(file_list[file_count - 1].name, "r");
if (fp != NULL) {
fseek(fp, 0, SEEK_END);
long length = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *content = malloc(length + 1);
if (content) {
size_t bytesRead = fread(content, 1, length, fp);
if (bytesRead != length) {
free(content);
content = NULL;
json_object_object_add(
jfile, "file_current_content_data",
json_object_new_string("Error reading file"));
} else {
content[length] = '\0'; // Null-terminate the string
// json_object_object_add(jfile, "file_current_content_data",
// json_object_new_string(content));
}
free(content);
}
fclose(fp);
} else {
// json_object_object_add(jfile, "content",
// json_object_new_string("Unable to read file"));
}
json_object_array_add(jarray, jfile);
}
DIR *dir = opendir(dir_path);
if (!dir) {
perror("Failed to open directory");
return NULL;
}
}
closedir(dir);
char *result = strdup(json_object_to_json_string(jarray));
json_object_put(jarray);
return result;
struct dirent *entry;
json_object *jarray = json_object_new_array();
while ((entry = readdir(dir)) != NULL) {
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue;
if (entry->d_name[0] == '.' || is_ignored_directory(entry->d_name)) continue;
char full_path[MAX_PATH];
snprintf(full_path, sizeof(full_path), "%s/%s", dir_path, entry->d_name);
if (entry->d_type == DT_DIR) {
char *subdir_json = index_directory(full_path);
if (subdir_json) {
json_object *jsubdir = json_object_new_string(subdir_json);
json_object_array_add(jarray, jsubdir);
free(subdir_json);
}
} else if (is_valid_extension(entry->d_name)) {
get_file_info(full_path);
json_object *jfile = json_object_new_object();
json_object_object_add(jfile, "file_name", json_object_new_string(file_list[file_count - 1].name));
json_object_object_add(jfile, "modification_date", json_object_new_string(file_list[file_count - 1].modification_date));
json_object_object_add(jfile, "creation_date", json_object_new_string(file_list[file_count - 1].creation_date));
json_object_object_add(jfile, "type", json_object_new_string(file_list[file_count - 1].type));
json_object_object_add(jfile, "size_bytes", json_object_new_int64(file_list[file_count - 1].size_bytes));
json_object_array_add(jarray, jfile);
}
}
closedir(dir);
char *result = strdup(json_object_to_json_string(jarray));
json_object_put(jarray);
return result;
}

508
main.c
View File

@ -1,381 +1,261 @@
// Written by retoor@molodetz.nl
// This source code initializes a command-line application that uses OpenAI for
// chat interactions, handles user inputs, and can start a simple HTTP server
// with CGI support. The code allows command execution, markdown parsing, and
// OpenAI chat integration.
// External imports used in this code:
// - openai.h
// - markdown.h
// - plugin.h
// - line.h
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "r.h"
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <unistd.h>
#include "line.h"
#include "markdown.h"
#include "openai.h"
#include "utils.h"
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "db_utils.h"
volatile sig_atomic_t sigint_count = 0;
time_t first_sigint_time = 0;
bool SYNTAX_HIGHLIGHT_ENABLED = true;
bool API_MODE = false;
void help();
void render(char *);
bool openai_include(char *path);
char *strreplace(char *content, char *what, char *with);
void render(const char *);
bool openai_include(const char *);
char *strreplace(const char *, const char *, const char *);
char *get_prompt_from_stdin(char *prompt) {
int index = 0;
prompt[index] = '\0';
char c = 0;
while ((c = getchar()) != EOF) {
prompt[index++] = c;
}
prompt[index++] = '\0';
return prompt;
int index = 0;
char c;
while ((c = getchar()) != EOF) {
prompt[index++] = c;
}
prompt[index] = '\0';
return prompt;
}
char *get_prompt_from_args(int c, char **argv) {
char *prompt = (char *)malloc(1024 * 1024 * 10 + 1);
char *system = (char *)malloc(1024 * 1024);
char *get_prompt_from_args(int argc, char **argv) {
char *prompt = malloc(10 * 1024 * 1024 + 1);
char *system = malloc(1024 * 1024);
bool get_from_std_in = false;
system[0] = 0;
prompt[0] = 0;
bool get_from_std_in = false;
for (int i = 1; i < c; i++) {
if (!strcmp(argv[i], "--stdin")) {
fprintf(stderr, "%s\n", "Reading from stdin.");
get_from_std_in = true;
} else if (!strcmp(argv[i], "--verbose")) {
is_verbose = true;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--stdin") == 0) {
fprintf(stderr, "Reading from stdin.\n");
get_from_std_in = true;
} else if (strcmp(argv[i], "--verbose") == 0) {
is_verbose = true;
} else if (strcmp(argv[i], "--py") == 0 && i + 1 < argc) {
char *py_file_path = expand_home_directory(argv[++i]);
fprintf(stderr, "Including \"%s\".\n", py_file_path);
openai_include(py_file_path);
free(py_file_path);
} else if (strcmp(argv[i], "--free") == 0) {
auth_free();
} else if (strcmp(argv[i], "--context") == 0 && i + 1 < argc) {
char *context_file_path = argv[++i];
fprintf(stderr, "Including \"%s\".\n", context_file_path);
openai_include(context_file_path);
} else if (strcmp(argv[i], "--api") == 0) {
API_MODE = true;
} else if (strcmp(argv[i], "--nh") == 0) {
SYNTAX_HIGHLIGHT_ENABLED = false;
fprintf(stderr, "Syntax highlighting disabled.\n");
} else if (!get_from_std_in) {
strcat(system, argv[i]);
strcat(system, (i < argc - 1) ? " " : ".");
}
}
else if (!strcmp(argv[i], "--py")) {
if (i + 1 <= c) {
char *py_file_path = expand_home_directory(argv[i + 1]);
fprintf(stderr, "Including \"%s\".\n", py_file_path);
openai_include(py_file_path);
free(py_file_path);
// char * file_content = read_file(py_file_path);
// plugin_run(file_content);
i++;
}
} else if (!strcmp(argv[i], "--free")) {
auth_free();
continue;
if (get_from_std_in) {
if (*system) openai_system(system);
free(system);
prompt = get_prompt_from_stdin(prompt);
} else {
free(prompt);
prompt = system;
}
else if (!strcmp(argv[i], "--context")) {
if (i + 1 <= c) {
char *context_file_path = argv[i + 1];
fprintf(stderr, "Including \"%s\".\n", context_file_path);
openai_include(context_file_path);
i++;
}
} else if (!strcmp(argv[i], "--api")) {
API_MODE = true;
} else if (!strcmp(argv[i], "--nh")) {
SYNTAX_HIGHLIGHT_ENABLED = false;
fprintf(stderr, "%s\n", "Syntax highlighting disabled.");
} else if (!get_from_std_in) {
strcat(system, argv[i]);
if (i < c - 1) {
strcat(system, " ");
} else {
strcat(system, ".");
}
if (!*prompt) {
free(prompt);
return NULL;
}
}
if (get_from_std_in) {
if (*system) {
openai_system(system);
}
free(system);
prompt = get_prompt_from_stdin(prompt);
} else {
free(prompt);
prompt = system;
}
if (!*prompt) {
free(prompt);
return NULL;
}
return prompt;
return prompt;
}
bool try_prompt(int argc, char *argv[]) {
char *prompt = get_prompt_from_args(argc, argv);
if (prompt != NULL) {
char *response = openai_chat("user", prompt);
if (!response) {
printf("Could not get response from server\n");
free(prompt);
return false;
char *prompt = get_prompt_from_args(argc, argv);
if (prompt) {
char *response = openai_chat("user", prompt);
if (!response) {
printf("Could not get response from server\n");
free(prompt);
return false;
}
render(response);
free(response);
free(prompt);
return true;
}
render(response);
free(response);
free(prompt);
return true;
}
return false;
return false;
}
void serve() {
render("Starting server. *Put executables in a dir named cgi-bin and they "
"will behave as web pages.*");
int res = system("python3 -m http.server --cgi");
(void)res;
// Collect every substring of `content` that is enclosed between two
// consecutive occurrences of `delimiter`.
//
// content:   NUL-terminated text to scan.
// delimiter: NUL-terminated, non-empty marker that opens and closes a
//            parameter.
//
// Returns a NULL-terminated array of heap-allocated strings. The caller frees
// each element and then the array. Returns NULL when no complete
// delimiter-enclosed parameter exists, or when an argument is NULL or the
// delimiter is empty. An opening delimiter without a matching closing one
// ends the scan; parameters found before it are still returned. The same
// applies if an allocation fails part-way through.
char **get_parameters(const char *content, const char *delimiter) {
    if (!content || !delimiter || !*delimiter) {
        return NULL;
    }

    // Skip exactly one delimiter, whatever its length, instead of assuming 3.
    const size_t delimiter_len = strlen(delimiter);
    char **parameters = NULL;
    size_t count = 0;
    const char *start;

    while ((start = strstr(content, delimiter)) != NULL) {
        start += delimiter_len;
        const char *end = strstr(start, delimiter);
        if (!end) {
            // Unterminated parameter: there is nothing valid left to extract.
            break;
        }

        const size_t length = (size_t)(end - start);
        char *parameter = malloc(length + 1);
        if (!parameter) {
            break;
        }
        memcpy(parameter, start, length);
        parameter[length] = '\0';

        // Room for the new element plus the terminating NULL; keep the old
        // array reachable if realloc fails.
        char **grown = realloc(parameters, sizeof(*parameters) * (count + 2));
        if (!grown) {
            free(parameter);
            break;
        }
        parameters = grown;
        parameters[count++] = parameter;
        parameters[count] = NULL;

        content = end + delimiter_len;
    }
    return parameters;
}
// Collect every substring of `content` that is enclosed between two
// consecutive occurrences of `delimiter`.
//
// content:   NUL-terminated text to scan; it is not modified.
// delimiter: NUL-terminated, non-empty marker that opens and closes a
//            parameter.
//
// Returns a NULL-terminated array of heap-allocated strings. The caller frees
// each element and then the array. Returns NULL when no complete
// delimiter-enclosed parameter exists, or when an argument is NULL or the
// delimiter is empty. An opening delimiter without a matching closing one
// ends the scan; parameters found before it are still returned. The same
// applies if an allocation fails part-way through.
char **get_parameters(char *content, char *delimiter) {
    if (!content || !delimiter || !*delimiter) {
        return NULL;
    }

    // Skip exactly one delimiter, whatever its length, instead of assuming 3.
    const size_t delimiter_len = strlen(delimiter);
    char **parameters = NULL;
    size_t count = 0;
    char *start;

    while ((start = strstr(content, delimiter)) != NULL) {
        start += delimiter_len;
        char *end = strstr(start, delimiter);
        if (!end) {
            // Unterminated parameter: there is nothing valid left to extract.
            break;
        }

        const size_t length = (size_t)(end - start);
        char *parameter = malloc(length + 1);
        if (!parameter) {
            break;
        }
        memcpy(parameter, start, length);
        parameter[length] = '\0';

        // Only one extra slot is needed for the terminating NULL; keep the
        // old array reachable if realloc fails.
        char **grown = realloc(parameters, sizeof(*parameters) * (count + 2));
        if (!grown) {
            free(parameter);
            break;
        }
        parameters = grown;
        parameters[count++] = parameter;
        parameters[count] = NULL;

        content = end + delimiter_len;
    }
    return parameters;
}
void render(char *content) {
if (SYNTAX_HIGHLIGHT_ENABLED) {
parse_markdown_to_ansi(content);
} else {
printf("%s", content);
}
// Write `content` to the terminal: verbatim on stdout when syntax
// highlighting is switched off, otherwise through parse_markdown_to_ansi().
void render(const char *content) {
    if (!SYNTAX_HIGHLIGHT_ENABLED) {
        printf("%s", content);
        return;
    }
    parse_markdown_to_ansi(content);
}
void repl() {
line_init();
char *line = NULL;
// char *previous_line = NULL;
while (true) {
line = line_read("> ");
if (!line || !*line) {
continue;
// line = previous_line;
}
if (!line || !*line)
continue;
// previous_line = line;
if (!strncmp(line, "!dump", 5)) {
printf("%s\n", message_json());
continue;
}
if (!strncmp(line, "!verbose", 7)) {
is_verbose = !is_verbose;
fprintf(stderr, "%s\n",
is_verbose ? "Verbose mode enabled" : "Verbose mode disabled");
continue;
}
if (line && *line != '\n') {
line_init();
char *line = NULL;
line_add_history(line);
}
if (!strncmp(line, "!models", 7)) {
printf("Current model: %s\n", openai_fetch_models());
continue;
}
if (!strncmp(line, "!model", 6)) {
if (!strncmp(line + 6, " ", 1)) {
line = line + 7;
set_prompt_model(line);
}
printf("Current model: %s\n", get_prompt_model());
continue;
}
if (!strncmp(line, "exit", 4)) {
exit(0);
}
if (!strncmp(line, "help", 4)) {
help();
continue;
}
if (!strncmp(line, "!debug", 6)) {
r_malloc_stats();
continue;
}
while (line && *line != '\n') {
while (true) {
line = line_read("> ");
if (!line || !*line) continue;
char *response = openai_chat("user", line);
if (response) {
render(response);
printf("\n");
if (strstr(response, "_STEP_")) {
line = "continue";
if (!strncmp(line, "!dump", 5)) {
printf("%s\n", message_json());
continue;
}
if (!strncmp(line, "!verbose", 8)) {
is_verbose = !is_verbose;
fprintf(stderr, "%s\n", is_verbose ? "Verbose mode enabled" : "Verbose mode disabled");
continue;
}
if (line && *line != '\n') line_add_history(line);
} else {
line = NULL;
if (!strncmp(line, "!models", 7)) {
printf("Current model: %s\n", openai_fetch_models());
continue;
}
if (!strncmp(line, "!model", 6)) {
if (line[6] == ' ') {
set_prompt_model(line + 7);
}
printf("Current model: %s\n", get_prompt_model());
continue;
}
if (!strncmp(line, "exit", 4)) exit(0);
if (!strncmp(line, "help", 4)) {
help();
continue;
}
if (!strncmp(line, "!debug", 6)) {
r_malloc_stats();
continue;
}
free(response);
} else {
exit(0);
}
while (line && *line != '\n') {
char *response = openai_chat("user", line);
if (response) {
render(response);
printf("\n");
if (strstr(response, "_STEP_")) {
line = "continue";
} else {
line = NULL;
}
free(response);
} else {
exit(0);
}
}
}
}
}
void help() {
char help_text[1024 * 1024] = {0};
char *template =
"# Help\n"
"Written by retoor@molodetz.nl.\n\n"
"## Features\n"
" - navigate through history using `arrows`.\n"
" - navigate through history with **recursive search** using `ctrl+r`.\n"
" - **inception with python** for *incoming* and *outgoing* content.\n"
" - markdown and **syntax highlighting**.\n"
" - **execute python commands** with prefix `!`\n"
" - list files of the current work directory using `ls`.\n"
" - type `serve` to start a web server with directory listing. Easy for "
"network transfers.\n\n"
"## Configuration\n"
" - model temperature is %f.\n"
" - model name is %s.\n"
" - max tokens is %d.\n\n"
"## In development\n"
" - **google search** and actions with those results.\n"
" - **reminders**.\n"
" - predefined **templates** for **reviewing** / **refactoring** so you "
"can personalize.\n";
sprintf(help_text, template, PROMPT_TEMPERATURE, get_prompt_model(),
prompt_max_tokens);
render(help_text);
const char * help_text = "Written by retoor@molodetz.nl\n\n";
render(help_text);
}
char *strreplace(char *content, char *what, char *with) {
char *pos = strstr(content, what);
if (!pos) {
return strdup(content);
}
char *result = (char *)malloc(strlen(content) + strlen(with) + 5);
memset(result, 0, strlen(content) + strlen(with) + 3);
memcpy(result, content, pos - content);
memcpy(result + (pos - content), with, strlen(with));
memcpy(result + (pos - content) + strlen(with), pos + strlen(what),
strlen(content) - (pos - content) - strlen(what) + 1);
return result;
// Return a newly allocated copy of `content` in which the first occurrence of
// `what` is replaced by `with`.
//
// content: NUL-terminated text to search.
// what:    NUL-terminated substring to look for; only the first match is
//          replaced. An empty `what` matches at the very start of `content`.
// with:    NUL-terminated replacement text.
//
// Returns a heap-allocated string the caller must free(). When `what` does not
// occur, the result is an unmodified copy of `content`. Returns NULL if any
// argument is NULL or if memory cannot be allocated.
char *strreplace(const char *content, const char *what, const char *with) {
    if (!content || !what || !with) {
        return NULL;
    }

    const size_t content_len = strlen(content);
    const char *pos = strstr(content, what);
    if (!pos) {
        char *copy = malloc(content_len + 1);
        if (copy) {
            memcpy(copy, content, content_len + 1);
        }
        return copy;
    }

    const size_t what_len = strlen(what);
    const size_t with_len = strlen(with);
    const size_t prefix_len = (size_t)(pos - content);
    const size_t suffix_len = content_len - prefix_len - what_len;

    char *result = malloc(prefix_len + with_len + suffix_len + 1);
    if (!result) {
        return NULL;
    }

    // Assemble prefix + replacement + suffix; the final copy also carries the
    // terminating NUL. Working in size_t avoids narrowing the prefix length
    // to int, as a "%.*s" precision would require.
    memcpy(result, content, prefix_len);
    memcpy(result + prefix_len, with, with_len);
    memcpy(result + prefix_len + with_len, pos + what_len, suffix_len + 1);
    return result;
}
// Instruction text that tells the model to turn user prompts into shell
// commands for a Debian-based system and to emit them in a `!system` form.
// NOTE(review): no use of this variable is visible in this part of the file;
// presumably it is sent as a system prompt - confirm against the callers.
char *linux_instructions =
"You are a linux master and are able to transform the prompt of "
"user into shell commands that will be executed on a debian "
"based system. You can execute shell commands by responding with python "
"code: "
"literally `\"\"\"!system\"\"\",\"\"\"`your shell command`\"\"\". Execute "
"immediately. Read bash history file if you have to work with history.";
// Instruction text describing the persona the model should adopt for social
// conversation.
// NOTE(review): no use of this variable is visible in this part of the file;
// confirm where (and whether) it is still consumed.
char *retoor_instructions =
"If the user prompts with social talk, "
"respond like replica and emoji. Your name is retoor and made by molodetz. "
"Be interested. Be creative.";
bool openai_include(const char *path) {
char *file_content = read_file(path);
if (!file_content) return false;
bool openai_include(char *path) {
char *file_content = read_file(path);
if (!file_content) {
return false;
}
openai_system(file_content);
free(file_content);
return true;
openai_system(file_content);
free(file_content);
return true;
}
void init() {
setbuf(stdout, NULL);
line_init();
auth_init();
db_initialize();
char *schema = db_get_schema();
char payload[1024 * 1024] = {0};
sprintf(payload,
"Your have a database that you can mutate using the query tool and "
"the get and set tool. This is the schema in json format: %s. "
"Dialect is sqlite.",
schema);
free(schema);
fprintf(stderr, "%s", "Loading... ⏳");
openai_system(payload);
if (!openai_include(".rcontext.txt")) {
openai_include("~/.rcontext.txt");
}
fprintf(stderr, "\r \r");
setbuf(stdout, NULL);
line_init();
auth_init();
db_initialize();
char *schema = db_get_schema();
char payload[1024 * 1024] = {0};
snprintf(payload, sizeof(payload),
"Your have a database that you can mutate using the query tool and the get and set tool. This is the schema in json format: %s. Dialect is sqlite.",
schema);
free(schema);
fprintf(stderr, "Loading... ⏳");
openai_system(payload);
if (!openai_include(".rcontext.txt")) {
openai_include("~/.rcontext.txt");
}
fprintf(stderr, "\r \r");
}
void handle_sigint(int sig) {
time_t current_time = time(NULL);
printf("\n");
if (sigint_count == 0) {
first_sigint_time = current_time;
sigint_count++;
} else {
if (difftime(current_time, first_sigint_time) <= 1) {
exit(0);
time_t current_time = time(NULL);
printf("\n");
if (sigint_count == 0) {
first_sigint_time = current_time;
sigint_count++;
} else {
sigint_count = 1;
first_sigint_time = current_time;
if (difftime(current_time, first_sigint_time) <= 1) {
exit(0);
} else {
sigint_count = 1;
first_sigint_time = current_time;
}
}
}
}
int main(int argc, char *argv[]) {
signal(SIGINT, handle_sigint);
signal(SIGINT, handle_sigint);
init();
if (try_prompt(argc, argv))
init();
if (try_prompt(argc, argv)) return 0;
repl();
return 0;
repl();
return 0;
}

1
r.h
View File

@ -38,7 +38,6 @@ char *fast_model = "qwen2.5:0.5b";
char *_model = NULL;
#define DB_FILE "~/.r.db"
static int prompt_max_tokens = 10000;
#define PROMPT_TEMPERATURE 0.1
void set_prompt_model(const char *model) {

BIN
rpylib.so

Binary file not shown.