Update.

2025-12-05 18:09:44 +01:00 · 2025-12-05 18:09:44 +01:00 · 8b13dad644
commit 8b13dad644
parent f5b47eebe4
4 changed files with 203 additions and 84 deletions
--- a/BIN
+++ b/BIN
--- a/tikker.c
+++ b/tikker.c
@ -141,6 +141,7 @@ static void tikker_print_usage(void) {
    printf("  stats top-keys [N]        Top N keys (default: 10)\n");
    printf("  stats top-words [N]       Top N words (default: 10)\n");
    printf("  stats summary             Overall summary statistics\n");
+    printf("  export                    Export logs to logs_plain/\n");
    printf("  decode [FILE]             Decode keystroke log file\n");
    printf("\nOptions:\n");
    printf("  --device='NAME'           Monitor specific device\n");
@ -309,6 +310,10 @@ int main(int argc, char *argv[]) {
            return tikker_handle_stats_command(argc - 2, argv + 2);
        }

+        if (strcmp(argv[1], "export") == 0) {
+            return tikker_export_logs(tikker_db);
+        }
+
        if (strcmp(argv[1], "decode") == 0) {
            if (argc < 3) {
                fprintf(stderr, "Error: decode requires a filename\n");
--- a/tikker_stats.h
+++ b/tikker_stats.h
@ -4,6 +4,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <sys/stat.h>
 #include "sormc.h"
 #include "tikker_types.h"
 #include "tikker_db.h"
@ -212,59 +215,150 @@ static inline int tikker_stats_summary(int db) {
    return 0;
 }

-static inline int tikker_stats_top_words(int db, int limit) {
-    sorm_ptr result = sormq(db,
-        "SELECT STRFTIME('%%Y-%%m-%%d.%%H', timestamp) as date_hour, "
-        "GROUP_CONCAT(char, '') as chars "
-        "FROM kevent WHERE event = 'PRESSED' "
-        "GROUP BY date_hour ORDER BY date_hour");
-
-    printf("word,count\n");
-
-    if (!result) return 0;
-
-    size_t total_len = strlen((char *)result);
-    char *decoded = malloc(total_len + 1);
-    if (!decoded) {
-        free(result);
-        return 1;
+/*
+ * Exports pressed-key characters to one text file per hour under logs_plain/.
+ *
+ * Queries the distinct "YYYY-MM-DD.HH" buckets that have PRESSED events, then
+ * for each bucket concatenates that hour's `char` values and writes them to
+ * logs_plain/<bucket>.txt, overwriting any existing file.
+ *
+ * Returns 0 on success (including when there is nothing to export) and 1 if
+ * logs_plain cannot be used as a directory or any file could not be opened.
+ *
+ * NOTE(review): sormq() is assumed to return a malloc'd text buffer with one
+ * row per line and ';' after each field - confirm against sormc.h. If that
+ * holds, a ';' inside the concatenated characters may truncate the export.
+ */
+static inline int tikker_export_logs(int db) {
+    struct stat st = {0};
+    if (stat("logs_plain", &st) == -1) {
+        /* Without the directory every fopen() below would fail silently. */
+        if (mkdir("logs_plain", 0755) == -1) {
+            fprintf(stderr, "Error: Cannot create logs_plain directory\n");
+            return 1;
+        }
+    } else if (!S_ISDIR(st.st_mode)) {
+        fprintf(stderr, "Error: logs_plain exists but is not a directory\n");
+        return 1;
    }

-    char *csv = (char *)result;
+    sorm_ptr hours_result = sormq(db,
+        "SELECT DISTINCT STRFTIME('%%Y-%%m-%%d.%%H', timestamp) as date_hour "
+        "FROM kevent WHERE event = 'PRESSED' ORDER BY date_hour");
+
+    if (!hours_result) {
+        printf("No data to export\n");
+        return 0;
+    }
+
+    int files_written = 0;
+    int files_failed = 0;
+    char *csv = (char *)hours_result;
    char *line = csv;
    char *next;
-    size_t decoded_pos = 0;

    while (line && *line) {
        next = strchr(line, '\n');
        if (next) *next = '\0';

-        char *chars = strchr(line, ';');
-        if (chars) {
-            chars++;
-            char *end = strchr(chars, ';');
+        /* Skip metadata rows and anything too short to hold a date. */
+        if (tikker_csv_is_metadata(line) || strlen(line) < 10) {
+            if (next) line = next + 1;
+            else break;
+            continue;
+        }
+
+        char *end = strchr(line, ';');
        if (end) *end = '\0';

-            int len = tikker_decode_buffer(chars, decoded + decoded_pos, total_len - decoded_pos);
-            decoded_pos += len;
-            if (decoded_pos < total_len) {
-                decoded[decoded_pos++] = ' ';
+        char date_hour[32];
+        strncpy(date_hour, line, sizeof(date_hour) - 1);
+        date_hour[sizeof(date_hour) - 1] = '\0';
+
+        /* Same bucket with the '.' replaced by ' ' to form timestamp bounds. */
+        char date_hour_space[32];
+        strncpy(date_hour_space, date_hour, sizeof(date_hour_space) - 1);
+        date_hour_space[sizeof(date_hour_space) - 1] = '\0';
+        char *dot = strchr(date_hour_space, '.');
+        if (dot) *dot = ' ';
+
+        char start_ts[32], end_ts[32];
+        snprintf(start_ts, sizeof(start_ts), "%s:00:00", date_hour_space);
+        snprintf(end_ts, sizeof(end_ts), "%s:59:59", date_hour_space);
+
+        char sql[512];
+        snprintf(sql, sizeof(sql),
+            "SELECT GROUP_CONCAT(char, '') FROM kevent "
+            "WHERE event = 'PRESSED' AND timestamp >= '%s' AND timestamp <= '%s'",
+            start_ts, end_ts);
+
+        sorm_ptr chars_result = sormq(db, sql);
+        if (chars_result) {
+            /* Use the text after the first newline if there is one. */
+            char *chars_csv = (char *)chars_result;
+            char *chars_line = strchr(chars_csv, '\n');
+            if (chars_line) chars_line++;
+            else chars_line = chars_csv;
+
+            char *chars_end = strchr(chars_line, ';');
+            if (chars_end) *chars_end = '\0';
+            chars_end = strchr(chars_line, '\n');
+            if (chars_end) *chars_end = '\0';
+
+            char filepath[256];
+            snprintf(filepath, sizeof(filepath), "logs_plain/%s.txt", date_hour);
+
+            FILE *f = fopen(filepath, "w");
+            if (!f) {
+                /* Count instead of printing: there may be thousands of hours. */
+                files_failed++;
+            }
+            if (f) {
+                fprintf(f, "**%s:00**: ```%s```\n", date_hour_space, chars_line);
+                fclose(f);
+                files_written++;
+                if (files_written % 100 == 0) {
+                    printf("Exported %d files...\r", files_written);
+                    fflush(stdout);
                }
            }
+            free(chars_result);
+        }

        if (next) line = next + 1;
        else break;
    }
-    decoded[decoded_pos] = '\0';

-    tikker_word_count_t *words = calloc(TIKKER_MAX_WORDS, sizeof(tikker_word_count_t));
-    if (!words) {
-        free(decoded);
-        free(result);
+    free(hours_result);
+    printf("Exported %d hourly log files to logs_plain/\n", files_written);
+    if (files_failed > 0) {
+        fprintf(stderr, "Error: Failed to write %d log files\n", files_failed);
+        return 1;
+    }
+    return 0;
+}
+
+static inline int tikker_stats_top_words(int db, int limit) {
+    (void)db;
+
+    printf("word,count\n");
+
+    tikker_word_hash_t *hash = tikker_hash_create();
+    if (!hash) return 1;
+
+    DIR *dir = opendir("logs_plain");
+    if (!dir) {
+        fprintf(stderr, "Error: Cannot open logs_plain directory\n");
+        tikker_hash_free(hash);
        return 1;
    }

-    int word_count = tikker_extract_words(decoded, words, TIKKER_MAX_WORDS);
+    struct dirent *entry;
+    char filepath[512];
+    char buffer[65536];
+
+    while ((entry = readdir(dir)) != NULL) {
+        if (entry->d_name[0] == '.') continue;
+
+        snprintf(filepath, sizeof(filepath), "logs_plain/%s", entry->d_name);
+        FILE *f = fopen(filepath, "r");
+        if (!f) continue;
+
+        while (fgets(buffer, sizeof(buffer), f)) {
+            char *p = buffer;
+            char word[TIKKER_MAX_WORD_LEN];
+            int word_len = 0;
+
+            while (*p) {
+                if (tikker_is_valid_word_char(*p)) {
+                    if (word_len < TIKKER_MAX_WORD_LEN - 1) {
+                        word[word_len++] = toupper((unsigned char)*p);
+                    }
+                } else {
+                    if (word_len >= 2) {
+                        word[word_len] = '\0';
+                        tikker_hash_insert(hash, word);
+                    }
+                    word_len = 0;
+                }
+                p++;
+            }
+        }
+        fclose(f);
+    }
+    closedir(dir);
+
+    int word_count;
+    tikker_word_count_t *words = tikker_hash_to_array(hash, &word_count);
+    tikker_hash_free(hash);
+
    tikker_sort_words(words, word_count);

    int output_count = (limit < word_count) ? limit : word_count;
@ -273,8 +367,6 @@ static inline int tikker_stats_top_words(int db, int limit) {
    }

    free(words);
-    free(decoded);
-    free(result);
    return 0;
 }

--- a/tikker_words.h
+++ b/tikker_words.h
@ -6,67 +6,89 @@
 #include <string.h>
 #include "tikker_types.h"

+#define TIKKER_HASH_SIZE 65536
+
+typedef struct tikker_word_node { /* One entry in a hash bucket's singly linked chain. */
+    char word[TIKKER_MAX_WORD_LEN]; /* NUL-terminated key; tikker_hash_insert truncates it to fit */
+    int count; /* set to 1 on first insert, incremented on each repeat */
+    struct tikker_word_node *next; /* next node in the same bucket */
+} tikker_word_node_t;
+
+typedef struct { /* Chained hash table that maps words to occurrence counts. */
+    tikker_word_node_t *buckets[TIKKER_HASH_SIZE]; /* chain heads, indexed by tikker_hash_word() */
+    int total_words; /* number of distinct words (nodes) stored */
+} tikker_word_hash_t;
+
 static inline int tikker_is_valid_word_char(char c) { /* Returns 1 if c is alphanumeric (per isalnum) or '_', otherwise 0. */
    return isalnum((unsigned char)c) || c == '_'; /* cast keeps negative char values out of isalnum */
 }

+/* djb2 string hash (seed 5381, multiplier 33) reduced to a bucket index. */
+static inline unsigned int tikker_hash_word(const char *word) {
+    unsigned int acc = 5381;
+    for (const unsigned char *p = (const unsigned char *)word; *p != '\0'; p++) {
+        acc = acc * 33u + *p;
+    }
+    return acc % TIKKER_HASH_SIZE;
+}
+
+/* Allocates an empty, zero-filled word table; returns NULL if allocation fails. */
+static inline tikker_word_hash_t *tikker_hash_create(void) {
+    return calloc(1, sizeof(tikker_word_hash_t));
+}
+
+/*
+ * Records one occurrence of `word` in table `h`.
+ *
+ * If the word is already present its count is incremented. Otherwise a new
+ * node with count 1 is pushed onto the front of its bucket chain and
+ * total_words is incremented. The stored key is truncated to
+ * TIKKER_MAX_WORD_LEN - 1 characters.
+ *
+ * If the node cannot be allocated the word is dropped and the table is left
+ * unchanged; the function returns void, so the failure is not reported.
+ */
+static inline void tikker_hash_insert(tikker_word_hash_t *h, const char *word) {
+    unsigned int idx = tikker_hash_word(word);
+
+    for (tikker_word_node_t *node = h->buckets[idx]; node; node = node->next) {
+        if (strcmp(node->word, word) == 0) {
+            node->count++;
+            return;
+        }
+    }
+
+    tikker_word_node_t *fresh = malloc(sizeof *fresh);
+    if (!fresh) {
+        return;
+    }
+    strncpy(fresh->word, word, TIKKER_MAX_WORD_LEN - 1);
+    fresh->word[TIKKER_MAX_WORD_LEN - 1] = '\0';
+    fresh->count = 1;
+    fresh->next = h->buckets[idx];
+    h->buckets[idx] = fresh;
+    h->total_words++;
+}
+
 static inline int tikker_word_count_compare(const void *a, const void *b) { /* Comparator over tikker_word_count_t: positive when b's count exceeds a's. Presumably passed to qsort by tikker_sort_words - confirm. */
    return ((tikker_word_count_t *)b)->count - ((tikker_word_count_t *)a)->count; /* b minus a, so larger counts sort first */
 }

-static inline int tikker_extract_words(const char *text, tikker_word_count_t *words, int max_words) {
-    int word_count = 0;
-    const char *p = text;
-    char word[TIKKER_MAX_WORD_LEN];
-    int word_len = 0;
+/*
+ * Copies every (word, count) pair stored in `h` into a newly malloc'd array.
+ *
+ * On success *count is set to h->total_words and the array is returned; the
+ * caller owns it and must free() it. Entries appear in bucket order.
+ *
+ * If the array cannot be allocated, *count is set to 0 and NULL is returned
+ * so the copy loop never writes through a null pointer.
+ */
+static inline tikker_word_count_t *tikker_hash_to_array(tikker_word_hash_t *h, int *count) {
+    tikker_word_count_t *arr = malloc((size_t)h->total_words * sizeof *arr);
+    int idx = 0;
+
+    if (!arr) {
+        *count = 0;
+        return NULL;
+    }

-    while (*p) {
-        if (tikker_is_valid_word_char(*p)) {
-            if (word_len < TIKKER_MAX_WORD_LEN - 1) {
-                word[word_len++] = tolower((unsigned char)*p);
-            }
-        } else {
-            if (word_len >= 2) {
-                word[word_len] = '\0';
-
-                int found = 0;
-                for (int i = 0; i < word_count; i++) {
-                    if (strcmp(words[i].word, word) == 0) {
-                        words[i].count++;
-                        found = 1;
-                        break;
+    for (int i = 0; i < TIKKER_HASH_SIZE; i++) {
+        tikker_word_node_t *node = h->buckets[i];
+        while (node) {
+            strncpy(arr[idx].word, node->word, TIKKER_MAX_WORD_LEN);
+            arr[idx].count = node->count;
+            idx++;
+            node = node->next;
        }
    }

-                if (!found && word_count < max_words) {
-                    strcpy(words[word_count].word, word);
-                    words[word_count].count = 1;
-                    word_count++;
-                }
-            }
-            word_len = 0;
-        }
-        p++;
-    }
+    *count = h->total_words;
+    return arr;
+}

-    if (word_len >= 2) {
-        word[word_len] = '\0';
-        int found = 0;
-        for (int i = 0; i < word_count; i++) {
-            if (strcmp(words[i].word, word) == 0) {
-                words[i].count++;
-                found = 1;
-                break;
+/*
+ * Frees every node in every bucket of `h`, then the table itself.
+ * Passing NULL is a no-op, matching free().
+ */
+static inline void tikker_hash_free(tikker_word_hash_t *h) {
+    if (!h) {
+        return;
+    }
+    for (int i = 0; i < TIKKER_HASH_SIZE; i++) {
+        tikker_word_node_t *node = h->buckets[i];
+        while (node) {
+            /* Read the link before freeing the node that holds it. */
+            tikker_word_node_t *next = node->next;
+            free(node);
+            node = next;
        }
    }
-        if (!found && word_count < max_words) {
-            strcpy(words[word_count].word, word);
-            words[word_count].count = 1;
-            word_count++;
-        }
-    }
-
-    return word_count;
+    free(h);
 }

 static inline void tikker_sort_words(tikker_word_count_t *words, int count) {