diff --git a/tikker b/tikker index ee2ce5e..6aad683 100755 Binary files a/tikker and b/tikker differ diff --git a/tikker.c b/tikker.c index b144319..7f29903 100755 --- a/tikker.c +++ b/tikker.c @@ -141,6 +141,7 @@ static void tikker_print_usage(void) { printf(" stats top-keys [N] Top N keys (default: 10)\n"); printf(" stats top-words [N] Top N words (default: 10)\n"); printf(" stats summary Overall summary statistics\n"); + printf(" export Export logs to logs_plain/\n"); printf(" decode [FILE] Decode keystroke log file\n"); printf("\nOptions:\n"); printf(" --device='NAME' Monitor specific device\n"); @@ -309,6 +310,10 @@ int main(int argc, char *argv[]) { return tikker_handle_stats_command(argc - 2, argv + 2); } + if (strcmp(argv[1], "export") == 0) { + return tikker_export_logs(tikker_db); + } + if (strcmp(argv[1], "decode") == 0) { if (argc < 3) { fprintf(stderr, "Error: decode requires a filename\n"); diff --git a/tikker_stats.h b/tikker_stats.h index 7730e5f..58145d0 100644 --- a/tikker_stats.h +++ b/tikker_stats.h @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include "sormc.h" #include "tikker_types.h" #include "tikker_db.h" @@ -212,59 +215,150 @@ static inline int tikker_stats_summary(int db) { return 0; } -static inline int tikker_stats_top_words(int db, int limit) { - sorm_ptr result = sormq(db, - "SELECT STRFTIME('%%Y-%%m-%%d.%%H', timestamp) as date_hour, " - "GROUP_CONCAT(char, '') as chars " - "FROM kevent WHERE event = 'PRESSED' " - "GROUP BY date_hour ORDER BY date_hour"); - - printf("word,count\n"); - - if (!result) return 0; - - size_t total_len = strlen((char *)result); - char *decoded = malloc(total_len + 1); - if (!decoded) { - free(result); - return 1; +static inline int tikker_export_logs(int db) { + struct stat st = {0}; + if (stat("logs_plain", &st) == -1) { + mkdir("logs_plain", 0755); } - char *csv = (char *)result; + sorm_ptr hours_result = sormq(db, + "SELECT DISTINCT STRFTIME('%%Y-%%m-%%d.%%H', timestamp) as date_hour " + "FROM kevent WHERE event = 'PRESSED' ORDER BY date_hour"); + + if (!hours_result) { + printf("No data to export\n"); + return 0; + } + + int files_written = 0; + char *csv = (char *)hours_result; char *line = csv; char *next; - size_t decoded_pos = 0; while (line && *line) { next = strchr(line, '\n'); if (next) *next = '\0'; - char *chars = strchr(line, ';'); - if (chars) { - chars++; - char *end = strchr(chars, ';'); - if (end) *end = '\0'; + if (tikker_csv_is_metadata(line) || strlen(line) < 10) { + if (next) line = next + 1; + else break; + continue; + } - int len = tikker_decode_buffer(chars, decoded + decoded_pos, total_len - decoded_pos); - decoded_pos += len; - if (decoded_pos < total_len) { - decoded[decoded_pos++] = ' '; + char *end = strchr(line, ';'); + if (end) *end = '\0'; + + char date_hour[32]; + strncpy(date_hour, line, sizeof(date_hour) - 1); + date_hour[sizeof(date_hour) - 1] = '\0'; + + char date_hour_space[32]; + strncpy(date_hour_space, date_hour, sizeof(date_hour_space) - 1); + date_hour_space[sizeof(date_hour_space) - 1] = '\0'; + char *dot = strchr(date_hour_space, '.'); + if (dot) *dot = ' '; + + char start_ts[32], end_ts[32]; + snprintf(start_ts, sizeof(start_ts), "%s:00:00", date_hour_space); + snprintf(end_ts, sizeof(end_ts), "%s:59:59", date_hour_space); + + char sql[512]; + snprintf(sql, sizeof(sql), + "SELECT GROUP_CONCAT(char, '') FROM kevent " + "WHERE event = 'PRESSED' AND timestamp >= '%s' AND timestamp <= '%s'", + start_ts, end_ts); + + sorm_ptr chars_result = sormq(db, sql); + if (chars_result) { + char *chars_csv = (char *)chars_result; + char *chars_line = strchr(chars_csv, '\n'); + if (chars_line) chars_line++; + else chars_line = chars_csv; + + char *chars_end = strchr(chars_line, ';'); + if (chars_end) *chars_end = '\0'; + chars_end = strchr(chars_line, '\n'); + if (chars_end) *chars_end = '\0'; + + char filepath[256]; + snprintf(filepath, sizeof(filepath), "logs_plain/%s.txt", date_hour); + + FILE *f = fopen(filepath, "w"); + if (f) { + fprintf(f, "**%s:00**: ```%s```\n", date_hour_space, chars_line); + fclose(f); + files_written++; + if (files_written % 100 == 0) { + printf("Exported %d files...\r", files_written); + fflush(stdout); + } } + free(chars_result); } if (next) line = next + 1; else break; } - decoded[decoded_pos] = '\0'; - tikker_word_count_t *words = calloc(TIKKER_MAX_WORDS, sizeof(tikker_word_count_t)); - if (!words) { - free(decoded); - free(result); + free(hours_result); + printf("Exported %d hourly log files to logs_plain/\n", files_written); + return 0; +} + +static inline int tikker_stats_top_words(int db, int limit) { + (void)db; + + printf("word,count\n"); + + tikker_word_hash_t *hash = tikker_hash_create(); + if (!hash) return 1; + + DIR *dir = opendir("logs_plain"); + if (!dir) { + fprintf(stderr, "Error: Cannot open logs_plain directory\n"); + tikker_hash_free(hash); return 1; } - int word_count = tikker_extract_words(decoded, words, TIKKER_MAX_WORDS); + struct dirent *entry; + char filepath[512]; + char buffer[65536]; + + while ((entry = readdir(dir)) != NULL) { + if (entry->d_name[0] == '.') continue; + + snprintf(filepath, sizeof(filepath), "logs_plain/%s", entry->d_name); + FILE *f = fopen(filepath, "r"); + if (!f) continue; + + while (fgets(buffer, sizeof(buffer), f)) { + char *p = buffer; + char word[TIKKER_MAX_WORD_LEN]; + int word_len = 0; + + while (*p) { + if (tikker_is_valid_word_char(*p)) { + if (word_len < TIKKER_MAX_WORD_LEN - 1) { + word[word_len++] = toupper((unsigned char)*p); + } + } else { + if (word_len >= 2) { + word[word_len] = '\0'; + tikker_hash_insert(hash, word); + } + word_len = 0; + } + p++; + } + } + fclose(f); + } + closedir(dir); + + int word_count; + tikker_word_count_t *words = tikker_hash_to_array(hash, &word_count); + tikker_hash_free(hash); + tikker_sort_words(words, word_count); int output_count = (limit < word_count) ? limit : word_count; @@ -273,8 +367,6 @@ static inline int tikker_stats_top_words(int db, int limit) { } free(words); - free(decoded); - free(result); return 0; } diff --git a/tikker_words.h b/tikker_words.h index 8e31c7a..70d920e 100644 --- a/tikker_words.h +++ b/tikker_words.h @@ -6,67 +6,89 @@ #include #include "tikker_types.h" +#define TIKKER_HASH_SIZE 65536 + +typedef struct tikker_word_node { + char word[TIKKER_MAX_WORD_LEN]; + int count; + struct tikker_word_node *next; +} tikker_word_node_t; + +typedef struct { + tikker_word_node_t *buckets[TIKKER_HASH_SIZE]; + int total_words; +} tikker_word_hash_t; + static inline int tikker_is_valid_word_char(char c) { return isalnum((unsigned char)c) || c == '_'; } +static inline unsigned int tikker_hash_word(const char *word) { + unsigned int hash = 5381; + while (*word) { + hash = ((hash << 5) + hash) + (unsigned char)*word++; + } + return hash % TIKKER_HASH_SIZE; +} + +static inline tikker_word_hash_t *tikker_hash_create(void) { + tikker_word_hash_t *h = calloc(1, sizeof(tikker_word_hash_t)); + return h; +} + +static inline void tikker_hash_insert(tikker_word_hash_t *h, const char *word) { + unsigned int idx = tikker_hash_word(word); + tikker_word_node_t *node = h->buckets[idx]; + + while (node) { + if (strcmp(node->word, word) == 0) { + node->count++; + return; + } + node = node->next; + } + + node = malloc(sizeof(tikker_word_node_t)); + strncpy(node->word, word, TIKKER_MAX_WORD_LEN - 1); + node->word[TIKKER_MAX_WORD_LEN - 1] = '\0'; + node->count = 1; + node->next = h->buckets[idx]; + h->buckets[idx] = node; + h->total_words++; +} + static inline int tikker_word_count_compare(const void *a, const void *b) { return ((tikker_word_count_t *)b)->count - ((tikker_word_count_t *)a)->count; } -static inline int tikker_extract_words(const char *text, tikker_word_count_t *words, int max_words) { - int word_count = 0; - const char *p = text; - char word[TIKKER_MAX_WORD_LEN]; - int word_len = 0; +static inline tikker_word_count_t *tikker_hash_to_array(tikker_word_hash_t *h, int *count) { + tikker_word_count_t *arr = malloc(h->total_words * sizeof(tikker_word_count_t)); + int idx = 0; - while (*p) { - if (tikker_is_valid_word_char(*p)) { - if (word_len < TIKKER_MAX_WORD_LEN - 1) { - word[word_len++] = tolower((unsigned char)*p); - } - } else { - if (word_len >= 2) { - word[word_len] = '\0'; - - int found = 0; - for (int i = 0; i < word_count; i++) { - if (strcmp(words[i].word, word) == 0) { - words[i].count++; - found = 1; - break; - } - } - - if (!found && word_count < max_words) { - strcpy(words[word_count].word, word); - words[word_count].count = 1; - word_count++; - } - } - word_len = 0; - } - p++; - } - - if (word_len >= 2) { - word[word_len] = '\0'; - int found = 0; - for (int i = 0; i < word_count; i++) { - if (strcmp(words[i].word, word) == 0) { - words[i].count++; - found = 1; - break; - } - } - if (!found && word_count < max_words) { - strcpy(words[word_count].word, word); - words[word_count].count = 1; - word_count++; + for (int i = 0; i < TIKKER_HASH_SIZE; i++) { + tikker_word_node_t *node = h->buckets[i]; + while (node) { + strncpy(arr[idx].word, node->word, TIKKER_MAX_WORD_LEN); + arr[idx].count = node->count; + idx++; + node = node->next; } } - return word_count; + *count = h->total_words; + return arr; +} + +static inline void tikker_hash_free(tikker_word_hash_t *h) { + for (int i = 0; i < TIKKER_HASH_SIZE; i++) { + tikker_word_node_t *node = h->buckets[i]; + while (node) { + tikker_word_node_t *next = node->next; + free(node); + node = next; + } + } + free(h); } static inline void tikker_sort_words(tikker_word_count_t *words, int count) {