#ifndef TIKKER_WORDS_H #define TIKKER_WORDS_H #include #include #include #include "tikker_types.h" #define TIKKER_HASH_SIZE 65536 typedef struct tikker_word_node { char word[TIKKER_MAX_WORD_LEN]; int count; struct tikker_word_node *next; } tikker_word_node_t; typedef struct { tikker_word_node_t *buckets[TIKKER_HASH_SIZE]; int total_words; } tikker_word_hash_t; static inline int tikker_is_valid_word_char(char c) { return isalnum((unsigned char)c) || c == '_'; } static inline unsigned int tikker_hash_word(const char *word) { unsigned int hash = 5381; while (*word) { hash = ((hash << 5) + hash) + (unsigned char)*word++; } return hash % TIKKER_HASH_SIZE; } static inline tikker_word_hash_t *tikker_hash_create(void) { tikker_word_hash_t *h = calloc(1, sizeof(tikker_word_hash_t)); return h; } static inline void tikker_hash_insert(tikker_word_hash_t *h, const char *word) { unsigned int idx = tikker_hash_word(word); tikker_word_node_t *node = h->buckets[idx]; while (node) { if (strcmp(node->word, word) == 0) { node->count++; return; } node = node->next; } node = malloc(sizeof(tikker_word_node_t)); strncpy(node->word, word, TIKKER_MAX_WORD_LEN - 1); node->word[TIKKER_MAX_WORD_LEN - 1] = '\0'; node->count = 1; node->next = h->buckets[idx]; h->buckets[idx] = node; h->total_words++; } static inline int tikker_word_count_compare(const void *a, const void *b) { return ((tikker_word_count_t *)b)->count - ((tikker_word_count_t *)a)->count; } static inline tikker_word_count_t *tikker_hash_to_array(tikker_word_hash_t *h, int *count) { tikker_word_count_t *arr = malloc(h->total_words * sizeof(tikker_word_count_t)); int idx = 0; for (int i = 0; i < TIKKER_HASH_SIZE; i++) { tikker_word_node_t *node = h->buckets[i]; while (node) { strncpy(arr[idx].word, node->word, TIKKER_MAX_WORD_LEN); arr[idx].count = node->count; idx++; node = node->next; } } *count = h->total_words; return arr; } static inline void tikker_hash_free(tikker_word_hash_t *h) { for (int i = 0; i < TIKKER_HASH_SIZE; i++) { tikker_word_node_t *node = h->buckets[i]; while (node) { tikker_word_node_t *next = node->next; free(node); node = next; } } free(h); } static inline void tikker_sort_words(tikker_word_count_t *words, int count) { qsort(words, count, sizeof(tikker_word_count_t), tikker_word_count_compare); } #endif