77 lines
2.1 KiB
C
Raw Normal View History

2025-12-05 17:08:50 +01:00
#ifndef TIKKER_WORDS_H
#define TIKKER_WORDS_H
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "tikker_types.h"
/**
 * Report whether a character may appear inside a word.
 *
 * @param c Character to classify.
 * @return 1 if c is an underscore or is alphanumeric according to isalnum(),
 *         0 otherwise.
 */
static inline int tikker_is_valid_word_char(char c) {
    if (c == '_') {
        return 1;
    }
    /* Convert to unsigned char first: <ctype.h> functions have undefined
     * behavior for negative arguments other than EOF. */
    return isalnum((unsigned char)c) != 0;
}
static inline int tikker_word_count_compare(const void *a, const void *b) {
return ((tikker_word_count_t *)b)->count - ((tikker_word_count_t *)a)->count;
}
/**
 * Record one occurrence of a NUL-terminated word in the table.
 *
 * If the word is already present its count is incremented; otherwise it is
 * appended with a count of 1, provided the table still has room. When the
 * table is full, new words are dropped.
 *
 * @param words      Table of entries; the first word_count are in use.
 * @param word_count Number of entries currently in use.
 * @param max_words  Capacity of the table.
 * @param word       NUL-terminated word to record.
 * @return The number of entries in use after the operation.
 */
static inline int tikker_words_record(tikker_word_count_t *words, int word_count,
                                      int max_words, const char *word) {
    for (int i = 0; i < word_count; i++) {
        if (strcmp(words[i].word, word) == 0) {
            words[i].count++;
            return word_count;
        }
    }
    if (word_count < max_words) {
        /* NOTE(review): assumes words[].word can hold TIKKER_MAX_WORD_LEN
         * bytes; confirm against tikker_types.h. */
        strcpy(words[word_count].word, word);
        words[word_count].count = 1;
        word_count++;
    }
    return word_count;
}

/**
 * Split text into lowercase words and tally how often each one occurs.
 *
 * A word is a maximal run of characters accepted by
 * tikker_is_valid_word_char(). Words are lowercased, runs shorter than two
 * characters are ignored, and characters beyond TIKKER_MAX_WORD_LEN - 1 in a
 * single run are discarded. Once the table holds max_words distinct words,
 * further new words are dropped but counts of known words keep increasing.
 *
 * @param text      NUL-terminated input text; NULL yields 0.
 * @param words     Output table with room for at least max_words entries;
 *                  NULL yields 0.
 * @param max_words Capacity of words; values <= 0 yield 0.
 * @return Number of distinct words stored in words.
 */
static inline int tikker_extract_words(const char *text, tikker_word_count_t *words, int max_words) {
    if (text == NULL || words == NULL || max_words <= 0) {
        return 0;
    }

    char word[TIKKER_MAX_WORD_LEN];
    int word_len = 0;
    int word_count = 0;

    for (const char *p = text; *p != '\0'; p++) {
        if (tikker_is_valid_word_char(*p)) {
            /* Keep one byte free for the terminator; excess characters of an
             * over-long run are silently skipped. */
            if (word_len < TIKKER_MAX_WORD_LEN - 1) {
                word[word_len++] = (char)tolower((unsigned char)*p);
            }
            continue;
        }

        /* A separator ends the current run. */
        if (word_len >= 2) {
            word[word_len] = '\0';
            word_count = tikker_words_record(words, word_count, max_words, word);
        }
        word_len = 0;
    }

    /* Flush a word that runs up to the end of the text. */
    if (word_len >= 2) {
        word[word_len] = '\0';
        word_count = tikker_words_record(words, word_count, max_words, word);
    }

    return word_count;
}
/**
 * Sort word-count entries in place using tikker_word_count_compare().
 *
 * @param words Array of entries to sort; NULL is treated as a no-op.
 * @param count Number of entries in words; values below 2 are a no-op.
 */
static inline void tikker_sort_words(tikker_word_count_t *words, int count) {
    /* Nothing to order, and a negative count must not reach qsort(), where
     * it would be converted to a huge size_t. */
    if (words == NULL || count < 2) {
        return;
    }
    qsort(words, (size_t)count, sizeof *words, tikker_word_count_compare);
}
#endif