|
#ifndef TIKKER_WORDS_H
|
|
#define TIKKER_WORDS_H
|
|
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "tikker_types.h"
|
|
|
|
/**
 * Tells whether a character may be part of a word.
 *
 * @param c  character to classify
 * @return   1 if c is an underscore or is alphanumeric according to
 *           isalnum(), 0 otherwise.
 */
static inline int tikker_is_valid_word_char(char c) {
    if (c == '_') {
        return 1;
    }
    /* <ctype.h> classifiers need an unsigned char value, hence the cast. */
    return isalnum((unsigned char)c) != 0;
}
|
|
|
|
static inline int tikker_word_count_compare(const void *a, const void *b) {
|
|
return ((tikker_word_count_t *)b)->count - ((tikker_word_count_t *)a)->count;
|
|
}
|
|
|
|
/**
 * Records one occurrence of a NUL-terminated word in the tally.
 *
 * If the word is already present in words[0..word_count) its count is
 * incremented; otherwise, when there is still room (word_count < max_words),
 * it is appended with a count of 1. When the table is full, new words are
 * silently dropped.
 *
 * @return the (possibly incremented) number of entries in use.
 */
static inline int tikker_record_word(tikker_word_count_t *words, int word_count,
                                     int max_words, const char *word) {
    for (int i = 0; i < word_count; i++) {
        if (strcmp(words[i].word, word) == 0) {
            words[i].count++;
            return word_count;
        }
    }

    if (word_count < max_words) {
        /*
         * The caller never passes more than TIKKER_MAX_WORD_LEN - 1
         * characters plus the terminator.
         * NOTE(review): assumes words[].word can hold TIKKER_MAX_WORD_LEN
         * bytes - confirm against tikker_types.h.
         */
        strcpy(words[word_count].word, word);
        words[word_count].count = 1;
        word_count++;
    }
    return word_count;
}

/**
 * Splits text into lowercase words and tallies how often each one occurs.
 *
 * A word is a maximal run of characters accepted by
 * tikker_is_valid_word_char(). Words shorter than two characters are
 * ignored. Characters beyond TIKKER_MAX_WORD_LEN - 1 in a single run are
 * dropped, so an over-long word is counted under its truncated prefix.
 *
 * @param text       NUL-terminated input; NULL is treated as empty
 * @param words      output table with room for at least max_words entries;
 *                   entries [0, return value) are written
 * @param max_words  capacity of words; once reached, unseen words are
 *                   dropped while already-recorded words keep counting
 * @return           number of distinct words stored in words (0 if text or
 *                   words is NULL)
 */
static inline int tikker_extract_words(const char *text, tikker_word_count_t *words, int max_words) {
    if (text == NULL || words == NULL) {
        return 0;
    }

    int word_count = 0;
    char word[TIKKER_MAX_WORD_LEN];
    int word_len = 0;

    for (const char *p = text; *p; p++) {
        if (tikker_is_valid_word_char(*p)) {
            /* Keep one byte free for the terminator; extra chars are dropped. */
            if (word_len < TIKKER_MAX_WORD_LEN - 1) {
                word[word_len++] = (char)tolower((unsigned char)*p);
            }
            continue;
        }

        /* A separator ends the current run; flush it if it is long enough. */
        if (word_len >= 2) {
            word[word_len] = '\0';
            word_count = tikker_record_word(words, word_count, max_words, word);
        }
        word_len = 0;
    }

    /* The text may end in the middle of a word with no trailing separator. */
    if (word_len >= 2) {
        word[word_len] = '\0';
        word_count = tikker_record_word(words, word_count, max_words, word);
    }

    return word_count;
}
|
|
|
|
/**
 * Sorts a word table in place using tikker_word_count_compare().
 *
 * qsort() is not guaranteed to be stable, so the relative order of entries
 * that compare equal is unspecified.
 *
 * @param words  table to sort; may be NULL, in which case nothing happens
 * @param count  number of entries in words; values below 2 are a no-op
 */
static inline void tikker_sort_words(tikker_word_count_t *words, int count) {
    /*
     * Nothing to sort for 0 or 1 entries. Returning early also keeps a
     * negative count from turning into a huge size_t, and keeps a NULL
     * pointer out of qsort().
     */
    if (words == NULL || count < 2) {
        return;
    }
    qsort(words, (size_t)count, sizeof *words, tikker_word_count_compare);
}
|
|
|
|
#endif
|