// Text analyzer: counts capitalized words, sentences, words, numbers and
// forbidden words in files given on the command line (or stdin).
//
// NOTE(review): the names of the angle-bracket headers were lost in the
// original text; the system headers below are reconstructed from the library
// functions this file actually uses. The position of "rmalloc.h" (first) is
// kept from the original - confirm the exact header list against the build.
#include "rmalloc.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "rstring_list.h"
#include "rstr.h"
#include <string.h>

#define sl rstring_list_t
#define slf rstring_list_free
#define sla rstring_list_add
#define sln rstring_list_new
#define rb rbuffer_t
#define rbf rbuffer_free
#define rbs rbuffer_to_string
#define rbw rbuffer_write
#define rbn rbuffer_new

// NULL-terminated list of words reported by get_forbidden_words().
char *forbidden_words[] = {
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//",
    "com", "@", "crypto", "bitcoin", "wallet", "hacker", "welcome",
    "whatsapp", "email", "cryptocurrency", "stolen", "freeze", "quick",
    "crucial", "tracing", "scammers", "expers", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting",
    "understanding", "assets", "funds", NULL};

// Output toggles, changed by the command-line flags parsed in main().
// NOTE(review): show_capitalized starts false and no flag sets it to true.
bool show_capitalized = false;
bool show_sentences = false;
bool show_words = false;
bool show_numbers = false;
bool show_forbidden_words = true;

// Allocates `size` bytes or terminates the program; keeps the string helpers
// below free of repeated NULL checks.
static void *checked_malloc(size_t size) {
    void *mem = malloc(size);
    if (!mem) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

// Returns true when `path` can be opened for reading.
bool file_exists(char *path) {
    FILE *f = fopen(path, "r");
    if (!f) {
        return false;
    }
    fclose(f);
    return true;
}

// Prints every entry of `lst` to stdout as "<index:string>", one per line.
void sld(sl *lst) {
    for (uint i = 0; i < lst->count; i++) {
        printf("<%u:%s>\n", i, lst->strings[i]);
    }
}

// Returns a newly allocated copy of `content` without the characters '<',
// '>' and ':' (the delimiters used by the "<index:string>" list format).
// The caller frees the result.
char *remove_preserved_chars(char *content) {
    char *out = checked_malloc(strlen(content) + 1);
    size_t len = 0;
    for (; *content; content++) {
        if (*content == '<' || *content == '>' || *content == ':') {
            continue;
        }
        out[len++] = *content;
    }
    out[len] = '\0';
    return out;
}

// Serializes `lst` in the same "<index:string>\n" format as sld(), with the
// delimiter characters removed from each string. Returns the result of
// strc(); callers in this file release it with free().
char *slds(sl *lst) {
    str_t *buffer = strn(1337);
    for (uint i = 0; i < lst->count; i++) {
        char *cc = remove_preserved_chars(lst->strings[i]);
        // 20 spare bytes cover "<", up to 10 index digits, ":", ">", "\n", NUL.
        size_t cap = strlen(cc) + 20;
        char *temp = checked_malloc(cap);
        snprintf(temp, cap, "<%u:%s>\n", i, cc);
        free(cc);
        stra(buffer, temp);
        free(temp);
    }
    return strc(buffer);
}

// Returns true for tab, newline, space and comma.
bool isws(char c) {
    return c == '\t' || c == '\n' || c == ' ' || c == ',';
}

// Returns a newly allocated copy of `content` with every isws() character
// removed. The caller frees the result.
char *stripws(char *content) {
    char *out = checked_malloc(strlen(content) + 1);
    size_t len = 0;
    for (; *content; content++) {
        if (!isws(*content)) {
            out[len++] = *content;
        }
    }
    out[len] = '\0';
    return out;
}

// Reads `f` until end of file and returns the content via strc(); callers in
// this file release it with free().
char *fread_till_eof(FILE *f) {
    // fgetc() returns int: storing it in a char makes the EOF comparison
    // unreliable (early stop on byte 0xFF, or never-ending loop when char is
    // unsigned).
    int c;
    str_t *buffer = strn(1337);
    while ((c = fgetc(f)) != EOF) {
        strac(buffer, (char)c);
    }
    return strc(buffer);
}

// Splits `content` into sentences. A sentence runs up to and including a
// '.'; spaces, tabs and newlines before the first character of a sentence
// are skipped. Text after the last '.' is not returned.
rstring_list_t *get_sentences(char *content) {
    rstring_list_t *sentences = rstring_list_new();
    char *buf = checked_malloc(strlen(content) + 1);
    size_t len = 0;

    for (; *content; content++) {
        char c = *content;
        // len == 0 means no sentence has started yet.
        if (len == 0 && (c == ' ' || c == '\t' || c == '\n')) {
            continue;
        }
        buf[len++] = c;
        if (c == '.') {
            buf[len] = '\0';
            rstring_list_add(sentences, buf);
            len = 0;
        }
    }
    free(buf);
    return sentences;
}

// Splits `content` on spaces, tabs and newlines and returns the non-empty
// tokens, including a final token that is not followed by whitespace.
rstring_list_t *get_words(char *content) {
    rstring_list_t *words = rstring_list_new();
    char *buf = checked_malloc(strlen(content) + 1);
    size_t len = 0;

    for (; *content; content++) {
        char c = *content;
        if (c == ' ' || c == '\t' || c == '\n') {
            if (len > 0) {
                buf[len] = '\0';
                rstring_list_add(words, buf);
                len = 0;
            }
            continue;
        }
        buf[len++] = c;
    }
    // Flush the last word: input rarely ends with whitespace, and sentences
    // produced by get_sentences() always end with '.'.
    if (len > 0) {
        buf[len] = '\0';
        rstring_list_add(words, buf);
    }
    free(buf);
    return words;
}

// Returns false as soon as an alphanumeric character differs from its
// uppercase form; otherwise true. Words without letters (digits,
// punctuation) therefore also yield true.
bool is_fully_capitalized_word(char *word) {
    for (; *word; word++) {
        // <ctype.h> functions require a value representable as unsigned char.
        unsigned char uc = (unsigned char)*word;
        if (isalnum(uc) && toupper(uc) != uc) {
            return false;
        }
    }
    return true;
}

// Collects every word for which is_fully_capitalized_word() is true,
// sentence by sentence.
sl *get_capitalized_words(char *content) {
    sl *capitalized_words = sln();
    sl *sentences = get_sentences(content);
    for (uint j = 0; j < sentences->count; j++) {
        sl *all_words = get_words(sentences->strings[j]);
        // The first word of a sentence is checked as well (index 0).
        for (uint i = 0; i < all_words->count; i++) {
            if (is_fully_capitalized_word(all_words->strings[i])) {
                rstring_list_add(capitalized_words, all_words->strings[i]);
            }
        }
        slf(all_words);
    }
    slf(sentences);
    return capitalized_words;
}

// Returns a newly allocated copy of `content` that keeps only letters,
// digits, space, newline and the punctuation ".,!?". The caller frees the
// result.
char *clean_content(char *content) {
    const char *allowed_ichars = "0123456789abcdefghijklmnopqrstuvwxyz \n.,!?";
    char *out = checked_malloc(strlen(content) + 1);
    size_t len = 0;
    for (; *content; content++) {
        // Compare case-insensitively; the original character is what is kept.
        if (strchr(allowed_ichars, tolower((unsigned char)*content))) {
            out[len++] = *content;
        }
    }
    out[len] = '\0';
    return out;
}

// Returns every run of digits found in `content` after clean_content() and
// stripws() have been applied. Because whitespace and commas are removed
// first, digits separated only by those characters form a single number.
sl *get_numbers(char *content) {
    char *cleaned = clean_content(content);
    char *stripped = stripws(cleaned);
    free(cleaned);

    char *buf = checked_malloc(strlen(stripped) + 1);
    size_t len = 0;
    sl *numbers = sln();

    for (char *p = stripped; *p; p++) {
        if (isdigit((unsigned char)*p)) {
            buf[len++] = *p;
        } else if (len > 0) {
            buf[len] = '\0';
            sla(numbers, buf);
            len = 0;
        }
    }
    // Flush a number that ends the input.
    if (len > 0) {
        buf[len] = '\0';
        sla(numbers, buf);
    }
    free(buf);
    free(stripped);
    return numbers;
}

// Returns true when the two strings are equal ignoring case. Note that this
// is a boolean, unlike the conventional comparison functions of the same
// name, which return 0 on a match.
bool stricmp(char *word1, char *word2) {
    while (*word1 && tolower((unsigned char)*word1) ==
                         tolower((unsigned char)*word2)) {
        word1++;
        word2++;
    }
    return *word1 == *word2;
}

// Returns true when `words` contains `word`, compared case-insensitively.
bool containswordi(sl *words, char *word) {
    for (uint i = 0; i < words->count; i++) {
        if (stricmp(words->strings[i], word)) {
            return true;
        }
    }
    return false;
}

// Returns the entries of forbidden_words that occur in `content`.
// NOTE(review): matching is against whole whitespace-separated tokens, so
// "hack," or "http://x" do not match "hack" or "http" - confirm that this is
// the intended behavior.
sl *get_forbidden_words(char *content) {
    sl *words = get_words(content);
    sl *found = sln();
    for (int j = 0; forbidden_words[j] != NULL; j++) {
        if (containswordi(words, forbidden_words[j])) {
            rstring_list_add(found, forbidden_words[j]);
        }
    }
    slf(words);
    return found;
}

// Prints "<label>: <count>", dumps the list when `show` is set, and appends
// the serialized list to `all`.
static void report_list(const char *label, sl *lst, bool show, str_t *all) {
    printf("%s: %u\n", label, lst->count);
    if (show) {
        sld(lst);
    }
    char *serialized = slds(lst);
    stra(all, serialized);
    free(serialized);
}

// Reads all of `f` and prints the statistics for its content.
void analyze(FILE *f) {
    char *data = fread_till_eof(f);
    str_t *all = strn(1337);

    sl *capitalized_words = get_capitalized_words(data);
    report_list("Capitalized words", capitalized_words, show_capitalized, all);

    sl *sentences = get_sentences(data);
    report_list("Sentences", sentences, show_sentences, all);

    sl *words = get_words(data);
    report_list("Words", words, show_words, all);

    sl *numbers = get_numbers(data);
    report_list("Numbers", numbers, show_numbers, all);

    sl *fw = get_forbidden_words(data);
    report_list("Forbidden words", fw, show_forbidden_words, all);

    // presumably releases the accumulated buffer - confirm against rstr.h
    strd(all);

    // Input without any '.' has zero sentences; avoid dividing by zero.
    uint word_count_per_sentence =
        sentences->count ? words->count / sentences->count : 0;
    printf("Word count per sentence: %u\n", word_count_per_sentence);

    slf(capitalized_words);
    slf(sentences);
    slf(words);
    slf(numbers);
    slf(fw);
    free(data);
}

// Opens `path` and analyzes it; reports the error and returns when the file
// cannot be opened.
void analyze_file(char *path) {
    FILE *f = fopen(path, "r");
    if (!f) {
        perror(path);
        return;
    }
    analyze(f);
    fclose(f);
}

// Arguments are processed left to right: a flag changes the output toggles
// for the files that follow it, any other argument is analyzed as a file.
// When no file argument is given, stdin is analyzed.
int main(int argc, char *argv[]) {
    int files_analyzed = 0;

    for (int i = 1; i < argc; i++) {
        char *arg = argv[i];

        if (!strcmp(arg, "--hide-capitalized")) {
            show_capitalized = false;
            continue;
        }
        if (!strcmp(arg, "--show-sentences")) {
            show_sentences = true;
            continue;
        }
        if (!strcmp(arg, "--show-words")) {
            show_words = true;
            continue;
        }
        if (!strcmp(arg, "--show-numbers")) {
            show_numbers = true;
            continue;
        }
        if (!strcmp(arg, "--hide-forbidden-words")) {
            show_forbidden_words = false;
            continue;
        }
        if (!strcmp(arg, "help") || !strcmp(arg, "--help")) {
            printf("%s", "Usage: spam [file] [file] [file]\n"
                         "Flag defaults:\n"
                         " hide-capitalized = true\n"
                         " show-sentences = false\n"
                         " show-words = false\n"
                         " show-numbers = false\n"
                         " hide-forbidden-words = false\n");
            return 0;
        }

        printf("File: %s\n", arg);
        analyze_file(arg);
        printf("%s\n", rmalloc_stats());
        printf("\n");
        files_analyzed++;
    }

    if (files_analyzed == 0) {
        analyze(stdin);
        printf("%s\n", rmalloc_stats());
    }
    return 0;
}