#include "rmalloc.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "rstring_list.h"
#include "rstr.h"
#include <ctype.h>

/* Short aliases for the rstring_list API used throughout this file. */
#define sl rstring_list_t
#define slf rstring_list_free
#define sla rstring_list_add
#define sln rstring_list_new
/* Short aliases for the rbuffer API. NOTE(review): none of these are
 * referenced by the code visible in this file -- confirm before removing. */
#define rb rbuffer_t
#define rbf rbuffer_free
#define rbs rbuffer_to_string
#define rbw rbuffer_write
#define rbn rbuffer_new

/*
 * NULL-terminated table of words that get_forbidden_words() looks for.
 *
 * Matching goes through containswordi()/stricmp(), i.e. each entry is compared
 * case-insensitively against a whole whitespace-delimited token produced by
 * get_words(). An entry such as "http", "@" or "//" therefore only matches a
 * token that consists of exactly that text, not a token that merely contains it.
 *
 * NOTE(review): "expers" looks like a typo for "experts" -- confirm before
 * changing it, since editing the table changes detection results.
 */
char *forbidden_words[] = {
    "recovery",    "techie",    "http",     "https",   "digital",    "hack",          "::",       "//",    "com",
    "@",           "crypto",    "bitcoin",  "wallet",  "hacker",     "welcome",       "whatsapp", "email", "cryptocurrency",
    "stolen",      "freeze",    "quick",    "crucial", "tracing",    "scammers",      "expers",   "hire",  "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets",   "funds", NULL};


/*
 * Output toggles read by analyze(). Each one decides whether the matching
 * list is printed entry by entry; the per-category counts are printed
 * regardless. main() adjusts them from the command line.
 */
bool show_capitalized = false;
bool show_sentences = false;
bool show_words = false;
bool show_numbers = false;
bool show_forbidden_words = true;




/*
 * Reports whether `path` can be opened for reading.
 *
 * Returns true when fopen(path, "r") succeeds (the stream is closed again
 * before returning) and false otherwise.
 */
bool file_exists(char * path){
    FILE *handle = fopen(path, "r");
    if (handle == NULL) {
        return false;
    }
    fclose(handle);
    return true;
}

/*
 * Dumps a string list to stdout, one entry per line, formatted as
 * "<index:string>".
 */
void sld(sl *lst) {
    ulonglong idx = 0;
    while (idx < lst->count) {
        printf("<%llu:%s>\n", idx, lst->strings[idx]);
        idx++;
    }
}

/*
 * Returns a newly allocated copy of `content` with every '<', '>' and ':'
 * removed. These are the characters slds() uses to frame its "<index:text>"
 * lines. The caller owns the returned string and must free() it.
 */
char *remove_preserved_chars(char *content) {
    char *result = (char *)malloc(strlen(content) + 1);
    char *out = result;
    for (const char *in = content; *in != '\0'; in++) {
        switch (*in) {
        case '<':
        case '>':
        case ':':
            /* framing character: drop it */
            break;
        default:
            *out++ = *in;
            break;
        }
    }
    *out = '\0';
    return result;
}

/*
 * Serialises a string list into one string made of "<index:text>\n" lines,
 * where `text` is the entry with '<', '>' and ':' stripped by
 * remove_preserved_chars() so it cannot break the framing.
 *
 * Returns whatever strc() yields for the accumulated buffer; the callers in
 * this file free() that pointer.
 */
char *slds(sl *lst) {
    str_t *buffer = strn(1337);
    for (ulonglong i = 0; i < lst->count; i++) {
        char *cc = remove_preserved_chars(lst->strings[i]);
        /* Worst case framing: '<' + 20 index digits + ':' + '>' + '\n' + NUL
         * = 25 bytes on top of the text itself. */
        size_t temp_size = strlen(cc) + 25;
        char *temp = (char *)malloc(temp_size);
        /* snprintf bounds the write to the buffer we just sized. */
        snprintf(temp, temp_size, "<%llu:%s>\n", i, cc);
        free(cc);
        stra(buffer, temp);
        free(temp);
    }
    return strc(buffer);
}

/*
 * Returns true for the characters stripws() removes: tab, newline, space
 * and comma.
 */
bool isws(char c) {
    switch (c) {
    case '\t':
    case '\n':
    case ' ':
    case ',':
        return true;
    default:
        return false;
    }
}

/*
 * Returns a newly allocated copy of `content` without any character for
 * which isws() is true (tab, newline, space, comma). The caller frees the
 * result.
 */
char *stripws(char *content) {
    char *stripped = (char *)malloc(strlen(content) + 1);
    char *dst = stripped;
    for (char *src = content; *src; src++) {
        if (isws(*src)) {
            continue;
        }
        *dst++ = *src;
    }
    *dst = 0;
    return stripped;
}

/*
 * Reads `f` one character at a time until EOF and returns the collected
 * text as produced by strc(); analyze() free()s the returned pointer.
 */
char *fread_till_eof(FILE *f) {
    /* Must be int, not char: fgetc() returns an unsigned char value or EOF.
     * Storing it in a char either makes byte 0xFF look like EOF (signed char)
     * or makes the EOF comparison never succeed (unsigned char). */
    int c;
    str_t *buffer = strn(1337);
    while ((c = fgetc(f)) != EOF) {
        strac(buffer, (char)c);
    }
    char *content = strc(buffer);
    return content;
}

/*
 * Splits `content` into sentences terminated by '.'.
 *
 * Leading spaces, tabs and newlines before each sentence are skipped; once a
 * sentence has started, every character (including whitespace) is kept up to
 * and including the terminating '.'. Text after the last '.' is never added
 * to the list.
 *
 * Returns a new list the caller releases with rstring_list_free().
 * NOTE(review): the scratch buffer is reused and freed here, so this relies
 * on rstring_list_add() copying the string -- confirm.
 */
rstring_list_t *get_sentences(char *content) {
    rstring_list_t *result = rstring_list_new();
    char *pending = (char *)malloc(strlen(content) + 1);
    size_t len = 0;
    bool started = false;

    for (; *content; content++) {
        char ch = *content;

        if (!started) {
            if (ch == ' ' || ch == '\t' || ch == '\n') {
                continue;
            }
            started = true;
        }

        pending[len++] = ch;
        pending[len] = 0;

        if (ch == '.') {
            /* Sentence complete: store it and start over. */
            rstring_list_add(result, pending);
            len = 0;
            pending[0] = 0;
            started = false;
        }
    }

    free(pending);
    return result;
}

/*
 * Splits `content` into tokens separated by spaces, tabs or newlines.
 * Punctuation stays attached to the token it touches. Runs of separators
 * produce no empty tokens.
 *
 * Returns a new list the caller releases with rstring_list_free().
 */
rstring_list_t *get_words(char *content) {
    rstring_list_t *words = rstring_list_new();
    char *word_buffer = (char *)malloc(strlen(content) + 1);
    char *word_buffer_p = word_buffer;
    *word_buffer_p = 0;
    while (*content) {
        if (*content == ' ' || *content == '\t' || *content == '\n') {
            if (word_buffer_p != word_buffer) {
                rstring_list_add(words, word_buffer);
                word_buffer_p = word_buffer;
                *word_buffer = 0;
            }
        } else {
            *word_buffer_p = *content;
            word_buffer_p++;
            *word_buffer_p = 0;
        }
        content++;
    }
    /* Flush the final token: input that does not end in a separator (for
     * example a sentence ending in '.') would otherwise lose its last word. */
    if (word_buffer_p != word_buffer) {
        rstring_list_add(words, word_buffer);
    }
    free(word_buffer);
    return words;
}

/*
 * Returns true when `word` contains no lowercase letter, i.e. every
 * alphanumeric character is unchanged by toupper(). Non-alphanumeric
 * characters are ignored, so an empty string or a token made only of
 * digits/punctuation also yields true.
 */
bool is_fully_capitalized_word(char *word) {
    for (; *word; word++) {
        /* <ctype.h> functions require a value representable as unsigned
         * char; a plain (possibly negative) char is undefined behaviour. */
        unsigned char ch = (unsigned char)*word;
        if (isalnum(ch) && toupper(ch) != ch)
            return false;
    }
    return true;
}

/*
 * Collects every token of `content` for which is_fully_capitalized_word()
 * is true. The text is first split into sentences with get_sentences() and
 * each sentence into tokens with get_words().
 *
 * Returns a new list the caller releases with rstring_list_free().
 */
sl *get_capitalized_words(char *content) {
    sl *result = sln();
    sl *sentences = get_sentences(content);

    uint s = 0;
    while (s < sentences->count) {
        sl *tokens = get_words(sentences->strings[s]);

        // Every token is tested, including the first one of the sentence.
        for (uint w = 0; w < tokens->count; w++) {
            char *token = tokens->strings[w];
            if (!is_fully_capitalized_word(token)) {
                continue;
            }
            rstring_list_add(result, token);
        }

        slf(tokens);
        s++;
    }

    slf(sentences);
    return result;
}

/*
 * Returns a newly allocated copy of `content` keeping only ASCII letters
 * (either case), digits, space, newline and the punctuation ". , ! ?".
 * Everything else, including tabs and non-ASCII bytes, is dropped.
 * The caller frees the result.
 */
char *clean_content(char *content) {
    /* Lower-case allow-list; input characters are lower-cased before lookup. */
    static const char allowed_ichars[] = "0123456789abcdefghijklmnopqrstuvwxyz \n.,!?";
    char *cleaned = (char *)malloc(strlen(content) + 1);
    char *cleaned_p = cleaned;
    for (; *content; content++) {
        /* tolower() needs an unsigned char value; a negative plain char is
         * undefined behaviour. */
        int lowered = tolower((unsigned char)*content);
        if (strchr(allowed_ichars, lowered)) {
            *cleaned_p++ = *content;
        }
    }
    *cleaned_p = 0;
    return cleaned;
}

/*
 * Extracts runs of decimal digits from `content`.
 *
 * The text is first passed through clean_content() and stripws(), so
 * whitespace and commas are removed before scanning: digit groups separated
 * only by those characters end up in one number (e.g. "1,000" -> "1000").
 *
 * Returns a new list the caller releases with rstring_list_free().
 */
sl *get_numbers(char *content) {
    char *cc = clean_content(content);
    char *ccc = stripws(cc);
    char *cccp = ccc;
    free(cc);
    char *number_buffer = (char *)malloc(strlen(ccc) + 1);
    *number_buffer = 0;
    char *number_buffer_p = number_buffer;
    sl *numbers = sln();
    while (*cccp) {
        if (isdigit((unsigned char)*cccp)) {
            *number_buffer_p = *cccp;
            number_buffer_p++;
            *number_buffer_p = 0;
        } else if (number_buffer != number_buffer_p) {
            sla(numbers, number_buffer);
            *number_buffer = 0;
            number_buffer_p = number_buffer;
        }
        cccp++;
    }
    /* Flush a number that runs up to the end of the text; without this the
     * last number is lost whenever no non-digit follows it. */
    if (number_buffer != number_buffer_p) {
        sla(numbers, number_buffer);
    }
    free(number_buffer);
    free(ccc);
    return numbers;
}

/*
 * Case-insensitive string equality.
 *
 * Returns true when `word1` and `word2` are equal ignoring case and false
 * otherwise. Note this is a boolean "equals", not the three-way result the
 * name might suggest.
 */
bool stricmp(char *word1, char *word2) {
    /* Cast to unsigned char: tolower() on a negative plain char is undefined. */
    while (*word1 && tolower((unsigned char)*word1) == tolower((unsigned char)*word2)) {
        word1++;
        word2++;
    }
    /* Equal only if both strings ended together; on a mismatch the two
     * characters differ even after lower-casing, so they differ here too. */
    return *word1 == *word2;
}

/*
 * Returns true when any entry of `words` equals `word` according to
 * stricmp() (whole-string, case-insensitive), false otherwise.
 */
bool containswordi(sl *words, char *word) {
    uint idx = 0;
    while (idx < words->count) {
        if (stricmp(words->strings[idx], word)) {
            return true;
        }
        idx++;
    }
    return false;
}

/*
 * Returns the entries of forbidden_words[] that occur in `content` as a
 * whole token (tokens come from get_words(), comparison from
 * containswordi()). Each table entry is reported at most once, in table
 * order.
 *
 * Returns a new list the caller releases with rstring_list_free().
 */
sl *get_forbidden_words(char *content) {
    sl *tokens = get_words(content);
    sl *hits = sln();

    for (char **entry = forbidden_words; *entry != NULL; entry++) {
        if (!containswordi(tokens, *entry)) {
            continue;
        }
        rstring_list_add(hits, *entry);
    }

    slf(tokens);
    return hits;
}
/* Number of inputs analyzed so far; analyze() increments it and prints it
 * as the "#N" header of each report. */
unsigned int total = 0;

/* Serialises `lst` with slds() and appends the result to `all`. */
static void analyze_append_dump(str_t *all, sl *lst) {
    char *dump = slds(lst);
    stra(all, dump);
    free(dump);
}

/*
 * Reads all of `f` and prints a report to stdout: a "#N" header, then the
 * number of capitalized words, sentences, words, numbers and forbidden
 * words, each optionally followed by the full list depending on the show_*
 * flags, and finally some ratios when at least one word was found.
 *
 * The serialised lists are also accumulated in a scratch buffer (`all`) that
 * is destroyed with strd() without being read, and a clean_content() copy of
 * the input is created and released immediately; both are kept as-is.
 */
void analyze(FILE *f) {
    total += 1;
    printf("#%u\n", total);

    char *data = fread_till_eof(f);
    str_t *all = strn(1337);

    /* Cleaned copy is built and discarded right away; its result is unused. */
    free(clean_content(data));

    /* Capitalized words */
    sl *capitalized_words = get_capitalized_words(data);
    ulonglong capitalized_words_count = capitalized_words->count;
    printf("Capitalized words: %llu\n", capitalized_words_count);
    if (show_capitalized) {
        sld(capitalized_words);
    }
    analyze_append_dump(all, capitalized_words);

    /* Sentences */
    sl *sentences = get_sentences(data);
    printf("Sentences: %llu\n", sentences->count);
    if (show_sentences) {
        sld(sentences);
    }
    analyze_append_dump(all, sentences);

    /* Words */
    sl *words = get_words(data);
    printf("Words: %llu\n", words->count);
    if (show_words) {
        sld(words);
    }
    analyze_append_dump(all, words);

    /* Numbers */
    sl *numbers = get_numbers(data);
    printf("Numbers: %llu\n", numbers->count);
    if (show_numbers) {
        sld(numbers);
    }
    analyze_append_dump(all, numbers);

    /* Forbidden words */
    sl *fw = get_forbidden_words(data);
    printf("Forbidden words: %llu\n", fw->count);
    if (show_forbidden_words) {
        sld(fw);
    }
    analyze_append_dump(all, fw);

    strd(all);

    /* Ratios are only meaningful (and division only safe) with >= 1 word. */
    if (words->count) {
        double word_total = (double)words->count;

        double capitalized_word_percentage = 100 * ((double)capitalized_words->count / word_total);
        printf("Capitalized percentage: %f%%\n", capitalized_word_percentage);

        double forbidden_word_percentage = 100 * ((double)fw->count / word_total);
        printf("Forbidden percentage: %f%%\n", forbidden_word_percentage);

        /* Integer division; a sentence count of 0 is treated as 1. */
        ulonglong word_count_per_sentence = words->count / (sentences->count ? sentences->count : 1);
        printf("Word count per sentence: %llu\n", word_count_per_sentence);
    }

    slf(capitalized_words);
    slf(sentences);
    slf(words);
    slf(numbers);
    slf(fw);

    free(data);
}

/*
 * Opens `path` for reading and runs analyze() on it.
 *
 * If the file cannot be opened, the reason is reported on stderr via
 * perror() and nothing is analyzed (previously a NULL stream was passed on
 * to analyze() and fclose()).
 */
void analyze_file(char *path) {
    FILE *f = fopen(path, "r");
    if (f == NULL) {
        perror(path);
        return;
    }
    analyze(f);
    fclose(f);
}

int main(int argc, char *argv[]) {
   
    if (argc > 1) {
        for (int i = 1; i < argc; i++) {
            if(!strcmp(argv[1],"--hide-capitalized")){
                show_capitalized=false;
            }else if(!strcmp(argv[1],"--show-sentences")){
                show_sentences=true;
            }else if(!strcmp(argv[1],"--show-words")){
                show_words=true;
            }else if(!strcmp(argv[1],"--show-numbers")){
                show_words=true;
            }else if(!strcmp(argv[1],"--hide-forbidden-words")){
                show_forbidden_words=false;
            }else if(!strcmp(argv[1],"help") || !strcmp(argv[1],"--help")){
                printf("%s", 
                    "Usage: spam [file] [file] [file]\n"
                    "Flag defaults:\n"
                    "  hide-capitalized = true\n"
                    "  show-sentences = false\n"
                    "  show-words = false\n"
                    "  show-numbers = false\n"
                    "  hide-forbidden-words = false\n");
                return 0;
            }

            printf("File: %s\n", argv[i]);
            analyze_file(argv[i]);
            printf("%s\n", rmalloc_stats());
            printf("\n");
        }

        return 0;
    }
    analyze(stdin);
    printf("%s\n", rmalloc_stats());
    return 0;
}