// Author: retoor@molodetz.nl

// This program analyzes text files for word counts, capitalized words, sentences, numbers, and forbidden words.

/*
MIT License

Copyright (c) 2025 retoor

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

... (full license text)
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
#include <pthread.h>
|
|
|
|
/*
 * Number of bytes read_file() requests from fread() per iteration; its buffer
 * grows by this amount before every read. Despite the name, this is a chunk
 * size and not a limit on the length of the text.
 */
#define MAX_TEXT_LENGTH 1024

/*
 * Capacity of forbidden_words[], counting the terminating NULL sentinel.
 * Slots without an explicit initializer are implicitly NULL.
 */
#define FORBIDDEN_WORDS_COUNT 40

/*
 * NULL-terminated table of tokens that is_forbidden() reports as forbidden.
 *
 * Matching is done with strcmp(), so it is exact and case-sensitive, and it is
 * applied to whole whitespace-delimited tokens: entries such as "::" or "@"
 * only match when they appear as a token on their own.
 *
 * NOTE(review): "expers" may be a misspelling of "experts" -- confirm before
 * changing it, because a different string matches different input.
 */
const char *forbidden_words[FORBIDDEN_WORDS_COUNT] = {
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
    NULL
};
|
|
|
|
/*
 * Per-file work item passed to analyze_file(): the caller fills in `filename`
 * and analyze_file() stores the counters it computed for that file.
 */
typedef struct {
    char *filename;                    /* path of the file to analyze (main stores an argv entry here) */
    long long total_word_count;        /* whitespace-delimited tokens */
    long long total_capitalized_count; /* tokens whose first character is an uppercase letter */
    long long total_sentence_count;    /* '.' characters found in the file */
    long long total_number_count;      /* tokens containing at least one decimal digit */
    long long total_forbidden_count;   /* tokens for which is_forbidden() returned non-zero */
} AnalysisResult;
|
|
|
|
int is_forbidden(const char* word) {
|
|
for (size_t i = 0; forbidden_words[i] != NULL; i++) {
|
|
if (strcmp(word, forbidden_words[i]) == 0) {
|
|
return 1; // Word is forbidden
|
|
}
|
|
}
|
|
return 0; // Word is not forbidden
|
|
}
|
|
|
|
char* read_file(const char* filename) {
|
|
FILE *file = fopen(filename, "r");
|
|
if (!file) {
|
|
printf("File doesn't exist: %s\n", filename);
|
|
return NULL;
|
|
}
|
|
|
|
char *content = NULL;
|
|
size_t content_size = 0;
|
|
size_t bytes_read;
|
|
|
|
do {
|
|
char *new_content = (char *)realloc(content, content_size + MAX_TEXT_LENGTH);
|
|
if (!new_content) {
|
|
free(content);
|
|
fclose(file);
|
|
printf("Memory allocation failed while reading file: %s\n", filename);
|
|
return NULL;
|
|
}
|
|
content = new_content;
|
|
bytes_read = fread(content + content_size, 1, MAX_TEXT_LENGTH, file);
|
|
content_size += bytes_read;
|
|
} while (bytes_read == MAX_TEXT_LENGTH);
|
|
|
|
content[content_size] = '\0'; // Null-terminate the string
|
|
fclose(file);
|
|
return content;
|
|
}
|
|
|
|
void* analyze_file(void* arg) {
|
|
AnalysisResult *result = (AnalysisResult *)arg;
|
|
char *text = read_file(result->filename);
|
|
if (text) {
|
|
long long word_count = 0;
|
|
long long capitalized_count = 0;
|
|
long long sentence_count = 0;
|
|
long long number_count = 0;
|
|
long long forbidden_count = 0;
|
|
|
|
for (size_t i = 0; text[i] != '\0'; i++) {
|
|
if (text[i] == '.') {
|
|
sentence_count++;
|
|
}
|
|
}
|
|
|
|
char *saveptr;
|
|
char* token = strtok_r(text, " \f\v\r\n\t", &saveptr);
|
|
while (token != NULL) {
|
|
word_count++;
|
|
|
|
if (isupper(token[0])) {
|
|
capitalized_count++;
|
|
}
|
|
|
|
for (size_t i = 0; token[i] != '\0'; i++) {
|
|
if (isdigit(token[i])) {
|
|
number_count++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (is_forbidden(token)) {
|
|
forbidden_count++;
|
|
}
|
|
|
|
token = strtok_r(NULL, " \f\v\r\n\t", &saveptr);
|
|
}
|
|
|
|
result->total_word_count = word_count;
|
|
result->total_capitalized_count = capitalized_count;
|
|
result->total_sentence_count = sentence_count;
|
|
result->total_number_count = number_count;
|
|
result->total_forbidden_count = forbidden_count;
|
|
|
|
free(text);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
if (argc < 2) {
|
|
printf("Usage: %s <file1> <file2> ... <fileN>\n", argv[0]);
|
|
return 1;
|
|
}
|
|
|
|
pthread_t threads[argc - 1];
|
|
AnalysisResult results[argc - 1];
|
|
|
|
for (size_t i = 1; i < argc; i++) {
|
|
results[i - 1].filename = argv[i];
|
|
if (pthread_create(&threads[i - 1], NULL, analyze_file, &results[i - 1]) != 0) {
|
|
printf("Error creating thread for file: %s\n", argv[i]);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
for (size_t i = 1; i < argc; i++) {
|
|
pthread_join(threads[i - 1], NULL);
|
|
}
|
|
|
|
long long total_word_count = 0;
|
|
long long total_capitalized_count = 0;
|
|
long long total_sentence_count = 0;
|
|
long long total_number_count = 0;
|
|
long long total_forbidden_count = 0;
|
|
|
|
for (size_t i = 0; i < argc - 1; i++) {
|
|
total_word_count += results[i].total_word_count;
|
|
total_capitalized_count += results[i].total_capitalized_count;
|
|
total_sentence_count += results[i].total_sentence_count;
|
|
total_number_count += results[i].total_number_count;
|
|
total_forbidden_count += results[i].total_forbidden_count;
|
|
}
|
|
|
|
double capitalized_percentage = (total_word_count > 0) ? (double)total_capitalized_count / total_word_count * 100.0 : 0;
|
|
double forbidden_percentage = (total_word_count > 0) ? (double)total_forbidden_count / total_word_count * 100.0 : 0;
|
|
double word_count_per_sentence = (total_sentence_count > 0) ? (double)total_word_count / total_sentence_count : 0;
|
|
|
|
printf("\nTotal Words: %lld\n", total_word_count);
|
|
printf("Total Capitalized words: %lld\n", total_capitalized_count);
|
|
printf("Total Sentences: %lld\n", total_sentence_count);
|
|
printf("Total Numbers: %lld\n", total_number_count);
|
|
printf("Total Forbidden words: %lld\n", total_forbidden_count);
|
|
printf("Capitalized percentage: %.6f%%\n", capitalized_percentage);
|
|
printf("Forbidden percentage: %.6f%%\n", forbidden_percentage);
|
|
printf("Word count per sentence: %.6f\n", word_count_per_sentence);
|
|
printf("Total files read: %d\n", (int)(argc - 1));
|
|
return 0;
|
|
}
|