"""Collect simple spam-signal statistics from text files (retoor_c/isspam.py).

Every file named on the command line is analysed in its own thread. The
per-file counters (words, capitalized words, sentences, tokens containing a
digit, forbidden words) are then summed and printed together with a few
derived ratios.
"""

import os
import sys
import threading
from typing import Optional

# Chunk size, in characters, used when reading a file.
MAX_TEXT_LENGTH = 1024
# NOTE(review): nothing in this module reads this constant, and
# `forbidden_words` below holds 35 entries, not 40. Kept unchanged in case
# other code imports it.
FORBIDDEN_WORDS_COUNT = 40

forbidden_words = [
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
]

# Built once so that every token lookup is a hash probe instead of a list scan.
_FORBIDDEN_WORD_SET = frozenset(forbidden_words)


class AnalysisResult:
    """Per-file counters filled in by `analyze_file`."""

    def __init__(self, filename: str) -> None:
        self.filename = filename
        self.total_word_count = 0
        self.total_capitalized_count = 0
        self.total_sentence_count = 0
        self.total_number_count = 0
        self.total_forbidden_count = 0


def is_forbidden(word: str) -> bool:
    """Return True when *word* exactly matches an entry of `forbidden_words`.

    The comparison is case-sensitive and does not strip punctuation.
    """
    return word in _FORBIDDEN_WORD_SET


def read_file(filename: str) -> Optional[str]:
    """Return the full text of *filename*, or None when it cannot be read.

    A missing file, any other OS-level failure (directory, permissions, ...)
    and undecodable content are each reported on stdout; none of them raises.
    """
    try:
        with open(filename, 'r') as file:
            # Read in MAX_TEXT_LENGTH-sized chunks until read() returns '',
            # then join once instead of growing a string with `+=`.
            return ''.join(iter(lambda: file.read(MAX_TEXT_LENGTH), ''))
    except FileNotFoundError:
        print(f"File doesn't exist: {filename}")
    except (OSError, UnicodeDecodeError) as error:
        print(f"Could not read file: {filename} ({error})")
    return None


def analyze_file(result: AnalysisResult) -> None:
    """Populate *result* with statistics for the file named `result.filename`.

    The counters are left untouched when the file is unreadable or empty.
    """
    text = read_file(result.filename)
    if not text:
        return

    # Every '.' character counts as one sentence.
    result.total_sentence_count = text.count('.')

    # str.split() never yields empty tokens, so token[0] is always valid.
    for token in text.split():
        result.total_word_count += 1

        if token[0].isupper():
            result.total_capitalized_count += 1

        if any(char.isdigit() for char in token):
            result.total_number_count += 1

        if is_forbidden(token):
            result.total_forbidden_count += 1


def _ratio(numerator: int, denominator: int, scale: float = 1.0) -> float:
    """Return numerator / denominator * scale, or 0.0 for a zero denominator."""
    if denominator > 0:
        return numerator / denominator * scale
    return 0.0


def main() -> None:
    """Analyse every file given in `sys.argv[1:]` and print the summed statistics."""
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
        return

    threads = []
    results = []

    # One worker thread per file; each thread writes only to its own result.
    for filename in sys.argv[1:]:
        result = AnalysisResult(filename)
        results.append(result)
        thread = threading.Thread(target=analyze_file, args=(result,))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    total_word_count = sum(result.total_word_count for result in results)
    total_capitalized_count = sum(result.total_capitalized_count for result in results)
    total_sentence_count = sum(result.total_sentence_count for result in results)
    total_number_count = sum(result.total_number_count for result in results)
    total_forbidden_count = sum(result.total_forbidden_count for result in results)

    # Multiplying by a boolean does not short-circuit the division, so the
    # zero-denominator cases have to be guarded explicitly.
    capitalized_percentage = _ratio(total_capitalized_count, total_word_count, 100.0)
    forbidden_percentage = _ratio(total_forbidden_count, total_word_count, 100.0)
    word_count_per_sentence = _ratio(total_word_count, total_sentence_count)

    print(f"\nTotal Words: {total_word_count}")
    print(f"Total Capitalized words: {total_capitalized_count}")
    print(f"Total Sentences: {total_sentence_count}")
    print(f"Total Numbers: {total_number_count}")
    print(f"Total Forbidden words: {total_forbidden_count}")
    print(f"Capitalized percentage: {capitalized_percentage:.6f}%")
    print(f"Forbidden percentage: {forbidden_percentage:.6f}%")
    print(f"Word count per sentence: {word_count_per_sentence:.6f}")
    # Counts every argument, including files that could not be read.
    print(f"Total files read: {len(sys.argv) - 1}")


if __name__ == "__main__":
    main()