Added python version.

2025-03-22 23:55:48 +01:00 · 2025-03-22 23:55:48 +01:00 · 2f2710df41
commit 2f2710df41
parent fbb1db28d7
1 changed files with 99 additions and 0 deletions
--- a/retoor_c/isspam.py
+++ b/retoor_c/isspam.py
@@ -0,0 +1,99 @@
+import os
+import sys
+import threading
+
+# Nominal text buffer size, in characters.
+MAX_TEXT_LENGTH = 1024
+
+# Words that count towards the "forbidden" tally. Matching is an exact,
+# case-sensitive comparison against whole whitespace-separated tokens.
+# NOTE(review): "expers" looks like a typo for "experts" — confirm before
+# changing it, since that would alter which tokens are counted.
+forbidden_words = [
+    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
+    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
+    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
+    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
+]
+
+# Derived from the list so the two can never drift apart; the previous
+# hard-coded value (40) did not match the 35 entries actually present.
+FORBIDDEN_WORDS_COUNT = len(forbidden_words)
+
+class AnalysisResult:
+    """Per-file tally of the statistics gathered for one input file."""
+
+    def __init__(self, filename):
+        """Remember *filename* and start every counter at zero."""
+        self.filename = filename
+        # All five counters begin at zero; targets are bound left to right,
+        # so the attributes are created in the order listed here.
+        (
+            self.total_word_count,
+            self.total_capitalized_count,
+            self.total_sentence_count,
+            self.total_number_count,
+            self.total_forbidden_count,
+        ) = (0, 0, 0, 0, 0)
+
+def is_forbidden(word):
+    """Return True when *word* equals one of the entries in forbidden_words.
+
+    The comparison is exact: no case folding and no punctuation stripping.
+    """
+    return any(entry == word for entry in forbidden_words)
+
+def read_file(filename):
+    """Return the full text of *filename*, or None when the file is missing.
+
+    A missing file is reported on stdout instead of raising, so callers can
+    simply skip it. Other I/O errors (for example, the path being a
+    directory) still propagate to the caller.
+
+    The file is decoded as UTF-8; bytes that are not valid UTF-8 are replaced
+    with U+FFFD rather than raising, so one malformed input cannot abort the
+    analysis of that file.
+    """
+    try:
+        # Open directly instead of checking os.path.exists() first: that
+        # avoids the window in which the file could vanish between the
+        # check and the open.
+        with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+            # One read() returns the same text as accumulating fixed-size
+            # chunks, without the repeated string concatenation.
+            return file.read()
+    except FileNotFoundError:
+        print(f"File doesn't exist: {filename}")
+        return None
+
+def analyze_file(result):
+    """Fill in the counters of *result* from the file named result.filename.
+
+    Does nothing when read_file() yields no text (missing or empty file).
+    Otherwise:
+      * the sentence count is set to the number of '.' characters;
+      * the word count grows by the number of whitespace-separated tokens;
+      * the capitalized count grows by the tokens starting with an uppercase
+        character;
+      * the number count grows by the tokens containing at least one digit;
+      * the forbidden count grows by the tokens accepted by is_forbidden().
+    """
+    contents = read_file(result.filename)
+    if not contents:
+        return
+
+    result.total_sentence_count = contents.count('.')
+
+    # str.split() never yields empty strings, so indexing [0] is safe.
+    words = contents.split()
+    result.total_word_count += len(words)
+    result.total_capitalized_count += sum(1 for w in words if w[0].isupper())
+    result.total_number_count += sum(
+        1 for w in words if any(map(str.isdigit, w))
+    )
+    result.total_forbidden_count += sum(1 for w in words if is_forbidden(w))
+
+def _safe_ratio(numerator, denominator):
+    """Return numerator / denominator, or 0.0 when the denominator is zero."""
+    return numerator / denominator if denominator else 0.0
+
+
+def main():
+    """Analyze every file named on the command line and print the totals.
+
+    Each file is processed in its own thread, writing into its own
+    AnalysisResult; the per-file counters are summed only after all threads
+    have been joined. With no file arguments a usage line is printed instead.
+    """
+    if len(sys.argv) < 2:
+        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
+        return
+
+    threads = []
+    results = []
+
+    for filename in sys.argv[1:]:
+        result = AnalysisResult(filename)
+        results.append(result)
+        thread = threading.Thread(target=analyze_file, args=(result,))
+        threads.append(thread)
+        thread.start()
+
+    # Wait for every worker before reading any counters.
+    for thread in threads:
+        thread.join()
+
+    total_word_count = sum(result.total_word_count for result in results)
+    total_capitalized_count = sum(result.total_capitalized_count for result in results)
+    total_sentence_count = sum(result.total_sentence_count for result in results)
+    total_number_count = sum(result.total_number_count for result in results)
+    total_forbidden_count = sum(result.total_forbidden_count for result in results)
+
+    # The former `(count > 0) * (a / count)` form still evaluated the
+    # division, raising ZeroDivisionError whenever no words were read or the
+    # text contained no '.'; the guarded ratio reports 0 in those cases.
+    capitalized_percentage = _safe_ratio(total_capitalized_count, total_word_count) * 100.0
+    forbidden_percentage = _safe_ratio(total_forbidden_count, total_word_count) * 100.0
+    word_count_per_sentence = _safe_ratio(total_word_count, total_sentence_count)
+
+    print(f"\nTotal Words: {total_word_count}")
+    print(f"Total Capitalized words: {total_capitalized_count}")
+    print(f"Total Sentences: {total_sentence_count}")
+    print(f"Total Numbers: {total_number_count}")
+    print(f"Total Forbidden words: {total_forbidden_count}")
+    print(f"Capitalized percentage: {capitalized_percentage:.6f}%")
+    print(f"Forbidden percentage: {forbidden_percentage:.6f}%")
+    print(f"Word count per sentence: {word_count_per_sentence:.6f}")
+    print(f"Total files read: {len(sys.argv) - 1}")
+
+# Run the analysis only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()