updates.

2025-03-24 02:31:28 +01:00 · 2025-03-24 02:31:28 +01:00 · e4354a7645
commit e4354a7645
parent a41f819cdf
2 changed files with 17 additions and 30 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ publish
 books
 __pycache__
 target
 ./isspam.py
 isspam
 risspam
 /jisspam
--- a/retoor_c/isspam.py
+++ b/retoor_c/isspam.py
@@ -1,16 +1,17 @@
 import os
 import sys
 import threading
 from concurrent.futures import ThreadPoolExecutor
 MAX_TEXT_LENGTH = 1024
 FORBIDDEN_WORDS_COUNT = 40
-forbidden_words = [
+forbidden_words = set([
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
-]
+])
 class AnalysisResult:
    def __init__(self, filename):
@@ -29,15 +30,8 @@ def read_file(filename):
        print(f"File doesn't exist: {filename}")
        return None
    content = ''
    with open(filename, 'r') as file:
-        while True:
+        return file.read()
            chunk = file.read(MAX_TEXT_LENGTH)
            if not chunk:
                break
            content += chunk
    return content
 def analyze_file(result):
    text = read_file(result.filename)
@@ -45,35 +39,27 @@ def analyze_file(result):
        result.total_sentence_count = text.count('.')
        tokens = text.split()
-        for token in tokens:
+        result.total_word_count = len(tokens)
-            result.total_word_count += 1
+        result.total_capitalized_count = sum(1 for token in tokens if token[0].isupper())
-
+        result.total_number_count = sum(1 for token in tokens if any(char.isdigit() for char in token))
-            if token[0].isupper():
+        result.total_forbidden_count = sum(1 for token in tokens if is_forbidden(token))
                result.total_capitalized_count += 1
            if any(char.isdigit() for char in token):
                result.total_number_count += 1
            if is_forbidden(token):
                result.total_forbidden_count += 1
 def main():
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
        return
    threads = []
    results = []
-    for filename in sys.argv[1:]:
+    with ThreadPoolExecutor() as executor:
-        result = AnalysisResult(filename)
+        futures = []
-        results.append(result)
+        for filename in sys.argv[1:]:
-        thread = threading.Thread(target=analyze_file, args=(result,))
+            result = AnalysisResult(filename)
-        threads.append(thread)
+            results.append(result)
-        thread.start()
+            futures.append(executor.submit(analyze_file, result))
-    for thread in threads:
+        for future in futures:
-        thread.join()
+            future.result()
    total_word_count = sum(result.total_word_count for result in results)
    total_capitalized_count = sum(result.total_capitalized_count for result in results)