diff --git a/.gitignore b/.gitignore index fb2db35..206928b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ publish books __pycache__ target +./isspam.py isspam risspam /jisspam diff --git a/retoor_c/isspam.py b/retoor_c/isspam.py index 2b2ce4d..a62705c 100644 --- a/retoor_c/isspam.py +++ b/retoor_c/isspam.py @@ -1,16 +1,17 @@ import os import sys import threading +from concurrent.futures import ThreadPoolExecutor MAX_TEXT_LENGTH = 1024 FORBIDDEN_WORDS_COUNT = 40 -forbidden_words = [ +forbidden_words = set([ "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com", "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency", "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century", "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds", -] +]) class AnalysisResult: def __init__(self, filename): @@ -29,15 +30,8 @@ def read_file(filename): print(f"File doesn't exist: {filename}") return None - content = '' with open(filename, 'r') as file: - while True: - chunk = file.read(MAX_TEXT_LENGTH) - if not chunk: - break - content += chunk - - return content + return file.read() def analyze_file(result): text = read_file(result.filename) @@ -45,35 +39,27 @@ def analyze_file(result): result.total_sentence_count = text.count('.') tokens = text.split() - for token in tokens: - result.total_word_count += 1 - - if token[0].isupper(): - result.total_capitalized_count += 1 - - if any(char.isdigit() for char in token): - result.total_number_count += 1 - - if is_forbidden(token): - result.total_forbidden_count += 1 + result.total_word_count = len(tokens) + result.total_capitalized_count = sum(1 for token in tokens if token[0].isupper()) + result.total_number_count = sum(1 for token in tokens if any(char.isdigit() for char in token)) + result.total_forbidden_count = sum(1 for token in tokens if is_forbidden(token)) def main(): if len(sys.argv) < 2: print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>") return - threads = [] results = [] - for filename in sys.argv[1:]: - result = AnalysisResult(filename) - results.append(result) - thread = threading.Thread(target=analyze_file, args=(result,)) - threads.append(thread) - thread.start() + with ThreadPoolExecutor() as executor: + futures = [] + for filename in sys.argv[1:]: + result = AnalysisResult(filename) + results.append(result) + futures.append(executor.submit(analyze_file, result)) - for thread in threads: - thread.join() + for future in futures: + future.result() total_word_count = sum(result.total_word_count for result in results) total_capitalized_count = sum(result.total_capitalized_count for result in results)