This commit is contained in:
retoor 2025-03-24 02:31:28 +01:00
parent a41f819cdf
commit e4354a7645
2 changed files with 17 additions and 30 deletions

1
.gitignore vendored
View File

@ -5,6 +5,7 @@ publish
books books
__pycache__ __pycache__
target target
./isspam.py
isspam isspam
risspam risspam
/jisspam /jisspam

View File

@ -1,16 +1,17 @@
import os import os
import sys import sys
import threading import threading
from concurrent.futures import ThreadPoolExecutor
MAX_TEXT_LENGTH = 1024 MAX_TEXT_LENGTH = 1024
FORBIDDEN_WORDS_COUNT = 40 FORBIDDEN_WORDS_COUNT = 40
forbidden_words = [ forbidden_words = set([
"recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com", "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
"@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency", "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
"stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century", "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds", "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
] ])
class AnalysisResult: class AnalysisResult:
def __init__(self, filename): def __init__(self, filename):
@ -29,15 +30,8 @@ def read_file(filename):
print(f"File doesn't exist: {filename}") print(f"File doesn't exist: {filename}")
return None return None
content = ''
with open(filename, 'r') as file: with open(filename, 'r') as file:
while True: return file.read()
chunk = file.read(MAX_TEXT_LENGTH)
if not chunk:
break
content += chunk
return content
def analyze_file(result): def analyze_file(result):
text = read_file(result.filename) text = read_file(result.filename)
@ -45,35 +39,27 @@ def analyze_file(result):
result.total_sentence_count = text.count('.') result.total_sentence_count = text.count('.')
tokens = text.split() tokens = text.split()
for token in tokens: result.total_word_count = len(tokens)
result.total_word_count += 1 result.total_capitalized_count = sum(1 for token in tokens if token[0].isupper())
result.total_number_count = sum(1 for token in tokens if any(char.isdigit() for char in token))
if token[0].isupper(): result.total_forbidden_count = sum(1 for token in tokens if is_forbidden(token))
result.total_capitalized_count += 1
if any(char.isdigit() for char in token):
result.total_number_count += 1
if is_forbidden(token):
result.total_forbidden_count += 1
def main(): def main():
if len(sys.argv) < 2: if len(sys.argv) < 2:
print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>") print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
return return
threads = []
results = [] results = []
for filename in sys.argv[1:]: with ThreadPoolExecutor() as executor:
result = AnalysisResult(filename) futures = []
results.append(result) for filename in sys.argv[1:]:
thread = threading.Thread(target=analyze_file, args=(result,)) result = AnalysisResult(filename)
threads.append(thread) results.append(result)
thread.start() futures.append(executor.submit(analyze_file, result))
for thread in threads: for future in futures:
thread.join() future.result()
total_word_count = sum(result.total_word_count for result in results) total_word_count = sum(result.total_word_count for result in results)
total_capitalized_count = sum(result.total_capitalized_count for result in results) total_capitalized_count = sum(result.total_capitalized_count for result in results)