parent
a41f819cdf
commit
e4354a7645
1
.gitignore
vendored
1
.gitignore
vendored
@ -5,6 +5,7 @@ publish
|
|||||||
books
|
books
|
||||||
__pycache__
|
__pycache__
|
||||||
target
|
target
|
||||||
|
./isspam.py
|
||||||
isspam
|
isspam
|
||||||
risspam
|
risspam
|
||||||
/jisspam
|
/jisspam
|
||||||
|
@ -1,16 +1,17 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
MAX_TEXT_LENGTH = 1024
|
MAX_TEXT_LENGTH = 1024
|
||||||
FORBIDDEN_WORDS_COUNT = 40
|
FORBIDDEN_WORDS_COUNT = 40
|
||||||
|
|
||||||
forbidden_words = [
|
forbidden_words = set([
|
||||||
"recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
|
"recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
|
||||||
"@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
|
"@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
|
||||||
"stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
|
"stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
|
||||||
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
|
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
|
||||||
]
|
])
|
||||||
|
|
||||||
class AnalysisResult:
|
class AnalysisResult:
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
@ -29,15 +30,8 @@ def read_file(filename):
|
|||||||
print(f"File doesn't exist: {filename}")
|
print(f"File doesn't exist: {filename}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
content = ''
|
|
||||||
with open(filename, 'r') as file:
|
with open(filename, 'r') as file:
|
||||||
while True:
|
return file.read()
|
||||||
chunk = file.read(MAX_TEXT_LENGTH)
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
content += chunk
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
def analyze_file(result):
|
def analyze_file(result):
|
||||||
text = read_file(result.filename)
|
text = read_file(result.filename)
|
||||||
@ -45,35 +39,27 @@ def analyze_file(result):
|
|||||||
result.total_sentence_count = text.count('.')
|
result.total_sentence_count = text.count('.')
|
||||||
tokens = text.split()
|
tokens = text.split()
|
||||||
|
|
||||||
for token in tokens:
|
result.total_word_count = len(tokens)
|
||||||
result.total_word_count += 1
|
result.total_capitalized_count = sum(1 for token in tokens if token[0].isupper())
|
||||||
|
result.total_number_count = sum(1 for token in tokens if any(char.isdigit() for char in token))
|
||||||
if token[0].isupper():
|
result.total_forbidden_count = sum(1 for token in tokens if is_forbidden(token))
|
||||||
result.total_capitalized_count += 1
|
|
||||||
|
|
||||||
if any(char.isdigit() for char in token):
|
|
||||||
result.total_number_count += 1
|
|
||||||
|
|
||||||
if is_forbidden(token):
|
|
||||||
result.total_forbidden_count += 1
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
|
print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
|
||||||
return
|
return
|
||||||
|
|
||||||
threads = []
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
for filename in sys.argv[1:]:
|
with ThreadPoolExecutor() as executor:
|
||||||
result = AnalysisResult(filename)
|
futures = []
|
||||||
results.append(result)
|
for filename in sys.argv[1:]:
|
||||||
thread = threading.Thread(target=analyze_file, args=(result,))
|
result = AnalysisResult(filename)
|
||||||
threads.append(thread)
|
results.append(result)
|
||||||
thread.start()
|
futures.append(executor.submit(analyze_file, result))
|
||||||
|
|
||||||
for thread in threads:
|
for future in futures:
|
||||||
thread.join()
|
future.result()
|
||||||
|
|
||||||
total_word_count = sum(result.total_word_count for result in results)
|
total_word_count = sum(result.total_word_count for result in results)
|
||||||
total_capitalized_count = sum(result.total_capitalized_count for result in results)
|
total_capitalized_count = sum(result.total_capitalized_count for result in results)
|
||||||
|
Loading…
Reference in New Issue
Block a user