|
import os
|
|
import sys
|
|
import threading
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
# Size limit constant; not referenced by any code visible in this file.
MAX_TEXT_LENGTH = 1024

# NOTE(review): the set below holds 35 entries, not 40, and this constant is
# not referenced by any code visible in this file -- confirm whether it is
# still needed.
FORBIDDEN_WORDS_COUNT = 40

# Tokens counted as "forbidden" by is_forbidden().  Membership is an exact,
# case-sensitive comparison against whole tokens.
forbidden_words = {
    # links, addresses and punctuation fragments
    "http", "https", "::", "//", "com", "@", "email", "whatsapp",
    "contact", "contacting",
    # crypto / money vocabulary
    "crypto", "cryptocurrency", "bitcoin", "wallet", "transaction",
    "assets", "funds", "stolen", "freeze",
    # recovery / hacking pitch vocabulary
    "recovery", "techie", "digital", "hack", "hacker", "tracing",
    "scammers", "hire",
    # NOTE(review): "expers" looks like a typo for "experts" -- confirm
    # before changing, since it alters what gets matched.
    "expers",
    # filler words
    "welcome", "quick", "crucial", "century", "essential", "managing",
    "understanding",
}
|
|
|
|
class AnalysisResult:
    """Per-file container for the counters filled in by ``analyze_file``.

    Attributes:
        filename: Path of the file these counters belong to.
        total_word_count: Number of whitespace-separated tokens.
        total_capitalized_count: Tokens whose first character is uppercase.
        total_sentence_count: Number of ``.`` characters in the text.
        total_number_count: Tokens containing at least one digit.
        total_forbidden_count: Tokens found in ``forbidden_words``.
    """

    def __init__(self, filename):
        """Remember *filename* and start every counter at zero."""
        self.filename = filename
        (
            self.total_word_count,
            self.total_capitalized_count,
            self.total_sentence_count,
            self.total_number_count,
            self.total_forbidden_count,
        ) = 0, 0, 0, 0, 0
|
|
|
|
def is_forbidden(word):
    """Return ``True`` when *word* is an exact member of ``forbidden_words``.

    The comparison is case-sensitive and does not strip punctuation.
    """
    if word in forbidden_words:
        return True
    return False
|
|
|
|
def read_file(filename):
    """Return the whole text of *filename*, or ``None`` if it cannot be read.

    The file is opened directly instead of being checked with
    ``os.path.exists`` first: that avoids the race between the check and the
    open, and lets directories, permission problems and undecodable content
    be reported here rather than raising out of a worker thread and
    aborting the whole run.

    Args:
        filename: Path of the text file to read (default text encoding).

    Returns:
        The file content as a string, or ``None`` after printing a message
        when the file is missing or unreadable.
    """
    try:
        with open(filename, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"File doesn't exist: {filename}")
    except (OSError, UnicodeDecodeError) as err:
        # Directory, permission denied, bad encoding, ... -- skip this file.
        print(f"Could not read file: {filename} ({err})")
    return None
|
|
|
|
def analyze_file(result):
    """Fill *result* with statistics for the file named ``result.filename``.

    The text is split on whitespace; each token is checked for a leading
    uppercase character, for containing a digit, and for membership in the
    forbidden-word set.  Sentences are approximated by counting ``.``
    characters.  When the file is missing or empty, *result* is left
    untouched (all counters keep their current values).

    Args:
        result: An ``AnalysisResult`` whose counters are overwritten in place.
    """
    text = read_file(result.filename)
    if not text:
        return

    words = text.split()
    capitalized = with_digits = forbidden = 0
    # NOTE(review): tokens keep their case and punctuation, so "Bitcoin" or
    # "wallet." never match the lowercase forbidden list -- confirm intended.
    for word in words:
        if word[0].isupper():
            capitalized += 1
        if any(ch.isdigit() for ch in word):
            with_digits += 1
        if is_forbidden(word):
            forbidden += 1

    result.total_sentence_count = text.count('.')
    result.total_word_count = len(words)
    result.total_capitalized_count = capitalized
    result.total_number_count = with_digits
    result.total_forbidden_count = forbidden
|
|
|
|
def main():
    """Analyze every file named on the command line and print the totals.

    Each file gets its own ``AnalysisResult`` and is processed by
    ``analyze_file`` on a thread pool; the per-file counters are then summed
    and reported together with derived ratios.  With no file arguments a
    usage line is printed and nothing else happens.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
        return

    results = []

    with ThreadPoolExecutor() as executor:
        futures = []
        for filename in sys.argv[1:]:
            result = AnalysisResult(filename)
            results.append(result)
            futures.append(executor.submit(analyze_file, result))

        # Waiting on each future re-raises any exception from its worker.
        for future in futures:
            future.result()

    total_word_count = sum(result.total_word_count for result in results)
    total_capitalized_count = sum(result.total_capitalized_count for result in results)
    total_sentence_count = sum(result.total_sentence_count for result in results)
    total_number_count = sum(result.total_number_count for result in results)
    total_forbidden_count = sum(result.total_forbidden_count for result in results)

    # Guard the divisions explicitly: multiplying by a boolean still
    # evaluates the quotient first and raises ZeroDivisionError when every
    # input is empty or missing.
    if total_word_count > 0:
        capitalized_percentage = total_capitalized_count / total_word_count * 100.0
        forbidden_percentage = total_forbidden_count / total_word_count * 100.0
    else:
        capitalized_percentage = 0.0
        forbidden_percentage = 0.0

    if total_sentence_count > 0:
        word_count_per_sentence = total_word_count / total_sentence_count
    else:
        word_count_per_sentence = 0.0

    print(f"\nTotal Words: {total_word_count}")
    print(f"Total Capitalized words: {total_capitalized_count}")
    print(f"Total Sentences: {total_sentence_count}")
    print(f"Total Numbers: {total_number_count}")
    print(f"Total Forbidden words: {total_forbidden_count}")
    print(f"Capitalized percentage: {capitalized_percentage:.6f}%")
    print(f"Forbidden percentage: {forbidden_percentage:.6f}%")
    print(f"Word count per sentence: {word_count_per_sentence:.6f}")
    # Counts the file arguments given, including any that could not be read.
    print(f"Total files read: {len(sys.argv) - 1}")
|
|
|
|
# Run the command-line entry point only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()