196 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			196 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#include <algorithm>
#include <array>
#include <atomic>
#include <codecvt>
#include <cwctype>
#include <execution>
#include <format>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <locale>
#include <numeric>
#include <ranges>
#include <string>
#include <string_view>
 | ||
|  | 
 | ||
|  | #ifdef __cpp_lib_print
 | ||
|  | #include <print>
 | ||
|  | #else
 | ||
// Minimal stand-ins for std::print / std::println, compiled only when the
// surrounding `#ifdef __cpp_lib_print` test fails (i.e. <print> is not used).
// NOTE(review): adding declarations to namespace std is formally undefined
// behavior; it is kept so that call sites can spell std::println either way.
namespace std {
/// Formats `format` with `args` through std::vformat and writes the text to std::cout.
/// `T` must be convertible to the format-string parameter of std::vformat.
template <typename T, typename... Args>
inline void print(T format, Args &&... args) {
    // Named parameters are lvalues, which is what std::make_format_args expects.
    std::cout << std::vformat(format, std::make_format_args(args...));
}

/// Same as print(), followed by a newline.
/// The newline is appended to the formatted text and written with a single
/// insertion (no std::endl), so there is no forced flush per call and the
/// line terminator stays attached to its text when several threads print.
template <typename T, typename... Args>
inline void println(T format, Args &&... args) {
    auto line = std::vformat(format, std::make_format_args(args...));
    line.push_back('\n');
    std::cout << line;
}
}
|  | #endif
 | ||
|  | 
 | ||
// Lower-case tokens that count as "forbidden" when a whole word equals one of them.
// NOTE(review): "expers" looks like a typo for "experts" — confirm before changing,
// since the entry is matched literally at runtime.
constexpr std::array<std::wstring_view, 35> BAD_WORDS = {{
    L"recovery", L"techie", L"http", L"https", L"digital",
    L"hack", L"::", L"//", L"com", L"@",
    L"crypto", L"bitcoin", L"wallet", L"hacker", L"welcome",
    L"whatsapp", L"email", L"cryptocurrency", L"stolen", L"freeze",
    L"quick", L"crucial", L"tracing", L"scammers", L"expers",
    L"hire", L"century", L"transaction", L"essential", L"managing",
    L"contact", L"contacting", L"understanding", L"assets", L"funds",
}};
|  | 
 | ||
|  | constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::max(), | ||
|  |                                                           [](std::size_t current, const std::wstring_view &word) { | ||
|  |                                                               return std::min(current, word.size()); | ||
|  |                                                           } | ||
|  |     ); | ||
|  | constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::min(), | ||
|  |                                                          [](std::size_t current, const std::wstring_view &word) { | ||
|  |                                                              return std::max(current, word.size()); | ||
|  |                                                          } | ||
|  |     ); | ||
|  | 
 | ||
// Program-wide tallies, accumulated while files are parsed and printed by main().
std::size_t totalWordCount{0};         // whitespace-separated words seen
std::size_t totalCapitalizedCount{0};  // words whose first character is upper-case
std::size_t totalSentenceCount{0};     // '.' characters seen
std::size_t totalNumberCount{0};       // words containing at least one digit
std::size_t totalForbiddenCount{0};    // words equal to an entry of BAD_WORDS
std::size_t fileCount{1};              // file counter shown in the summary; starts at 1

// Files whose stream ended in a failed state without reaching end-of-file.
std::size_t failCount{0};
|  | 
 | ||
|  | void check_word(std::wstring &word, std::size_t &forbiddenCount) { | ||
|  |     if (word.size() < SHORTEST_BAD_WORD || word.size() > LONGEST_BAD_WORD) { | ||
|  |         return; | ||
|  |     } | ||
|  |     std::ranges::transform(word, word.begin(), ::towlower); | ||
|  |     if (std::ranges::find(BAD_WORDS, word) != BAD_WORDS.end()) { | ||
|  |         forbiddenCount++; | ||
|  |     } | ||
|  |     // if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) {
 | ||
|  |     //                              return word.contains(badWord);
 | ||
|  |     //                          }
 | ||
|  |     //         ) != BAD_WORDS.end()) {
 | ||
|  |     //     forbiddenCount++;
 | ||
|  |     // }
 | ||
|  | } | ||
|  | 
 | ||
/// Reads one file as UTF-8 text and adds its statistics to the global totals.
///
/// Counted per file: whitespace-separated words, words whose first character
/// is upper-case, '.' characters (as sentences), words containing a digit and
/// words rejected by check_word(). If the file cannot be opened a message is
/// printed and nothing is counted. If the stream ends in a failed state
/// without having reached end-of-file, failCount is incremented.
///
/// main() runs this function for several files concurrently, so the counts
/// are collected in locals and merged into the globals with atomic additions.
///
/// @param filename  Path of the file to scan.
void parseFile(const std::string_view &filename) {
    std::wifstream file;

    // std::codecvt_utf8 is deprecated; suppress MSVC warning C4996 for this one line.
    // The locale takes ownership of the facet, so the raw `new` is not a leak.
#pragma warning(push)
#pragma warning(suppress : 4996)
    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
#pragma warning(pop)

    file.open(std::string(filename));
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return;
    }

    std::size_t wordCount = 0;
    std::size_t capitalizedCount = 0;
    std::size_t sentenceCount = 0;
    std::size_t numberCount = 0;
    std::size_t forbiddenCount = 0;

    bool digitSeenInWord = false;  // a word is counted as a number at most once
    std::wstring word;             // characters of the word currently being read
    wchar_t c;

    while (file.get(c)) {
        if (c == L'.') {
            ++sentenceCount;
        }

        // Wide-character classification: the narrow std::isspace/isupper/isdigit
        // are undefined for values that do not fit in unsigned char.
        if (std::iswspace(c)) {
            digitSeenInWord = false;
            if (!word.empty()) {
                check_word(word, forbiddenCount);
                word.clear();
            }
            continue;
        }

        // An empty buffer means this is the first character of a new word.
        if (word.empty()) {
            ++wordCount;
            if (std::iswupper(c)) {
                ++capitalizedCount;
            }
        }

        if (!digitSeenInWord && std::iswdigit(c)) {
            ++numberCount;
            digitSeenInWord = true;
        }

        word.push_back(c);
    }

    // The last word is not followed by whitespace when the file lacks a trailing newline.
    if (!word.empty()) {
        check_word(word, forbiddenCount);
    }

    file.close();
    const bool readFailed = file.fail() && !file.eof();

    // Merge into the shared totals atomically; plain `+=` here would be a data
    // race when several files are parsed at the same time.
    const auto addTo = [](std::size_t &total, const std::size_t amount) {
        std::atomic_ref<std::size_t>(total).fetch_add(amount, std::memory_order_relaxed);
    };
    addTo(totalWordCount, wordCount);
    addTo(totalCapitalizedCount, capitalizedCount);
    addTo(totalSentenceCount, sentenceCount);
    addTo(totalNumberCount, numberCount);
    addTo(totalForbiddenCount, forbiddenCount);
    if (readFailed) {
        addTo(failCount, 1);
    }
}
|  | 
 | ||
/// Entry point: parses every file named on the command line and prints a summary.
///
/// @param argc  Argument count; at least one file name is required.
/// @param argv  argv[0] is the program name, argv[1..] are the files to scan.
/// @return 1 when no file was given (after printing usage), otherwise 0.
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    // Every argument after the program name is an input file. Without this the
    // summary always reported the counter's initial value.
    fileCount = static_cast<std::size_t>(argc - 1);

    // `par`, not `par_unseq`: parseFile performs stream I/O and allocates, which
    // must not be interleaved within a single thread by an unsequenced policy.
    std::for_each(std::execution::par, std::next(argv), argv + argc, parseFile);

    // Share of `part` in `whole` as a percentage; 0 when `whole` is zero.
    const auto percentOf = [](const std::size_t part, const std::size_t whole) -> double {
        return (whole > 0) ? static_cast<double>(part) / whole * 100.0 : 0;
    };

    const double capitalizedPercentage = percentOf(totalCapitalizedCount, totalWordCount);
    const double forbiddenPercentage = percentOf(totalForbiddenCount, totalWordCount);
    const double wordCountPerSentence = (totalSentenceCount > 0)
                                            ? static_cast<double>(totalWordCount) / totalSentenceCount
                                            : 0;

    std::println(
        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
        );

    return 0;
}