diff --git a/borded_cpp/src/main.cpp b/borded_cpp/src/main.cpp index 862a2c1..f6e6062 100644 --- a/borded_cpp/src/main.cpp +++ b/borded_cpp/src/main.cpp @@ -13,21 +13,21 @@ #include <print> #else namespace std { - template <typename T, typename... Args> - inline void print(T format, Args&&... args) { - auto f = std::vformat(format, std::make_format_args(args...)); - std::cout << f; - } +template <typename T, typename... Args> +inline void print(T format, Args &&... args) { + auto f = std::vformat(format, std::make_format_args(args...)); + std::cout << f; +} - template <typename T, typename... Args> - inline void println(T format, Args&&... args) { - auto f = std::vformat(format, std::make_format_args(args...)); - std::cout << f << std::endl; - } +template <typename T, typename... Args> +inline void println(T format, Args &&... args) { + auto f = std::vformat(format, std::make_format_args(args...)); + std::cout << f << std::endl; +} } #endif -constexpr std::array<std::wstring_view, 35> BAD_WORDS ={ +constexpr std::array<std::wstring_view, 35> BAD_WORDS = { L"recovery", L"techie", L"http", @@ -106,24 +106,32 @@ struct AnalysisResult { }; }; -void check_word(std::wstring& word, std::size_t &forbiddenCount) { +void check_word(std::wstring &word, std::size_t &forbiddenCount) { if (word.size() < SHORTEST_BAD_WORD || word.size() > LONGEST_BAD_WORD) { return; } - std::transform(word.begin(), word.end(), word.begin(), ::towlower); - if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) { - // Case-insensitive check if badword is inside word - return word.contains(badWord); - }) != BAD_WORDS.end()) { - forbiddenCount++; - } + std::ranges::transform(word, word.begin(), ::towlower); + if (std::ranges::find(BAD_WORDS, word) != BAD_WORDS.end()) { + forbiddenCount++; + } + // if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) { + // return word.contains(badWord); + // } + // ) != BAD_WORDS.end()) { + // forbiddenCount++; + // } } 
AnalysisResult parseFile(const std::string_view &filename) { std::wifstream file; + // suppress warning of deprecation +#pragma warning(push) +#pragma warning(suppress : 4996) file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>)); - file.open(std::string(filename)); // Modified line +#pragma warning(pop) + + file.open(std::string(filename)); if (!file.is_open()) { std::println("File doesn't exist: {}", filename); return { }; @@ -189,7 +197,8 @@ int main(const int argc, char *argv[]) { return 1; } - const AnalysisResult result = std::transform_reduce(std::execution::par_unseq, std::next(argv), argv + argc, AnalysisResult{ .fileCount = 0 }, + const AnalysisResult result = std::transform_reduce(std::execution::par_unseq, std::next(argv), argv + argc, + AnalysisResult{.fileCount = 0}, std::plus{ }, parseFile ); diff --git a/borded_cpp/src/main2.cpp b/borded_cpp/src/main2.cpp new file mode 100644 index 0000000..fbb31cf --- /dev/null +++ b/borded_cpp/src/main2.cpp @@ -0,0 +1,195 @@ +#include <string> +#include <string_view> +#include <fstream> +#include <algorithm> +#include <iostream> +#include <numeric> +#include <execution> +#include <format> +#include <codecvt> +#include <ranges> + +#ifdef __cpp_lib_print +#include <print> +#else +namespace std { +template <typename T, typename... Args> +inline void print(T format, Args &&... args) { + auto f = std::vformat(format, std::make_format_args(args...)); + std::cout << f; +} + +template <typename T, typename... Args> +inline void println(T format, Args &&... 
args) { + auto f = std::vformat(format, std::make_format_args(args...)); + std::cout << f << std::endl; +} +} +#endif + +constexpr std::array<std::wstring_view, 35> BAD_WORDS = { + L"recovery", + L"techie", + L"http", + L"https", + L"digital", + L"hack", + L"::", + L"//", + L"com", + L"@", + L"crypto", + L"bitcoin", + L"wallet", + L"hacker", + L"welcome", + L"whatsapp", + L"email", + L"cryptocurrency", + L"stolen", + L"freeze", + L"quick", + L"crucial", + L"tracing", + L"scammers", + L"expers", + L"hire", + L"century", + L"transaction", + L"essential", + L"managing", + L"contact", + L"contacting", + L"understanding", + L"assets", + L"funds", +}; + +constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::max(), + [](std::size_t current, const std::wstring_view &word) { + return std::min(current, word.size()); + } + ); +constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::min(), + [](std::size_t current, const std::wstring_view &word) { + return std::max(current, word.size()); + } + ); + +std::size_t totalWordCount = 0; +std::size_t totalCapitalizedCount = 0; +std::size_t totalSentenceCount = 0; +std::size_t totalNumberCount = 0; +std::size_t totalForbiddenCount = 0; +std::size_t fileCount = 1; + +std::size_t failCount = 0; + +void check_word(std::wstring &word, std::size_t &forbiddenCount) { + if (word.size() < SHORTEST_BAD_WORD || word.size() > LONGEST_BAD_WORD) { + return; + } + std::ranges::transform(word, word.begin(), ::towlower); + if (std::ranges::find(BAD_WORDS, word) != BAD_WORDS.end()) { + forbiddenCount++; + } + // if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) { + // return word.contains(badWord); + // } + // ) != BAD_WORDS.end()) { + // forbiddenCount++; + // } +} + +void parseFile(const std::string_view &filename) { + std::wifstream file; + + // suppress warning of deprecation +#pragma warning(push) +#pragma warning(suppress : 
4996) + file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>)); +#pragma warning(pop) + + file.open(std::string(filename)); + if (!file.is_open()) { + std::println("File doesn't exist: {}", filename); + return; + } + + bool inWord = false; + bool isDigit = false; + wchar_t c; + + std::wstring word; + while (file.get(c)) { + if (c == '.') { + totalSentenceCount++; + } + + if (std::isspace(c)) { + inWord = false; + isDigit = false; + + if (!word.empty()) { + check_word(word, totalForbiddenCount); + word.clear(); + } + continue; + } else { + if (!inWord) { + totalWordCount++; + if (std::isupper(c)) { + totalCapitalizedCount++; + } + } + inWord = true; + + if (std::isdigit(c) && !isDigit) { + totalNumberCount++; + isDigit = true; + } + + word.push_back(c); + } + }; + + // std::cout << "File state: " << file.rdstate() << " EOF" << file.eof() << " Fail" << file.fail() << " Bad" << file.bad() << std::endl; + + if (!word.empty()) { + check_word(word, totalForbiddenCount); + } + + file.close(); + + if (file.fail() && !file.eof()) { + failCount++; + } + +} + +int main(const int argc, char *argv[]) { + if (argc < 2) { + std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]); + return 1; + } + + std::for_each(std::execution::par_unseq, std::next(argv), argv + argc, parseFile); + + double capitalizedPercentage = (totalWordCount > 0) + ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0 + : 0; + double forbiddenPercentage = (totalWordCount > 0) + ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0 + : 0; + double wordCountPerSentence = (totalSentenceCount > 0) + ? 
static_cast<double>(totalWordCount) / totalSentenceCount + : 0; + + std::println( + "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}", + totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount, + capitalizedPercentage, forbiddenPercentage, wordCountPerSentence + ); + + return 0; +}