parent
2f2710df41
commit
611ef8d124
@ -1,4 +1,3 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <fstream>
|
||||
@ -7,8 +6,28 @@
|
||||
#include <numeric>
|
||||
#include <execution>
|
||||
#include <format>
|
||||
#include <codecvt>
|
||||
#include <ranges>
|
||||
|
||||
const std::vector<std::wstring_view> BAD_WORDS = {
|
||||
#ifdef __cpp_lib_print
|
||||
#include <print>
|
||||
#else
|
||||
// Fallback used when the standard library does not provide <print>: minimal
// stand-ins for std::print / std::println built on std::vformat and std::cout.
//
// NOTE(review): the C++ standard does not allow user code to add function
// templates to namespace std. The shim stays here only so the existing
// std::print / std::println call sites keep compiling unchanged; consider
// moving it into a project namespace.
namespace std {
    /// Formats `args` according to `format` and writes the text to std::cout.
    /// @param format  Format string; anything std::vformat accepts as its first argument.
    /// @param args    Values substituted into the replacement fields.
    template <typename T, typename... Args>
    inline void print(T format, Args&&... args) {
        // The pack is deliberately passed as named lvalues, not forwarded.
        std::cout << std::vformat(format, std::make_format_args(args...));
    }

    /// Same as print(), but terminates the output with a single '\n'.
    /// The newline is appended to the formatted text and the whole line is
    /// written with one insertion, so concurrent callers cannot split a line
    /// between its text and its terminator, and no flush is forced per call.
    template <typename T, typename... Args>
    inline void println(T format, Args&&... args) {
        auto line = std::vformat(format, std::make_format_args(args...));
        line.push_back('\n');
        std::cout << line;
    }
}
|
||||
#endif
|
||||
|
||||
constexpr std::array<std::wstring_view, 35> BAD_WORDS ={
|
||||
L"recovery",
|
||||
L"techie",
|
||||
L"http",
|
||||
@ -46,43 +65,56 @@ const std::vector<std::wstring_view> BAD_WORDS = {
|
||||
L"funds",
|
||||
};
|
||||
|
||||
// Length of the shortest entry in BAD_WORDS, folded at compile time.
// The seed is the largest std::size_t so the first entry always replaces it.
constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS,
    std::numeric_limits<std::size_t>::max(),
    [](const std::size_t shortestSoFar, const std::wstring_view &candidate) {
        return candidate.size() < shortestSoFar ? candidate.size() : shortestSoFar;
    });

// Length of the longest entry in BAD_WORDS, folded at compile time.
// The seed is the smallest std::size_t (zero) so the first entry always replaces it.
constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS,
    std::numeric_limits<std::size_t>::min(),
    [](const std::size_t longestSoFar, const std::wstring_view &candidate) {
        return longestSoFar < candidate.size() ? candidate.size() : longestSoFar;
    });
|
||||
|
||||
/// Text statistics for one analysed file, or the field-wise sum over several files.
struct AnalysisResult {
    std::size_t totalWordCount = 0;
    std::size_t totalCapitalizedCount = 0;
    std::size_t totalSentenceCount = 0;
    std::size_t totalNumberCount = 0;
    std::size_t totalForbiddenCount = 0;

    // Defaults to 1 so a default-constructed per-file result counts as one file.
    // A value used as the seed of a sum must set this to 0 explicitly.
    std::size_t fileCount = 1;

    std::size_t failCount = 0;

    /// Renders all seven counters as "Label: value" lines separated by '\n',
    /// with no trailing newline.
    operator std::string() const {
        return std::format(
            "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}",
            totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount
        );
    }

    /// Field-wise sum of two results.
    /// The initializer order below must follow the member declaration order above.
    friend AnalysisResult operator+(const AnalysisResult &lhs, const AnalysisResult &rhs) {
        return {
            lhs.totalWordCount + rhs.totalWordCount,
            lhs.totalCapitalizedCount + rhs.totalCapitalizedCount,
            lhs.totalSentenceCount + rhs.totalSentenceCount,
            lhs.totalNumberCount + rhs.totalNumberCount,
            lhs.totalForbiddenCount + rhs.totalForbiddenCount,
            lhs.fileCount + rhs.fileCount,
            lhs.failCount + rhs.failCount
        };
    }
};
|
||||
|
||||
// Overloaded operator<< for AnalysisResult
|
||||
std::ostream& operator<<(std::ostream& os, const AnalysisResult& result) {
|
||||
os << "Word Count: " << result.totalWordCount << "\n"
|
||||
<< "Capitalized Count: " << result.totalCapitalizedCount << "\n"
|
||||
<< "Sentence Count: " << result.totalSentenceCount << "\n"
|
||||
<< "Number Count: " << result.totalNumberCount << "\n"
|
||||
<< "Forbidden Count: " << result.totalForbiddenCount;
|
||||
return os;
|
||||
}
|
||||
|
||||
void check_word(const std::wstring &word, std::size_t &forbiddenCount) {
|
||||
if (std::find(BAD_WORDS.begin(), BAD_WORDS.end(), word) != BAD_WORDS.end()) {
|
||||
void check_word(std::wstring& word, std::size_t &forbiddenCount) {
|
||||
if (word.size() < SHORTEST_BAD_WORD || word.size() > LONGEST_BAD_WORD) {
|
||||
return;
|
||||
}
|
||||
std::transform(word.begin(), word.end(), word.begin(), ::towlower);
|
||||
if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) {
|
||||
// Case-insensitive check if badword is inside word
|
||||
return word.contains(badWord);
|
||||
}) != BAD_WORDS.end()) {
|
||||
forbiddenCount++;
|
||||
}
|
||||
}
|
||||
@ -90,15 +122,17 @@ void check_word(const std::wstring &word, std::size_t &forbiddenCount) {
|
||||
AnalysisResult parseFile(const std::string_view &filename) {
|
||||
std::wifstream file;
|
||||
|
||||
file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
|
||||
file.open(std::string(filename)); // Modified line
|
||||
if (!file.is_open()) {
|
||||
std::cout << "File doesn't exist: " << filename << std::endl;
|
||||
std::println("File doesn't exist: {}", filename);
|
||||
return { };
|
||||
}
|
||||
|
||||
AnalysisResult result{ };
|
||||
|
||||
bool inWord = false;
|
||||
bool isDigit = false;
|
||||
wchar_t c;
|
||||
|
||||
std::wstring word;
|
||||
@ -107,11 +141,14 @@ AnalysisResult parseFile(const std::string_view &filename) {
|
||||
result.totalSentenceCount++;
|
||||
}
|
||||
|
||||
if (std::isspace(c) || c == '.' || c == '?' || c == '!' || c == ';' || c == ':') {
|
||||
if (std::isspace(c)) {
|
||||
inWord = false;
|
||||
isDigit = false;
|
||||
|
||||
check_word(word, result.totalForbiddenCount);
|
||||
word.clear();
|
||||
if (!word.empty()) {
|
||||
check_word(word, result.totalForbiddenCount);
|
||||
word.clear();
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
if (!inWord) {
|
||||
@ -122,36 +159,54 @@ AnalysisResult parseFile(const std::string_view &filename) {
|
||||
}
|
||||
inWord = true;
|
||||
|
||||
if (std::isdigit(c)) {
|
||||
if (std::isdigit(c) && !isDigit) {
|
||||
result.totalNumberCount++;
|
||||
while (file.get(c) && std::isdigit(c)) { }
|
||||
file.unget();
|
||||
continue;
|
||||
isDigit = true;
|
||||
}
|
||||
|
||||
word.push_back(c);
|
||||
}
|
||||
};
|
||||
|
||||
// std::cout << "File state: " << file.rdstate() << " EOF" << file.eof() << " Fail" << file.fail() << " Bad" << file.bad() << std::endl;
|
||||
|
||||
if (!word.empty()) {
|
||||
check_word(word, result.totalForbiddenCount);
|
||||
}
|
||||
|
||||
file.close();
|
||||
|
||||
if (file.fail() && !file.eof()) {
|
||||
result.failCount++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Entry point: analyses every file named on the command line and prints the
/// combined counters plus three derived ratios.
/// @return 1 when no file argument is given, otherwise 0.
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    // One parseFile call per argument, summed with AnalysisResult's operator+.
    // The policy is `par`, not `par_unseq`: parseFile allocates memory and performs
    // stream I/O, which are vectorization-unsafe operations and therefore not
    // permitted inside an unsequenced execution policy.
    // The seed sets fileCount to 0 because a default AnalysisResult counts as one file.
    const AnalysisResult result = std::transform_reduce(
        std::execution::par, std::next(argv), argv + argc,
        AnalysisResult{ .fileCount = 0 },
        std::plus{ },
        parseFile
    );

    // Each ratio falls back to 0 when its denominator is 0.
    const double capitalizedPercentage = (result.totalWordCount > 0)
        ? static_cast<double>(result.totalCapitalizedCount) / result.totalWordCount * 100.0
        : 0;
    const double forbiddenPercentage = (result.totalWordCount > 0)
        ? static_cast<double>(result.totalForbiddenCount) / result.totalWordCount * 100.0
        : 0;
    const double wordCountPerSentence = (result.totalSentenceCount > 0)
        ? static_cast<double>(result.totalWordCount) / result.totalSentenceCount
        : 0;

    std::println("{}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}", std::string(result),
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
    );

    return 0;
}
|
||||
|
Loading…
Reference in New Issue
Block a user