Setup v1 of borded spam parser

This commit is contained in:
BordedDev 2025-03-20 21:44:22 +01:00
parent fd9c7ab669
commit 1fb6481f2b
No known key found for this signature in database
GPG Key ID: C5F495EAE56673BF
3 changed files with 258 additions and 0 deletions

97
borded_cpp/.gitignore vendored Normal file
View File

@ -0,0 +1,97 @@
# ---- C/C++ build artifacts: dependency files, objects, precompiled headers,
# ---- module files, static/shared libraries and executables
*.d
*.slo
*.lo
*.o
*.obj
*.gch
*.pch
*.so
*.dylib
*.dll
*.mod
*.smod
*.lai
*.la
*.a
*.lib
*.exe
*.out
*.app
# ---- IDE project state (.idea directory) and IDE build output directories
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
.idea/**/aws.xml
.idea/**/contentModel.xml
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
.idea/**/gradle.xml
.idea/**/libraries
# NOTE(review): the bare `.idea` entry below ignores the whole directory, which
# appears to make the more specific `.idea/...` patterns in this section
# redundant - confirm which of the two is intended.
.idea
cmake-build-*/
.idea/**/mongoSettings.xml
*.iws
out/
.idea_modules/
atlassian-ide-plugin.xml
.idea/replstate.xml
.idea/sonarlint/
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
.idea/httpRequests
.idea/caches/build_file_checksums.ser
# ---- Editor backup files and Linux desktop / filesystem leftovers
*~
.fuse_hidden*
.directory
.Trash-*
.nfs*
# ---- CMake-generated files (cache, generated build system, test and install output)
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
CMakeUserPresets.json
# ---- Windows: thumbnail caches, folder settings, recycle bin, installers, shortcuts
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
*.stackdump
[Dd]esktop.ini
$RECYCLE.BIN/
*.cab
*.msi
*.msix
*.msm
*.msp
*.lnk
# ---- macOS: Finder metadata, resource forks and volume-level system directories
.DS_Store
.AppleDouble
.LSOverride
Icon
._*
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

21
borded_cpp/CMakeLists.txt Normal file
View File

@ -0,0 +1,21 @@
# Build script for the `isspam` executable, built from the single source file
# src/main.cpp.
cmake_minimum_required(VERSION 3.30)
project(isspam)

# Request the C++26 language level for every target declared below.
# NOTE(review): src/main.cpp uses `import std;`. Depending on the generator and
# toolchain, that may additionally require CMake's standard-library-module
# support to be enabled - confirm for each supported toolchain.
set(CMAKE_CXX_STANDARD 26)

# Strict diagnostics: warnings are treated as errors on both compiler families.
if (MSVC)
    add_compile_options(
        /W4                      # warning level 4
        /WX                      # treat warnings as errors
        /external:anglebrackets  # headers included with <...> count as external
        /external:W0             # no warnings from external headers
        /wd4100                  # silence C4100 (unreferenced formal parameter)
        /wd5050                  # silence C5050 (module import environment mismatch)
    )
    # Trim <windows.h> and keep it from defining min()/max() macros.
    add_compile_definitions(WIN32_LEAN_AND_MEAN VC_EXTRALEAN NOMINMAX)
else ()
    add_compile_options(-Wall -Wextra -Wpedantic -Werror)
endif ()

add_executable(${PROJECT_NAME} src/main.cpp)

140
borded_cpp/src/main.cpp Normal file
View File

@ -0,0 +1,140 @@
import std;
// Tokens that count towards AnalysisResult::totalForbiddenCount.
//
// check_word() compares a parsed word against these entries for exact,
// case-sensitive equality, so only lower-case words spelled precisely like an
// entry are counted.
//
// NOTE(review): parseFile() treats ':' as a word separator, so the "::" entry
// looks unreachable - confirm whether it is kept only for parity with another
// word list.
// NOTE(review): "expers" may be a misspelling of "experts"; it is left
// untouched because changing it would change which words are counted.
const std::vector<std::wstring_view> BAD_WORDS = {
    L"recovery", L"techie", L"http", L"https", L"digital",
    L"hack", L"::", L"//", L"com", L"@",
    L"crypto", L"bitcoin", L"wallet", L"hacker", L"welcome",
    L"whatsapp", L"email", L"cryptocurrency", L"stolen", L"freeze",
    L"quick", L"crucial", L"tracing", L"scammers", L"expers",
    L"hire", L"century", L"transaction", L"essential", L"managing",
    L"contact", L"contacting", L"understanding", L"assets", L"funds",
};
/**
 * Text statistics gathered for one file, or summed over several files.
 *
 * All counters start at zero, so a default-constructed value is the identity
 * element for operator+.
 */
struct AnalysisResult {
    std::size_t totalWordCount = 0;         // words seen
    std::size_t totalCapitalizedCount = 0;  // words whose first character is upper-case
    std::size_t totalSentenceCount = 0;     // '.' characters seen
    std::size_t totalNumberCount = 0;       // runs of consecutive digits
    std::size_t totalForbiddenCount = 0;    // words found in BAD_WORDS

    /// Formats the five counters as a five-line report (no trailing newline).
    operator std::string() const {
        return std::format(
            "Word Count: {}\n"
            "Capitalized Count: {}\n"
            "Sentence Count: {}\n"
            "Number Count: {}\n"
            "Forbidden Count: {}",
            totalWordCount,
            totalCapitalizedCount,
            totalSentenceCount,
            totalNumberCount,
            totalForbiddenCount
        );
    }

    /// Member-wise sum of two results; neither operand is modified.
    AnalysisResult operator+(const AnalysisResult &other) const {
        AnalysisResult sum = *this;
        sum.totalWordCount += other.totalWordCount;
        sum.totalCapitalizedCount += other.totalCapitalizedCount;
        sum.totalSentenceCount += other.totalSentenceCount;
        sum.totalNumberCount += other.totalNumberCount;
        sum.totalForbiddenCount += other.totalForbiddenCount;
        return sum;
    }
};
/// Increments `forbiddenCount` by one when `word` is exactly equal
/// (case-sensitive, whole-word) to an entry of BAD_WORDS; otherwise leaves it
/// unchanged.
void check_word(const std::wstring &word, std::size_t &forbiddenCount) {
    const bool isForbidden = std::any_of(
        BAD_WORDS.cbegin(), BAD_WORDS.cend(),
        [&word](const std::wstring_view candidate) { return candidate == word; });
    if (isForbidden) {
        ++forbiddenCount;
    }
}
/**
 * @brief Reads one text file character by character and tallies its statistics.
 *
 * Counting rules:
 *  - whitespace and the characters . ? ! ; : end the current word;
 *  - every '.' adds one to the sentence count;
 *  - the first character after a separator starts a new word, which is also
 *    counted as capitalized when that character is upper-case;
 *  - each run of consecutive digits adds one to the number count, and the
 *    digits themselves are not stored in the word;
 *  - when a word ends it is passed to check_word(), which updates the
 *    forbidden-word count.
 *
 * @param filename Path of the file to analyse.
 * @return The collected counters, or an all-zero result (after printing a
 *         message) when the file cannot be opened.
 */
AnalysisResult parseFile(const std::string_view &filename) {
    std::wifstream file;
    // Build the path explicitly: not every standard library accepts a
    // string_view through open()'s std::filesystem::path overload.
    file.open(std::filesystem::path{ filename });
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return { };
    }

    // The stream yields wchar_t, so classification must use the <cwctype>
    // functions. The narrow <cctype> ones (std::isspace etc.) have undefined
    // behaviour for values that are not representable as unsigned char.
    const auto isSeparator = [](const wchar_t ch) {
        return std::iswspace(ch) || ch == L'.' || ch == L'?' || ch == L'!' || ch == L';' || ch == L':';
    };

    AnalysisResult result{ };
    bool inWord = false;
    wchar_t c{ };
    std::wstring word;
    while (file.get(c)) {
        if (c == L'.') {
            result.totalSentenceCount++;
        }

        if (isSeparator(c)) {
            // End of the current word (possibly empty after repeated separators).
            inWord = false;
            check_word(word, result.totalForbiddenCount);
            word.clear();
            continue;
        }

        if (!inWord) {
            // First character after a separator: a new word begins here.
            result.totalWordCount++;
            if (std::iswupper(c)) {
                result.totalCapitalizedCount++;
            }
            inWord = true;
        }

        if (std::iswdigit(c)) {
            // Count the whole digit run once and skip the rest of it; the
            // first non-digit stays in the stream for the next iteration.
            result.totalNumberCount++;
            while (std::iswdigit(file.peek())) {
                file.ignore();
            }
            continue;
        }

        word.push_back(c);
    }

    // The file may end without a trailing separator; flush the last word.
    if (!word.empty()) {
        check_word(word, result.totalForbiddenCount);
    }
    return result;  // the stream is closed by its destructor
}
/**
 * Entry point: analyses every file named on the command line and prints the
 * summed statistics.
 *
 * Returns 1 after printing a usage line when no file argument is given,
 * otherwise 0.
 */
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    // argv[0] is the program name; the file paths are argv[1] .. argv[argc - 1].
    char **const firstPath = argv + 1;
    char **const pathsEnd = argv + argc;

    // Each path is mapped through parseFile and the per-file results are
    // combined with AnalysisResult::operator+; the parallel execution policy
    // allows the files to be processed concurrently.
    const AnalysisResult result = std::transform_reduce(
        std::execution::par, firstPath, pathsEnd, AnalysisResult{ }, std::plus{ }, parseFile);

    std::println("{}", static_cast<std::string>(result));
    return 0;
}