notes

more tests
reorg
2025-10-04 10:03:15 -04:00 · 2025-10-04 09:58:40 -04:00 · 2025-10-04 09:18:03 -04:00 · 2025-03-24 16:03:36 -04:00 · 2025-03-24 00:48:04 -04:00 · 2025-03-24 00:20:15 -04:00
22 changed files with 1675 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,7 +5,10 @@ publish
 books
 __pycache__
 target
 # Anchored to the repository root. A leading "./" is not gitignore syntax and never matches anything.
 /isspam.py
 isspam
 risspam
 /jisspam
 isspam_cpp
 .build-trigger-2014-12-02 15:26
 borded_cpp_exec
--- a/14
+++ b/14
@ -1,20 +1,32 @@
 # Toolchain for the retoor_c build; override on the command line, e.g. `make CC=clang`.
 # Simple (:=) assignment with no trailing blank: make keeps trailing spaces as part of the value.
 CC := gcc
 CFLAGS := -Ofast
-all: build run valgrind build_risspam run_risspam build_cpp
+all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest
 # Compile the C implementation (retoor_c/isspam.c) into ./isspam.
 # Phony: the target name is a command, not the file the recipe produces.
 .PHONY: build
 build:
 	@echo "Compiling retoor_c project."
 	@$(CC) $(CFLAGS) retoor_c/isspam.c -o isspam
 # Copy the Python implementation next to the other executables as ./isspam.py.
 # Phony: the target name is a command, not the file the recipe produces.
 .PHONY: build_py
 build_py:
 	@echo "Copying py file"
 	@cp retoor_c/isspam.py isspam.py
 # Compile the C++ implementation (retoor_c/isspam.cpp) into ./isspam_cpp.
 # $(CXX) defaults to g++ in GNU make and can be overridden, e.g. `make CXX=clang++`.
 .PHONY: build_cpp
 build_cpp:
 	@echo "Compiling C++ version of isspam."
 	@$(CXX) -Ofast retoor_c/isspam.cpp -o isspam_cpp
 # Compile the Borded C++ implementation (borded_cpp/src/main3.cpp, C++23) into ./borded_cpp_exec.
 # $(CXX) defaults to g++ in GNU make and can be overridden, e.g. `make CXX=clang++`.
 .PHONY: build_borded_cpp
 build_borded_cpp:
 	@echo "Compiling Borded C++ version of isspam."
 	@$(CXX) -std=c++23 -Ofast borded_cpp/src/main3.cpp -o borded_cpp_exec
 # Build the 12bitfloat Rust implementation and copy the binary to ./risspam.
 # Uses `cargo build`: a build step should only compile. `cargo run` would also execute
 # the program without arguments and let its exit status decide whether the copy happens.
 .PHONY: build_risspam
 build_risspam:
 	@echo "Compiling 12bitfloat_risspam project."
 	cd 12bitfloat_rust/risspam && cargo build --release && cp target/release/risspam ../../
 # Build the jest_rust implementation and copy the binary to ./jisspam.
 # Phony: the target name is a command, not the file the recipe produces.
 .PHONY: build_jest
 build_jest:
 	@echo "compiling jest_rust project"
 	cd jest_rust && cargo build --release && cp target/release/jisspam ..
 run: run_spam wl run_not_spam
 run_risspam: run_spam_risspam run_not_spam_risspam
--- a/bench.py
+++ b/bench.py
@ -11,4 +11,13 @@ print("Time Rust:",time.time() - time_start)
 # Time the remaining implementations in order: (label to print, shell command to run).
 for label, command in (
     ("Time CPP:", './isspam_cpp books/*.txt'),
     ("Time Borded CPP:", './borded_cpp_exec books/*.txt'),
     ("Time Jest Rust:", './jisspam books/*.txt'),
     ("Time Retoor Python:", 'python3 isspam.py books/*.txt'),
 ):
     time_start = time.time()
     subprocess.check_output(command, shell=True)
     print(label, time.time() - time_start)
 print("***end benchmark***")
--- a/borded_cpp/.gitignore
+++ b/borded_cpp/.gitignore
@ -0,0 +1,97 @@
 *.d
 *.slo
 *.lo
 *.o
 *.obj
 *.gch
 *.pch
 *.so
 *.dylib
 *.dll
 *.mod
 *.smod
 *.lai
 *.la
 *.a
 *.lib
 *.exe
 *.out
 *.app
 .idea/**/workspace.xml
 .idea/**/tasks.xml
 .idea/**/usage.statistics.xml
 .idea/**/dictionaries
 .idea/**/shelf
 .idea/**/aws.xml
 .idea/**/contentModel.xml
 .idea/**/dataSources/
 .idea/**/dataSources.ids
 .idea/**/dataSources.local.xml
 .idea/**/sqlDataSources.xml
 .idea/**/dynamic.xml
 .idea/**/uiDesigner.xml
 .idea/**/dbnavigator.xml
 .idea/**/gradle.xml
 .idea/**/libraries
 .idea
 cmake-build-*/
 .idea/**/mongoSettings.xml
 *.iws
 out/
 .idea_modules/
 atlassian-ide-plugin.xml
 .idea/replstate.xml
 .idea/sonarlint/
 com_crashlytics_export_strings.xml
 crashlytics.properties
 crashlytics-build.properties
 fabric.properties
 .idea/httpRequests
 .idea/caches/build_file_checksums.ser
 *~
 .fuse_hidden*
 .directory
 .Trash-*
 .nfs*
 CMakeLists.txt.user
 CMakeCache.txt
 CMakeFiles
 CMakeScripts
 Testing
 Makefile
 cmake_install.cmake
 install_manifest.txt
 compile_commands.json
 CTestTestfile.cmake
 _deps
 CMakeUserPresets.json
 Thumbs.db
 Thumbs.db:encryptable
 ehthumbs.db
 ehthumbs_vista.db
 *.stackdump
 [Dd]esktop.ini
 $RECYCLE.BIN/
 *.cab
 *.msi
 *.msix
 *.msm
 *.msp
 *.lnk
 .DS_Store
 .AppleDouble
 .LSOverride
 Icon
 ._*
 .DocumentRevisions-V100
 .fseventsd
 .Spotlight-V100
 .TemporaryItems
 .Trashes
 .VolumeIcon.icns
 .com.apple.timemachine.donotpresent
 .AppleDB
 .AppleDesktop
 Network Trash Folder
 Temporary Items
 .apdisk
--- a/borded_cpp/CMakeLists.txt
+++ b/borded_cpp/CMakeLists.txt
@ -0,0 +1,27 @@
 # Build script for the borded_cpp executables: `isspam` (src/main.cpp) and `isspam3` (src/main3.cpp).
 cmake_minimum_required(VERSION 3.25)
 project(isspam)
 # Requested language level for all targets.
 # NOTE(review): CMAKE_CXX_STANDARD_REQUIRED is not set, so the request may not be enforced — confirm.
 set(CMAKE_CXX_STANDARD 26)
 if (MSVC)
    # Warning configuration for the Microsoft compiler.
    add_compile_options(/W4)
    add_compile_options(/WX)
    add_compile_options(/external:anglebrackets)
    add_compile_options(/external:W0)
    add_compile_options(/wd4100)
    add_compile_options(/wd5050)
    # WIN32_LEAN_AND_MEAN is defined by both of the following commands.
    add_definitions(-DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN)
    add_compile_definitions(WIN32_LEAN_AND_MEAN NOMINMAX)
 else ()
    # Warning configuration for every other compiler; -Werror is intentionally left disabled below.
    add_compile_options(-Wall)
    add_compile_options(-Wextra)
    add_compile_options(-Wpedantic)
 #    add_compile_options(-Werror)
 endif ()
 # One executable per source file; the second target is named "${PROJECT_NAME}3".
 add_executable(${PROJECT_NAME} src/main.cpp)
 add_executable(${PROJECT_NAME}3 src/main3.cpp)
 # On Linux both executables link against the `tbb` library.
 if (LINUX)
    target_link_libraries(${PROJECT_NAME} tbb)
    target_link_libraries(${PROJECT_NAME}3 tbb)
 endif ()
--- a/borded_cpp/Dockerfile
+++ b/borded_cpp/Dockerfile
@ -0,0 +1,3 @@
 # Build environment for borded_cpp: GCC toolchain plus CMake, gdb and TBB.
 FROM gcc:latest
 # apt-get is the script-stable front end; the package lists are removed to keep the layer small.
 # libtbb-dev provides the `tbb` library that CMakeLists.txt links on Linux.
 RUN apt-get update \
  && apt-get install -y --no-install-recommends cmake gdb libtbb-dev \
  && rm -rf /var/lib/apt/lists/*
 WORKDIR /home
--- a/borded_cpp/compose.yml
+++ b/borded_cpp/compose.yml
@ -0,0 +1,9 @@
 # Compose setup that builds borded_cpp inside the image defined by the local Dockerfile.
 services:
  cpp:
    # Image is built from the Dockerfile in this directory.
    build: .
    # Runs the build script through sh.
    command: ["sh","doit.sh"]
    tty: true 
    stdin_open: true 
    volumes:
      # This directory is mounted at /home; the sibling books directory at /books.
      - ./:/home
      - ../books:/books
--- a/borded_cpp/doit.sh
+++ b/borded_cpp/doit.sh
@ -0,0 +1,2 @@
 # Rebuild from scratch in ./build: wipe the directory, configure with CMake, compile with make.
 # `rm -rf` already succeeds when the directory is missing, so no `|| true` is needed
 # (the previous `| true` only piped rm's output into `true`).
 rm -rf build
 mkdir build && cd build && cmake .. && make
--- a/borded_cpp/src/main.cpp
+++ b/borded_cpp/src/main.cpp
@ -0,0 +1,221 @@
 #include <string>
 #include <string_view>
 #include <fstream>
 #include <algorithm>
 #include <iostream>
 #include <numeric>
 #include <execution>
 #include <format>
 #include <codecvt>
 #include <ranges>
 #ifdef __cpp_lib_print
 #include <print>
 #else
 // Stand-ins for std::print / std::println, compiled only when __cpp_lib_print is not defined.
 namespace std {
 // Formats `args` according to `format` and writes the text to std::cout.
 template <typename T, typename... Args>
 inline void print(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...));
 }
 // Like print(), followed by std::endl (newline and flush).
 template <typename T, typename... Args>
 inline void println(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...)) << std::endl;
 }
 }
 #endif
 // Words (and punctuation tokens such as "::", "//" and "@") counted as "forbidden".
 // check_word() lower-cases a token and looks for an exact match in this list.
 // NOTE(review): "expers" may be a typo for "experts" — confirm against the reference word list before changing it.
 constexpr std::array<std::wstring_view, 35> BAD_WORDS = {
    L"recovery",
    L"techie",
    L"http",
    L"https",
    L"digital",
    L"hack",
    L"::",
    L"//",
    L"com",
    L"@",
    L"crypto",
    L"bitcoin",
    L"wallet",
    L"hacker",
    L"welcome",
    L"whatsapp",
    L"email",
    L"cryptocurrency",
    L"stolen",
    L"freeze",
    L"quick",
    L"crucial",
    L"tracing",
    L"scammers",
    L"expers",
    L"hire",
    L"century",
    L"transaction",
    L"essential",
    L"managing",
    L"contact",
    L"contacting",
    L"understanding",
    L"assets",
    L"funds",
 };
 // Length of the shortest BAD_WORDS entry, folded at compile time.
 constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS, std::numeric_limits<std::size_t>::max(),
    [](std::size_t best, const std::wstring_view &candidate) {
        return candidate.size() < best ? candidate.size() : best;
    });
 // Length of the longest BAD_WORDS entry, folded at compile time.
 constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS, std::numeric_limits<std::size_t>::min(),
    [](std::size_t best, const std::wstring_view &candidate) {
        return candidate.size() > best ? candidate.size() : best;
    });
 // Counters collected for one file, or summed over several files with operator+.
 struct AnalysisResult {
    std::size_t totalWordCount = 0;
    std::size_t totalCapitalizedCount = 0;
    std::size_t totalSentenceCount = 0;
    std::size_t totalNumberCount = 0;
    std::size_t totalForbiddenCount = 0;
    // A default-constructed result counts as one file.
    std::size_t fileCount = 1;
    std::size_t failCount = 0;

    // Renders the seven counters as newline-separated "Label: value" lines (no trailing newline).
    operator std::string() const {
        return std::format(
            "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}",
            totalWordCount,
            totalCapitalizedCount,
            totalSentenceCount,
            totalNumberCount,
            totalForbiddenCount,
            fileCount,
            failCount);
    }

    // Member-wise sum of two results.
    friend AnalysisResult operator+(const AnalysisResult &lhs, const AnalysisResult &rhs) {
        AnalysisResult sum = lhs;
        sum.totalWordCount += rhs.totalWordCount;
        sum.totalCapitalizedCount += rhs.totalCapitalizedCount;
        sum.totalSentenceCount += rhs.totalSentenceCount;
        sum.totalNumberCount += rhs.totalNumberCount;
        sum.totalForbiddenCount += rhs.totalForbiddenCount;
        sum.fileCount += rhs.fileCount;
        sum.failCount += rhs.failCount;
        return sum;
    }
 };
 // Lower-cases `word` in place and increments `forbiddenCount` when the result equals a BAD_WORDS entry.
 // Words shorter than SHORTEST_BAD_WORD or longer than LONGEST_BAD_WORD are left untouched.
 void check_word(std::wstring &word, std::size_t &forbiddenCount) {
    const std::size_t length = word.size();
    if (length >= SHORTEST_BAD_WORD && length <= LONGEST_BAD_WORD) {
        for (wchar_t &ch : word) {
            ch = ::towlower(ch);
        }
        // Exact match only; substring matching is deliberately not done here.
        const bool forbidden = std::ranges::any_of(BAD_WORDS, [&word](const std::wstring_view &badWord) {
            return badWord == word;
        });
        if (forbidden) {
            forbiddenCount++;
        }
    }
 }
 // Reads `filename` through a UTF-8 decoding wide stream and returns its counters.
 //
 // Counting rules, applied to each character read:
 //   - every '.' adds one sentence;
 //   - a word is a run of non-whitespace characters, and is "capitalized" when its first character is upper case;
 //   - a word containing at least one digit adds one number;
 //   - every finished word is passed to check_word() for the forbidden-word count.
 //
 // Returns a default-constructed AnalysisResult when the file cannot be opened.
 // failCount is incremented when the stream ended in a failed state without reaching end-of-file.
 AnalysisResult parseFile(const std::string_view &filename) {
    std::wifstream file;
    // suppress the deprecation warning for std::codecvt_utf8
 #pragma warning(push)
 #pragma warning(suppress : 4996)
    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
 #pragma warning(pop)
    file.open(std::string(filename));
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return { };
    }
    AnalysisResult result{ };
    bool inWord = false;
    bool isDigit = false;
    wchar_t c;
    std::wstring word;
    while (file.get(c)) {
        if (c == L'.') {
            result.totalSentenceCount++;
        }
        // The characters are wchar_t, so the wide classification functions are required:
        // the narrow isspace/isupper/isdigit are undefined for values that do not fit in
        // unsigned char, which any decoded non-Latin-1 character exceeds.
        if (::iswspace(c)) {
            inWord = false;
            isDigit = false;
            if (!word.empty()) {
                check_word(word, result.totalForbiddenCount);
                word.clear();
            }
            continue;
        }
        if (!inWord) {
            // First character of a new word.
            result.totalWordCount++;
            if (::iswupper(c)) {
                result.totalCapitalizedCount++;
            }
        }
        inWord = true;
        if (::iswdigit(c) && !isDigit) {
            // Count the word as a number once, at its first digit.
            result.totalNumberCount++;
            isDigit = true;
        }
        word.push_back(c);
    }
    // The last word has no trailing whitespace to trigger the check.
    if (!word.empty()) {
        check_word(word, result.totalForbiddenCount);
    }
    file.close();
    if (file.fail() && !file.eof()) {
        result.failCount++;
    }
    return result;
 }
 int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }
    const AnalysisResult result = std::transform_reduce(std::execution::par_unseq, std::next(argv), argv + argc,
                                                        AnalysisResult{.fileCount = 0},
                                                        std::plus{ },
                                                        parseFile
        );
    double capitalizedPercentage = (result.totalWordCount > 0)
                                       ? static_cast<double>(result.totalCapitalizedCount) / result.totalWordCount * 100.0
                                       : 0;
    double forbiddenPercentage = (result.totalWordCount > 0)
                                     ? static_cast<double>(result.totalForbiddenCount) / result.totalWordCount * 100.0
                                     : 0;
    double wordCountPerSentence = (result.totalSentenceCount > 0)
                                      ? static_cast<double>(result.totalWordCount) / result.totalSentenceCount
                                      : 0;
    std::println("{}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}", std::string(result),
                 capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
        );
    return 0;
 }
--- a/borded_cpp/src/main2.cpp
+++ b/borded_cpp/src/main2.cpp
@ -0,0 +1,195 @@
 #include <string>
 #include <string_view>
 #include <fstream>
 #include <algorithm>
 #include <iostream>
 #include <numeric>
 #include <execution>
 #include <format>
 #include <codecvt>
 #include <ranges>
 #ifdef __cpp_lib_print
 #include <print>
 #else
 // Stand-ins for std::print / std::println, compiled only when __cpp_lib_print is not defined.
 namespace std {
 // Formats `args` according to `format` and writes the text to std::cout.
 template <typename T, typename... Args>
 inline void print(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...));
 }
 // Like print(), followed by std::endl (newline and flush).
 template <typename T, typename... Args>
 inline void println(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...)) << std::endl;
 }
 }
 #endif
 // Words (and punctuation tokens such as "::", "//" and "@") counted as "forbidden".
 // check_word() lower-cases a token and looks for an exact match in this list.
 // NOTE(review): "expers" may be a typo for "experts" — confirm against the reference word list before changing it.
 constexpr std::array<std::wstring_view, 35> BAD_WORDS = {
    L"recovery",
    L"techie",
    L"http",
    L"https",
    L"digital",
    L"hack",
    L"::",
    L"//",
    L"com",
    L"@",
    L"crypto",
    L"bitcoin",
    L"wallet",
    L"hacker",
    L"welcome",
    L"whatsapp",
    L"email",
    L"cryptocurrency",
    L"stolen",
    L"freeze",
    L"quick",
    L"crucial",
    L"tracing",
    L"scammers",
    L"expers",
    L"hire",
    L"century",
    L"transaction",
    L"essential",
    L"managing",
    L"contact",
    L"contacting",
    L"understanding",
    L"assets",
    L"funds",
 };
 // Length of the shortest BAD_WORDS entry, folded at compile time.
 constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS, std::numeric_limits<std::size_t>::max(),
    [](std::size_t best, const std::wstring_view &candidate) {
        return candidate.size() < best ? candidate.size() : best;
    });
 // Length of the longest BAD_WORDS entry, folded at compile time.
 constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS, std::numeric_limits<std::size_t>::min(),
    [](std::size_t best, const std::wstring_view &candidate) {
        return candidate.size() > best ? candidate.size() : best;
    });
 // Program-wide counters, updated by parseFile() and printed by main().
 std::size_t totalWordCount{0};
 std::size_t totalCapitalizedCount{0};
 std::size_t totalSentenceCount{0};
 std::size_t totalNumberCount{0};
 std::size_t totalForbiddenCount{0};
 // File count reported by main().
 std::size_t fileCount{1};
 std::size_t failCount{0};
 // Lower-cases `word` in place and increments `forbiddenCount` when the result equals a BAD_WORDS entry.
 // Words shorter than SHORTEST_BAD_WORD or longer than LONGEST_BAD_WORD are left untouched.
 void check_word(std::wstring &word, std::size_t &forbiddenCount) {
    const std::size_t length = word.size();
    if (length >= SHORTEST_BAD_WORD && length <= LONGEST_BAD_WORD) {
        for (wchar_t &ch : word) {
            ch = ::towlower(ch);
        }
        // Exact match only; substring matching is deliberately not done here.
        const bool forbidden = std::ranges::any_of(BAD_WORDS, [&word](const std::wstring_view &badWord) {
            return badWord == word;
        });
        if (forbidden) {
            forbiddenCount++;
        }
    }
 }
 // Reads `filename` through a UTF-8 decoding wide stream and adds its counts to the global counters.
 //
 // Counting rules, applied to each character read:
 //   - every '.' adds one sentence;
 //   - a word is a run of non-whitespace characters, and is "capitalized" when its first character is upper case;
 //   - a word containing at least one digit adds one number;
 //   - every finished word is passed to check_word() for the forbidden-word count.
 //
 // Prints a message and returns without counting anything when the file cannot be opened.
 // failCount is incremented when the stream ended in a failed state without reaching end-of-file.
 void parseFile(const std::string_view &filename) {
    std::wifstream file;
    // suppress the deprecation warning for std::codecvt_utf8
 #pragma warning(push)
 #pragma warning(suppress : 4996)
    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
 #pragma warning(pop)
    file.open(std::string(filename));
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return;
    }
    bool inWord = false;
    bool isDigit = false;
    wchar_t c;
    std::wstring word;
    while (file.get(c)) {
        if (c == L'.') {
            totalSentenceCount++;
        }
        // The characters are wchar_t, so the wide classification functions are required:
        // the narrow isspace/isupper/isdigit are undefined for values that do not fit in
        // unsigned char, which any decoded non-Latin-1 character exceeds.
        if (::iswspace(c)) {
            inWord = false;
            isDigit = false;
            if (!word.empty()) {
                check_word(word, totalForbiddenCount);
                word.clear();
            }
            continue;
        }
        if (!inWord) {
            // First character of a new word.
            totalWordCount++;
            if (::iswupper(c)) {
                totalCapitalizedCount++;
            }
        }
        inWord = true;
        if (::iswdigit(c) && !isDigit) {
            // Count the word as a number once, at its first digit.
            totalNumberCount++;
            isDigit = true;
        }
        word.push_back(c);
    }
    // The last word has no trailing whitespace to trigger the check.
    if (!word.empty()) {
        check_word(word, totalForbiddenCount);
    }
    file.close();
    if (file.fail() && !file.eof()) {
        failCount++;
    }
 }
 // Entry point: analyses every file named on the command line and prints the combined statistics.
 // Returns 1 when no file argument was given, 0 otherwise.
 int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }
    // parseFile() adds to plain global counters without any synchronization, so the files are
    // processed one after another. Running it under a parallel execution policy would be a
    // data race and could lose updates.
    std::for_each(std::next(argv), argv + argc, parseFile);
    // parseFile() does not maintain fileCount; every command-line argument is one input file.
    fileCount = static_cast<std::size_t>(argc - 1);
    double capitalizedPercentage = (totalWordCount > 0)
                                       ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
                                       : 0;
    double forbiddenPercentage = (totalWordCount > 0)
                                     ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
                                     : 0;
    double wordCountPerSentence = (totalSentenceCount > 0)
                                      ? static_cast<double>(totalWordCount) / totalSentenceCount
                                      : 0;
    std::println(
        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
        );
    return 0;
 }
--- a/borded_cpp/src/main3.cpp
+++ b/borded_cpp/src/main3.cpp
@ -0,0 +1,576 @@
 #include <string>
 #include <string_view>
 #include <fstream>
 #include <algorithm>
 #include <iostream>
 #include <execution>
 #include <format>
 #include <cstdio>
 #include <fcntl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <aio.h>
 #include <condition_variable>
 #include <unordered_set>
 #include <sys/signal.h>
 #ifdef __cpp_lib_print
 #include <print>
 #else
 // Stand-ins for std::print / std::println, compiled only when __cpp_lib_print is not defined.
 namespace std {
 // Formats `args` according to `format` and writes the text to std::cout.
 template <typename T, typename... Args>
 inline void print(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...));
 }
 // Like print(), followed by std::endl (newline and flush).
 template <typename T, typename... Args>
 inline void println(T format, Args &&... args) {
    std::cout << std::vformat(format, std::make_format_args(args...)) << std::endl;
 }
 }
 #endif
 // Words (and punctuation tokens such as "::", "//" and "@") counted as "forbidden".
 // BAD_WORDS_HASH lists the CRC-32 of these same words in the same order.
 // NOTE(review): "expers" may be a typo for "experts" — confirm against the reference word list before changing it.
 constexpr std::array<std::string_view, 35> BAD_WORDS = {
    "recovery",
    "techie",
    "http",
    "https",
    "digital",
    "hack",
    "::",
    "//",
    "com",
    "@",
    "crypto",
    "bitcoin",
    "wallet",
    "hacker",
    "welcome",
    "whatsapp",
    "email",
    "cryptocurrency",
    "stolen",
    "freeze",
    "quick",
    "crucial",
    "tracing",
    "scammers",
    "expers",
    "hire",
    "century",
    "transaction",
    "essential",
    "managing",
    "contact",
    "contacting",
    "understanding",
    "assets",
    "funds",
 };
 // Hash-set copy of BAD_WORDS; in this file it is only mentioned in commented-out code inside check_word_simple().
 const std::unordered_set<std::string_view> BAD_WORDS_SET(BAD_WORDS.begin(), BAD_WORDS.end());
 // CRC-32 lookup table for the reflected polynomial 0xEDB88320, indexed by byte value.
 // Generated at compile time instead of being spelled out as 256 literals, which removes
 // the risk of a transcription error in a hand-maintained table. Entry `b` is the result
 // of pushing the byte `b` through eight shift/xor steps.
 static constexpr std::array<unsigned int, 256> crc_table = [] {
    std::array<unsigned int, 256> table{};
    for (unsigned int byte = 0; byte < 256; ++byte) {
        unsigned int value = byte;
        for (int bit = 0; bit < 8; ++bit) {
            // Shift right by one; fold the polynomial in whenever a set bit falls off.
            value = (value & 1u) ? (0xedb88320u ^ (value >> 1)) : (value >> 1);
        }
        table[byte] = value;
    }
    return table;
 }();
 // CRC-32 of the `size` bytes starting at `str`: initial value 0xffffffff, one crc_table
 // lookup per byte, final bit inversion.
 constexpr uint32_t crc32(char const *str, const size_t size) {
    uint32_t state = 0xffffffff;
    for (char const *cursor = str, *const stop = str + size; cursor != stop; ++cursor) {
        state = crc_table[(state ^ *cursor) & 0xff] ^ (state >> 8);
    }
    return ~state;
 }
 // CRC-32 of the characters viewed by `str`.
 constexpr uint32_t crc32(std::string_view str) {
    return crc32(str.data(), str.size());
 }
 constexpr std::array<uint32_t, 35> BAD_WORDS_HASH = {
    crc32("recovery"),
    crc32("techie"),
    crc32("http"),
    crc32("https"),
    crc32("digital"),
    crc32("hack"),
    crc32("::"),
    crc32("//"),
    crc32("com"),
    crc32("@"),
    crc32("crypto"),
    crc32("bitcoin"),
    crc32("wallet"),
    crc32("hacker"),
    crc32("welcome"),
    crc32("whatsapp"),
    crc32("email"),
    crc32("cryptocurrency"),
    crc32("stolen"),
    crc32("freeze"),
    crc32("quick"),
    crc32("crucial"),
    crc32("tracing"),
    crc32("scammers"),
    crc32("expers"),
    crc32("hire"),
    crc32("century"),
    crc32("transaction"),
    crc32("essential"),
    crc32("managing"),
    crc32("contact"),
    crc32("contacting"),
    crc32("understanding"),
    crc32("assets"),
    crc32("funds")
 };
 const std::unordered_set BAD_WORDS_STR(BAD_WORDS.begin(), BAD_WORDS.end());
 // Length of the shortest BAD_WORDS entry, folded at compile time.
 constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS, std::numeric_limits<std::size_t>::max(),
    [](std::size_t best, const std::string_view &candidate) {
        return candidate.size() < best ? candidate.size() : best;
    });
 // Length of the longest BAD_WORDS entry, folded at compile time.
 constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(
    BAD_WORDS, std::numeric_limits<std::size_t>::min(),
    [](std::size_t best, const std::string_view &candidate) {
        return candidate.size() > best ? candidate.size() : best;
    });
 // Program-wide counters, updated while files are processed and printed by main().
 int totalWordCount = 0;
 int totalCapitalizedCount = 0;
 int totalSentenceCount = 0;
 int totalNumberCount = 0;
 int totalForbiddenCount = 0;
 // Starts at 0: aio_completion_handler() adds one per completed request, so an initial 1
 // made the reported file count one higher than the number of files.
 int fileCount = 0;
 int failCount = 0;
 // Number of requests whose completion handler has not finished yet; main() waits for 0.
 int done = 0;
 // Per-request context passed to aio_completion_handler() through sigev_value.sival_ptr.
 struct info {
    // File name as given on the command line.
    std::string_view name;
    // Control block of the read request this context belongs to.
    aiocb *cb;
    // Time at which this context object was created.
    const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
 };
 constexpr void check_word_simple(const char *word, const ssize_t size) {
    if (size < SHORTEST_BAD_WORD || size > LONGEST_BAD_WORD) {
        return;
    }
    // if (BAD_WORDS_SET.contains(word)) {
    //     totalForbiddenCount++;
    // }
    const auto hs = crc32(word, size);
    for (int i = 0; i < BAD_WORDS_HASH.size(); ++i) {
        if (BAD_WORDS_HASH[i] == hs) {
            totalForbiddenCount++;
            return;
        }
    }
 }
 // Scans the `size` bytes at `str` and adds the counts to the global counters.
 //
 // Rules:
 //   - every '.' adds one sentence;
 //   - a word is a run of bytes other than space, '\n', '\r' and '\t'; it is "capitalized"
 //     when its first byte is 'A'..'Z';
 //   - a digit after the first byte of a word adds one number, and the rest of that word is
 //     skipped (still counting '.') without a forbidden-word check;
 //   - every other finished word is passed to check_word_simple().
 //
 // Only str[0 .. size-1] is read. The loops used to run up to and including index `size`,
 // which read one byte past the data and could count a phantom word or sentence from it.
 void read_str(char *str, ssize_t size) {
    int mark = -1;  // start index of the word being scanned, -1 when between words
    int fileWords = 0;
    for (int pos = 0; pos < size; ++pos) {
        char *c = str + pos;
        if (*c == '.') {
            totalSentenceCount++;
        }
        if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
            if (mark != -1) {
                check_word_simple(str + mark, pos - mark);
                mark = -1;
            }
        } else if (mark == -1) {
            // First byte of a new word.
            ++fileWords;
            if (*c >= 'A' && *c <= 'Z') {
                totalCapitalizedCount++;
            }
            mark = pos;
        } else if (*c >= '0' && *c <= '9') {
            totalNumberCount++;
            // Skip to the end of this word; the outer ++pos then steps over the whitespace.
            for (; pos < size; ++pos) {
                c = str + pos;
                if (*c == '.') {
                    totalSentenceCount++;
                }
                if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
                    break;
                }
            }
            mark = -1;
        }
    }
    // A word that runs up to the end of the data has no trailing whitespace to trigger the check.
    if (mark != -1) {
        check_word_simple(str + mark, size - mark);
    }
    totalWordCount += fileWords;
 }
 void aio_completion_handler(sigval_t sigval) {
        fileCount++;
        info *data = (info *)sigval.sival_ptr;
        auto req = data->cb;
        // auto req = (struct aiocb *)sigval.sival_ptr;
        /* Did the request complete? */
        auto error = aio_error(req);
        if (error == 0) {
            /* Request completed successfully, get the return status */
            // const auto start{std::chrono::steady_clock::now()};
            // const std::chrono::duration<double> start_seconds{start - (data->start)};
            // std::println("File started {} in {}", data->name, start_seconds.count());
            read_str((char *)req->aio_buf, aio_return(req));
            // const auto finish{std::chrono::steady_clock::now()};
            // const std::chrono::duration<double> elapsed_seconds{finish - (data->start)};
            // std::println("File read {} in {}", data->name, elapsed_seconds.count());
        } else {
            std::println("Error at aio_error ({}): ", error);
            failCount++;
        }
        --done;
 }
 // Entry point: reads every file named on the command line with POSIX asynchronous I/O,
 // waits for all completion handlers, then prints the combined statistics.
 // Returns 1 when no file was given, when memory for the request lists could not be
 // allocated, or when at least one file failed; 0 otherwise.
 int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }
    const auto fileTotal = static_cast<std::size_t>(argc - 1);
    // 5mb per file; a single read of this size is issued, so longer files are only read up to it.
    constexpr std::size_t BUFFER_BYTES = 5 * 1024 * 1024;
    done = argc - 1;
    // lio_listio
    // calloc zero-fills the control blocks, so fields that are never assigned below
    // (for example aio_reqprio) are not left holding indeterminate values.
    auto aiocb_list = (struct aiocb *)calloc(fileTotal, sizeof(struct aiocb));
    auto aiocb_list_ptr = (struct aiocb **)calloc(fileTotal, sizeof(struct aiocb *));
    if (aiocb_list == nullptr || aiocb_list_ptr == nullptr) {
        std::println("Out of memory");
        free(aiocb_list);
        free(aiocb_list_ptr);
        return 1;
    }
    for (std::size_t i = 0; i < fileTotal; i++) {
        aiocb_list[i].aio_fildes = open(argv[i + 1], O_RDONLY);
        aiocb_list[i].aio_offset = 0;
        aiocb_list[i].aio_buf = malloc(BUFFER_BYTES);
        aiocb_list[i].aio_nbytes = BUFFER_BYTES;
        // lio_listio() chooses the operation from this field; it must be set explicitly.
        aiocb_list[i].aio_lio_opcode = LIO_READ;
        aiocb_list[i].aio_sigevent.sigev_notify = SIGEV_THREAD;
        aiocb_list[i].aio_sigevent.sigev_notify_function = aio_completion_handler;
        aiocb_list[i].aio_sigevent.sigev_notify_attributes = nullptr;
        // The handler receives this context and releases it when it is finished.
        aiocb_list[i].aio_sigevent.sigev_value.sival_ptr = new info{
            argv[i + 1],
            &aiocb_list[i]};
        aiocb_list_ptr[i] = &aiocb_list[i];
    }
    // NOTE(review): the return value is not checked; a request that could not be queued
    // presumably never runs its handler, which would leave `done` above 0 — confirm.
    lio_listio(LIO_WAIT, aiocb_list_ptr, argc - 1, nullptr);
    // The handlers run asynchronously; poll until each one has decremented `done`.
    while (done > 0) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
    std::println("Done reading files, {} done", done);
    double capitalizedPercentage = (totalWordCount > 0)
                                       ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
                                       : 0;
    double forbiddenPercentage = (totalWordCount > 0)
                                     ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
                                     : 0;
    double wordCountPerSentence = (totalSentenceCount > 0)
                                      ? static_cast<double>(totalWordCount) / totalSentenceCount
                                      : 0;
    std::println(
        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
        );
    for (std::size_t i = 0; i < fileTotal; i++) {
        // A file that could not be opened has no descriptor to close.
        if (aiocb_list[i].aio_fildes != -1) {
            close(aiocb_list[i].aio_fildes);
        }
        free((void *)aiocb_list[i].aio_buf);
    }
    free(aiocb_list);
    free(aiocb_list_ptr);
    if (failCount > 0) {
        return 1;
    }
    return 0;
 }
--- a/jest_rust/.gitignore
+++ b/jest_rust/.gitignore
@ -0,0 +1,3 @@
 /target
 /Cargo.lock
 /test_books
--- a/jest_rust/Cargo.toml
+++ b/jest_rust/Cargo.toml
@ -0,0 +1,15 @@
 [package]
 name = "jisspam"
 version = "0.1.0"
 edition = "2024"
 [dependencies]
 fxhash = "0.2.1"
 tokio = { version = "1.44.1", features = ["full"] }
 [profile.release]
 codegen-units = 1 # less means more compile work but better optimized
 lto = "fat"      # thin has best performance. fat the worst
 strip = true
 # opt-level = "z" # slows down
 panic = "abort"
--- a/jest_rust/README.md
+++ b/jest_rust/README.md
@ -0,0 +1,84 @@
 for https://retoor.molodetz.nl/retoor/isspam
 extract `../books.tar.gz`
 # local machine benchmarks
 single threaded: `33.63373279571533`
 rayon: `4.294418811798096`
 tokio: `4.717588901519775`
 tokio:
 muncher: `2486ms`
 for_loops: `1227ms`
 for_loops_forbidden_only: `987ms`
 trie creation and stats accumulation take 0ms
 FxHashMap faster than BTreeMap
 ## compile options benchmarks
 `lto`: `thin` vs `fat` doesn't change much
 `codegen-units`: 0 vs 1 doesn't change much
 `opt-level = "z"` slows things down
 # ubuntu terminal running
 Instructions for running on the Ubuntu terminal at https://snek.molodetz.nl/terminal.html:
 ```
 mkdir /project
 cd /project
 git clone https://retoor.molodetz.nl/retoor/isspam.git
 apt install valgrind curl
 export RUSTUP_HOME=/project/.rustup
 export CARGO_HOME=/project/.cargo
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 . "/project/.cargo/env"
 cd isspam
 rustup install nightly
 rustup default nightly
 make
 make benchmark
 python3 bench.py
 ```
 clone: `git clone https://gitlab.com/jestdotty-group/draft/jisspam.git jest_rust`
 edit the makefile (`vi makefile`) and add a build target:
 ```
 build_jest:
 	@echo "compiling jest_rust project"
 	cd jest_rust && cargo build --release && cp target/release/jisspam ..
 ```
 append `build_jest` to the `all` target:
 ```
 all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest
 ```
 add to bench: `vi bench.py`
 ```py
 time_start = time.time()
 subprocess.check_output('./jisspam books/*.txt', shell=True)
 print("Time Jest Rust:", time.time() - time_start)
 ```
 run: `python3 bench.py`
 output looks something like this:
 ```
 ***benchmarking***
 Time C: 31.315868377685547
 Time Rust: 41.232205867767334
 Time CPP: 20.1683189868927
 Time Borded CPP: 15.468477964401245
 Time Jest Rust: 54.74523115158081
 Time Retoor Python: 287.63036131858826
 ***end benchmark***
 ```
 add `/jisspam` to `.gitignore` to not commit the executable accidentally
--- a/jest_rust/src/main.rs
+++ b/jest_rust/src/main.rs
@ -0,0 +1,122 @@
 mod parser;
 mod stats;
 mod trie;
 use stats::Stats;
 use std::{env, fs, sync::LazyLock};
 use tokio::sync::mpsc;
 use trie::Trie;
 /// Trie holding every forbidden word; it is built on first access.
 static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
 	// All entries are lowercase; the parsers lowercase each word before the lookup.
 	const WORDS: &[&str] = &[
 		"recovery",
 		"techie",
 		"http",
 		"https",
 		"digital",
 		"hack",
 		"::",
 		"//",
 		"@",
 		"com",
 		"crypto",
 		"bitcoin",
 		"wallet",
 		"hacker",
 		"welcome",
 		"whatsapp",
 		"email",
 		"cryptocurrency",
 		"stolen",
 		"freeze",
 		"quick",
 		"crucial",
 		"tracing",
 		"scammers",
 		"expers",
 		"hire",
 		"century",
 		"transaction",
 		"essential",
 		"managing",
 		"contact",
 		"contacting",
 		"understanding",
 		"assets",
 		"funds",
 	];
 	let mut forbidden = Trie::default();
 	WORDS
 		.iter()
 		.copied()
 		.for_each(|entry| forbidden.insert(entry));
 	forbidden
 });
 /// Spawns one task per command-line path, parses each file into its own `Stats`,
 /// sums the per-file results as they arrive and prints the total.
 #[tokio::main]
 async fn main() {
 	let (sender, mut receiver) = mpsc::unbounded_channel();
 	for path in env::args().skip(1) {
 		let sender = sender.clone();
 		tokio::spawn(async move {
 			let mut file_stats = Stats::default();
 			//reading files in threads doesn't change speed of any sort but oh well
 			match fs::read_to_string(&path) {
 				Ok(contents) => {
 					file_stats.file_count += 1;
 					parser::for_loops::parse(&mut file_stats, &contents);
 				}
 				Err(_) => file_stats.failed_file_count += 1,
 			}
 			// A send error only means the receiver is gone; nothing left to report to.
 			let _ = sender.send(file_stats);
 		});
 	}
 	// Drop the original sender so `recv` yields `None` once every task has reported.
 	drop(sender);

 	let mut totals = Stats::default();
 	while let Some(file_stats) = receiver.recv().await {
 		totals += file_stats;
 	}
 	println!("{totals}");
 }
 /// Benchmark-style run: builds the release binary, then times it over every file in ../books.
 /// needs ../books.tar.gz to be extracted into ../books
 #[test]
 fn test() {
 	use std::{env, fs, process::Command, time::Instant};

 	println!("cwd: {}", env::current_dir().unwrap().display());

 	//compile
 	match Command::new("cargo").arg("build").arg("--release").output() {
 		Ok(output) => println!("compiled {}", String::from_utf8_lossy(&output.stdout)),
 		Err(err) => eprintln!("compile failed: {err}"),
 	}

 	//get test files (as absolute paths)
 	let mut files = Vec::new();
 	for entry in fs::read_dir("../books").unwrap() {
 		let absolute = entry.unwrap().path().canonicalize().unwrap();
 		files.push(absolute.to_str().unwrap().to_string());
 	}
 	println!("test files found: {}", files.len());
 	println!();

 	//benchmark run
 	let benchmark = Instant::now();
 	let outcome = Command::new("target/release/jisspam").args(files).output();
 	match outcome {
 		Ok(output) => println!("{}", String::from_utf8_lossy(&output.stdout)),
 		Err(err) => eprintln!("run failed: {err}"),
 	}
 	println!("benchmark: {}ms", benchmark.elapsed().as_millis());
 }
--- a/jest_rust/src/parser/for_loops.rs
+++ b/jest_rust/src/parser/for_loops.rs
@ -0,0 +1,37 @@
 use crate::{FORBIDDEN_WORDS, stats::Stats};
 #[allow(dead_code)]
 /// Iterator-based parser: splits `text` into sentences on `.` and each sentence into words on
 /// ASCII whitespace, updating the sentence, word, numeric, capitalized and forbidden counters.
 ///
 /// author's timings: typically 5000ms, with trie this is 1600ms
 pub fn parse(stats: &mut Stats, text: &str) {
 	let sentences = text
 		.split('.')
 		.map(str::trim)
 		.filter(|candidate| !candidate.is_empty());
 	for sentence in sentences {
 		stats.sentence_count += 1;

 		let words = sentence
 			.split_ascii_whitespace()
 			.map(str::trim)
 			.filter(|candidate| !candidate.is_empty());
 		for word in words {
 			stats.word_count += 1;

 			// One pass over the characters: count every numeric character and track
 			// whether the whole word is made of ASCII uppercase letters.
 			let mut only_uppercase = true;
 			for c in word.chars() {
 				only_uppercase &= c.is_ascii_uppercase();
 				if c.is_numeric() {
 					stats.numeric_count += 1;
 				}
 			}
 			if only_uppercase {
 				stats.capitalized_count += 1;
 			}

 			let lowered = word.to_lowercase();
 			if FORBIDDEN_WORDS.contains(&lowered) {
 				stats.forbidden_count += 1;
 			}
 		}
 	}
 }
--- a/jest_rust/src/parser/for_loops_forbidden_only.rs
+++ b/jest_rust/src/parser/for_loops_forbidden_only.rs
@ -0,0 +1,14 @@
 use crate::{FORBIDDEN_WORDS, stats::Stats};
 #[allow(dead_code)]
 /// Reduced parser that only updates `forbidden_count`: every whitespace-separated token is
 /// lowercased and looked up in `FORBIDDEN_WORDS`.
 pub fn parse(stats: &mut Stats, text: &str) {
 	text.split_ascii_whitespace()
 		.map(str::trim)
 		.filter(|token| !token.is_empty())
 		.filter(|token| FORBIDDEN_WORDS.contains(&token.to_lowercase()))
 		.for_each(|_| stats.forbidden_count += 1);
 }
--- a/jest_rust/src/parser/mod.rs
+++ b/jest_rust/src/parser/mod.rs
@ -0,0 +1,3 @@
 pub mod for_loops;
 pub mod for_loops_forbidden_only;
 pub mod muncher;
--- a/jest_rust/src/parser/muncher.rs
+++ b/jest_rust/src/parser/muncher.rs
@ -0,0 +1,66 @@
 use crate::{FORBIDDEN_WORDS, stats::Stats};
 #[allow(dead_code)]
 /// Single-pass, character-by-character parser.
 ///
 /// Words are delimited by whitespace or `.`; a `.` additionally closes the current sentence.
 /// Empty tokens (runs of whitespace or dots) are never counted, and a final word or sentence
 /// that is not followed by a delimiter is still counted.
 ///
 /// New lines are treated as plain whitespace, not as sentence ends.
 ///
 /// Timings noted by the original author for the previous version of this function:
 /// 500ms without the forbidden words check, 6000ms with it, 2600ms with the trie.
 pub fn parse(stats: &mut Stats, text: &str) {
 	// Records the buffered word (if any) in `stats` and clears the buffer.
 	// Returns `true` when a word was actually recorded.
 	fn flush_word(stats: &mut Stats, word: &mut String, capitalized: bool) -> bool {
 		if word.is_empty() {
 			return false;
 		}
 		stats.word_count += 1;
 		if capitalized {
 			stats.capitalized_count += 1;
 		}
 		if FORBIDDEN_WORDS.contains(&word.to_lowercase()) {
 			stats.forbidden_count += 1;
 		}
 		word.clear();
 		true
 	}

 	let mut word = String::new();
 	// Stays true while every character of the current word is an ASCII uppercase letter.
 	let mut capitalized = true;
 	// Set once the current sentence contains at least one word, so "..." counts one sentence at most.
 	let mut sentence_has_words = false;

 	for c in text.chars() {
 		let ends_sentence = c == '.';
 		if ends_sentence || c.is_whitespace() {
 			sentence_has_words |= flush_word(stats, &mut word, capitalized);
 			capitalized = true; //reset for the next word
 			if ends_sentence && sentence_has_words {
 				stats.sentence_count += 1;
 				sentence_has_words = false;
 			}
 			continue;
 		}

 		// Regular character: it belongs to the current word, including the first one
 		// after a delimiter.
 		word.push(c);
 		if c.is_numeric() {
 			stats.numeric_count += 1;
 		}
 		if !c.is_ascii_uppercase() {
 			capitalized = false;
 		}
 	}

 	// Text that does not end in whitespace or a period still has a pending word / sentence.
 	sentence_has_words |= flush_word(stats, &mut word, capitalized);
 	if sentence_has_words {
 		stats.sentence_count += 1;
 	}
 }
--- a/jest_rust/src/stats.rs
+++ b/jest_rust/src/stats.rs
@ -0,0 +1,58 @@
 use std::{fmt::Display, ops::AddAssign};
 /// Counters collected while scanning text; all of them start at zero via `Default`.
 #[derive(Debug, Default)]
 pub struct Stats {
 	/// files that were read successfully
 	pub file_count: u32,
 	/// files that could not be read
 	pub failed_file_count: u32,
 	/// sentences counted by the parser
 	pub sentence_count: u32,
 	/// words counted by the parser
 	pub word_count: u32,
 	/// words the parser classified as capitalized
 	pub capitalized_count: u32,
 	/// numeric characters seen (incremented per character, not per number)
 	pub numeric_count: u32,
 	/// words whose lowercase form was found in the forbidden word list
 	pub forbidden_count: u32,
 }
 impl AddAssign for Stats {
 	fn add_assign(&mut self, rhs: Self) {
 		self.file_count += rhs.file_count;
 		self.failed_file_count += rhs.failed_file_count;
 		self.sentence_count += rhs.sentence_count;
 		self.word_count += rhs.word_count;
 		self.capitalized_count += rhs.capitalized_count;
 		self.numeric_count += rhs.numeric_count;
 		self.forbidden_count += rhs.forbidden_count;
 	}
 }
 impl Display for Stats {
 	/// Writes one line per raw counter followed by three derived figures:
 	/// average words per sentence, forbidden word percentage and capitalized word percentage.
 	///
 	/// A derived figure whose denominator is zero is reported as 0 instead of `NaN`/`inf`.
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		// `numerator / denominator`, or 0 when there is nothing to divide by.
 		// f64 represents every u32 exactly, so large counts lose no precision.
 		fn ratio(numerator: u32, denominator: u32) -> f64 {
 			if denominator == 0 {
 				0.0
 			} else {
 				f64::from(numerator) / f64::from(denominator)
 			}
 		}

 		writeln!(f, "file count: {}", self.file_count)?;
 		writeln!(f, "failed file count: {}", self.failed_file_count)?;
 		writeln!(f, "sentence count: {}", self.sentence_count)?;
 		writeln!(f, "word count: {}", self.word_count)?;
 		writeln!(f, "capitalized count: {}", self.capitalized_count)?;
 		writeln!(f, "numeric count: {}", self.numeric_count)?;
 		writeln!(f, "forbidden count: {}", self.forbidden_count)?;
 		writeln!(
 			f,
 			"words per sentence average: {:.1}",
 			ratio(self.word_count, self.sentence_count)
 		)?;
 		writeln!(
 			f,
 			"forbidden word percentage: {:.2}%",
 			ratio(self.forbidden_count, self.word_count) * 100.0,
 		)?;
 		// Last line is written without a trailing newline.
 		write!(
 			f,
 			"capitalized word percentage: {:.2}%",
 			ratio(self.capitalized_count, self.word_count) * 100.0,
 		)
 	}
 }
--- a/jest_rust/src/trie.rs
+++ b/jest_rust/src/trie.rs
@ -0,0 +1,33 @@
 use fxhash::FxBuildHasher;
 use std::collections::HashMap;
 type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
 /// One node of the trie.
 #[derive(Default, Debug, Clone)]
 struct Node {
 	/// set when an inserted word ends at this node
 	end: bool,
 	/// child nodes, keyed by the next character of the word
 	children: FxHashMap<char, Node>,
 }
 /// Character trie used for exact whole-word lookups (see `insert` / `contains`).
 #[derive(Default, Debug, Clone)]
 pub struct Trie {
 	/// node reached before any character has been consumed
 	root: Node,
 }
 impl Trie {
 	/// Adds `word` to the trie, creating any nodes missing along its path.
 	pub fn insert(&mut self, word: &str) {
 		let last = word
 			.chars()
 			.fold(&mut self.root, |node, c| node.children.entry(c).or_default());
 		last.end = true;
 	}

 	/// Returns `true` only when exactly `word` was inserted before; a mere prefix of an
 	/// inserted word does not count.
 	pub fn contains(&self, word: &str) -> bool {
 		// `try_fold` stops at the first character that has no matching child.
 		word.chars()
 			.try_fold(&self.root, |node, c| node.children.get(&c))
 			.is_some_and(|node| node.end)
 	}
 }
--- a/retoor_c/isspam.py
+++ b/retoor_c/isspam.py
@ -0,0 +1,85 @@
 import os
 import sys
 import threading
 from concurrent.futures import ThreadPoolExecutor
 # NOTE(review): neither constant is referenced by the visible code in this file;
 # presumably both are carried over from the C implementation - confirm before removing.
 MAX_TEXT_LENGTH = 1024
 FORBIDDEN_WORDS_COUNT = 40
 # Tokens that count as "forbidden"; membership is tested against the token exactly as it
 # appears in the text.
 forbidden_words = {
    "recovery",
    "techie",
    "http",
    "https",
    "digital",
    "hack",
    "::",
    "//",
    "com",
    "@",
    "crypto",
    "bitcoin",
    "wallet",
    "hacker",
    "welcome",
    "whatsapp",
    "email",
    "cryptocurrency",
    "stolen",
    "freeze",
    "quick",
    "crucial",
    "tracing",
    "scammers",
    "expers",
    "hire",
    "century",
    "transaction",
    "essential",
    "managing",
    "contact",
    "contacting",
    "understanding",
    "assets",
    "funds",
 }
 class AnalysisResult:
    """Counters for one input file; every counter starts at zero."""

    def __init__(self, filename):
        # Path of the file these counters belong to.
        self.filename = filename
        self.total_word_count = self.total_capitalized_count = 0
        self.total_sentence_count = self.total_number_count = 0
        self.total_forbidden_count = 0
 def is_forbidden(word):
    """Return True when `word` is, exactly as given, one of the forbidden words."""
    if word in forbidden_words:
        return True
    return False
 def read_file(filename):
    """Return the text of `filename`, or None when it cannot be opened or read.

    The file is opened directly instead of being checked with os.path.exists first:
    that check is racy, and it also lets directories and unreadable paths through,
    whose OSError would otherwise propagate out of the worker and abort the run.
    A message is printed for every path that is skipped.
    """
    try:
        with open(filename, 'r') as file:
            return file.read()
    except FileNotFoundError:
        print(f"File doesn't exist: {filename}")
    except OSError as error:
        # e.g. the path is a directory or permission was denied
        print(f"Could not read file: {filename} ({error})")
    return None
 def analyze_file(result):
    """Read `result.filename` and store its counts on `result`.

    Nothing is changed when the file yields no text (read_file returned None or "").
    """
    contents = read_file(result.filename)
    if not contents:
        return

    words = contents.split()
    capitalized = 0
    with_digit = 0
    forbidden = 0
    for word in words:
        # A word counts as capitalized when its first character is uppercase.
        if word[0].isupper():
            capitalized += 1
        # A word counts as a number when it contains at least one digit.
        if any(symbol.isdigit() for symbol in word):
            with_digit += 1
        if is_forbidden(word):
            forbidden += 1

    # Sentences are approximated by the number of periods in the text.
    result.total_sentence_count = contents.count('.')
    result.total_word_count = len(words)
    result.total_capitalized_count = capitalized
    result.total_number_count = with_digit
    result.total_forbidden_count = forbidden
 def main():
    """Analyze every file named on the command line and print the combined totals.

    Prints a usage line and returns when no file is given. Files are analyzed in a
    thread pool; the per-file results are then summed and reported together with
    the capitalized / forbidden percentages and the average words per sentence.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
        return

    results = []
    with ThreadPoolExecutor() as executor:
        futures = []
        for filename in sys.argv[1:]:
            result = AnalysisResult(filename)
            results.append(result)
            futures.append(executor.submit(analyze_file, result))

        # result() re-raises any exception the worker hit.
        for future in futures:
            future.result()

    total_word_count = sum(result.total_word_count for result in results)
    total_capitalized_count = sum(result.total_capitalized_count for result in results)
    total_sentence_count = sum(result.total_sentence_count for result in results)
    total_number_count = sum(result.total_number_count for result in results)
    total_forbidden_count = sum(result.total_forbidden_count for result in results)

    # Conditional expressions only evaluate the division when the denominator is
    # non-zero; multiplying by a boolean guard would still divide by zero.
    capitalized_percentage = (
        total_capitalized_count / total_word_count * 100.0 if total_word_count > 0 else 0.0
    )
    forbidden_percentage = (
        total_forbidden_count / total_word_count * 100.0 if total_word_count > 0 else 0.0
    )
    word_count_per_sentence = (
        total_word_count / total_sentence_count if total_sentence_count > 0 else 0.0
    )

    print(f"\nTotal Words: {total_word_count}")
    print(f"Total Capitalized words: {total_capitalized_count}")
    print(f"Total Sentences: {total_sentence_count}")
    print(f"Total Numbers: {total_number_count}")
    print(f"Total Forbidden words: {total_forbidden_count}")
    print(f"Capitalized percentage: {capitalized_percentage:.6f}%")
    print(f"Forbidden percentage: {forbidden_percentage:.6f}%")
    print(f"Word count per sentence: {word_count_per_sentence:.6f}")
    # Number of paths given on the command line, whether or not they could be read.
    print(f"Total files read: {len(sys.argv) - 1}")
 # Run the analysis only when this file is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()