sequential file read slightly faster

clean up README
fxhash and I remembered how to static lazy right
2025-03-24 16:03:36 -04:00 · 2025-03-24 00:48:04 -04:00 · 2025-03-24 00:20:15 -04:00 · 2025-03-23 23:58:34 -04:00 · 2025-03-23 23:23:40 -04:00 · 2025-03-23 23:14:30 -04:00
22 changed files with 1837 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,7 +5,10 @@ publish
 books
 __pycache__
 target
+./isspam.py
 isspam
 risspam
+/jisspam
 isspam_cpp
 .build-trigger-2014-12-02 15:26
+borded_cpp_exec
--- a/14
+++ b/14
@ -1,20 +1,32 @@
 CC = gcc 
 CFLAGS = -Ofast

-all: build run valgrind build_risspam run_risspam build_cpp
+all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest

 build:
 	@echo "Compiling retoor_c project.".
 	@$(CC) $(CFLAGS) retoor_c/isspam.c -o isspam

+build_py:
+	@echo "Copying py file"
+	@cp retoor_c/isspam.py isspam.py
+
 build_cpp:
 	@echo "Compiling C++ version of isspam."
 	@g++ -Ofast retoor_c/isspam.cpp -o isspam_cpp

+build_borded_cpp:
+	@echo "Compiling Borded C++ version of isspam."
+	@g++ -std=c++23 -Ofast borded_cpp/src/main3.cpp -o borded_cpp_exec
+
 build_risspam:
 	@echo "Compiling 12bitfloat_risspam project."
 	cd 12bitfloat_rust/risspam && cargo run --release && cp target/release/risspam ../../

+build_jest:
+	@echo "compiling jest_rust project"
+	cd jest_rust && cargo build --release && cp target/release/jisspam ..
+
 run: run_spam wl run_not_spam
 run_risspam: run_spam_risspam run_not_spam_risspam

--- a/bench.py
+++ b/bench.py
@ -11,4 +11,13 @@ print("Time Rust:",time.time() - time_start)
 time_start = time.time()
 subprocess.check_output('./isspam_cpp books/*.txt', shell=True)
 print("Time CPP:",time.time() - time_start)
+time_start = time.time()
+subprocess.check_output('./borded_cpp_exec books/*.txt', shell=True)
+print("Time Borded CPP:",time.time() - time_start)
+time_start = time.time()
+subprocess.check_output('./jisspam books/*.txt', shell=True)
+print("Time Jest Rust:", time.time() - time_start)
+time_start = time.time()
+subprocess.check_output('python3 isspam.py books/*.txt', shell=True)
+print("Time Retoor Python:",time.time() - time_start)
 print("***end benchmark***")
--- a/borded_cpp/.gitignore
+++ b/borded_cpp/.gitignore
@ -0,0 +1,97 @@
+*.d
+*.slo
+*.lo
+*.o
+*.obj
+*.gch
+*.pch
+*.so
+*.dylib
+*.dll
+*.mod
+*.smod
+*.lai
+*.la
+*.a
+*.lib
+*.exe
+*.out
+*.app
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+.idea/**/aws.xml
+.idea/**/contentModel.xml
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+.idea/**/gradle.xml
+.idea/**/libraries
+.idea
+cmake-build-*/
+.idea/**/mongoSettings.xml
+*.iws
+out/
+.idea_modules/
+atlassian-ide-plugin.xml
+.idea/replstate.xml
+.idea/sonarlint/
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+.idea/httpRequests
+.idea/caches/build_file_checksums.ser
+*~
+.fuse_hidden*
+.directory
+.Trash-*
+.nfs*
+CMakeLists.txt.user
+CMakeCache.txt
+CMakeFiles
+CMakeScripts
+Testing
+Makefile
+cmake_install.cmake
+install_manifest.txt
+compile_commands.json
+CTestTestfile.cmake
+_deps
+CMakeUserPresets.json
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+*.stackdump
+[Dd]esktop.ini
+$RECYCLE.BIN/
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+*.lnk
+.DS_Store
+.AppleDouble
+.LSOverride
+Icon
+._*
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
--- a/borded_cpp/CMakeLists.txt
+++ b/borded_cpp/CMakeLists.txt
@ -0,0 +1,27 @@
+cmake_minimum_required(VERSION 3.25)
+project(isspam)
+
+set(CMAKE_CXX_STANDARD 26)
+if (MSVC)
+    add_compile_options(/W4)
+    add_compile_options(/WX)
+    add_compile_options(/external:anglebrackets)
+    add_compile_options(/external:W0)
+    add_compile_options(/wd4100)
+    add_compile_options(/wd5050)
+    add_definitions(-DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN)
+    add_compile_definitions(WIN32_LEAN_AND_MEAN NOMINMAX)
+else ()
+    add_compile_options(-Wall)
+    add_compile_options(-Wextra)
+    add_compile_options(-Wpedantic)
+#    add_compile_options(-Werror)
+endif ()
+
+add_executable(${PROJECT_NAME} src/main.cpp)
+add_executable(${PROJECT_NAME}3 src/main3.cpp)
+
+if (LINUX)
+    target_link_libraries(${PROJECT_NAME} tbb)
+    target_link_libraries(${PROJECT_NAME}3 tbb)
+endif ()
--- a/borded_cpp/Dockerfile
+++ b/borded_cpp/Dockerfile
@ -0,0 +1,3 @@
+FROM gcc:latest
+RUN apt update && apt install -y cmake gdb
+WORKDIR /home
--- a/borded_cpp/compose.yml
+++ b/borded_cpp/compose.yml
@ -0,0 +1,9 @@
+services:
+  cpp:
+    build: .
+    command: ["sh","doit.sh"]
+    tty: true 
+    stdin_open: true 
+    volumes:
+      - ./:/home
+      - ../books:/books
--- a/borded_cpp/doit.sh
+++ b/borded_cpp/doit.sh
@ -0,0 +1,2 @@
+rm -rf build | true
+mkdir build && cd build && cmake .. && make
--- a/borded_cpp/src/main.cpp
+++ b/borded_cpp/src/main.cpp
@ -0,0 +1,221 @@
+#include <string>
+#include <string_view>
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <execution>
+#include <format>
+#include <codecvt>
+#include <ranges>
+
+#ifdef __cpp_lib_print
+#include <print>
+#else
+namespace std {
+template <typename T, typename... Args>
+inline void print(T format, Args &&... args) {
+    auto f = std::vformat(format, std::make_format_args(args...));
+    std::cout << f;
+}
+
+template <typename T, typename... Args>
+inline void println(T format, Args &&... args) {
+    auto f = std::vformat(format, std::make_format_args(args...));
+    std::cout << f << std::endl;
+}
+}
+#endif
+
+constexpr std::array<std::wstring_view, 35> BAD_WORDS = {
+    L"recovery",
+    L"techie",
+    L"http",
+    L"https",
+    L"digital",
+    L"hack",
+    L"::",
+    L"//",
+    L"com",
+    L"@",
+    L"crypto",
+    L"bitcoin",
+    L"wallet",
+    L"hacker",
+    L"welcome",
+    L"whatsapp",
+    L"email",
+    L"cryptocurrency",
+    L"stolen",
+    L"freeze",
+    L"quick",
+    L"crucial",
+    L"tracing",
+    L"scammers",
+    L"expers",
+    L"hire",
+    L"century",
+    L"transaction",
+    L"essential",
+    L"managing",
+    L"contact",
+    L"contacting",
+    L"understanding",
+    L"assets",
+    L"funds",
+};
+
+constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::max(),
+                                                          [](std::size_t current, const std::wstring_view &word) {
+                                                              return std::min(current, word.size());
+                                                          }
+    );
+constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::min(),
+                                                         [](std::size_t current, const std::wstring_view &word) {
+                                                             return std::max(current, word.size());
+                                                         }
+    );
+
+struct AnalysisResult {
+    std::size_t totalWordCount = 0;
+    std::size_t totalCapitalizedCount = 0;
+    std::size_t totalSentenceCount = 0;
+    std::size_t totalNumberCount = 0;
+    std::size_t totalForbiddenCount = 0;
+    std::size_t fileCount = 1;
+
+    std::size_t failCount = 0;
+
+    operator std::string() const {
+        return std::format(
+            "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}",
+            totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount
+            );
+    }
+
+    friend AnalysisResult operator+(const AnalysisResult &lhs, const AnalysisResult &rhs) {
+        return {
+            lhs.totalWordCount + rhs.totalWordCount,
+            lhs.totalCapitalizedCount + rhs.totalCapitalizedCount,
+            lhs.totalSentenceCount + rhs.totalSentenceCount,
+            lhs.totalNumberCount + rhs.totalNumberCount,
+            lhs.totalForbiddenCount + rhs.totalForbiddenCount,
+            lhs.fileCount + rhs.fileCount,
+            lhs.failCount + rhs.failCount
+        };
+    };
+};
+
+void check_word(std::wstring &word, std::size_t &forbiddenCount) {
+    if (word.size() < SHORTEST_BAD_WORD || word.size() > LONGEST_BAD_WORD) {
+        return;
+    }
+    std::ranges::transform(word, word.begin(), ::towlower);
+        if (std::ranges::find(BAD_WORDS, word) != BAD_WORDS.end()) {
+            forbiddenCount++;
+        }
+    // if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) {
+    //                              return word.contains(badWord);
+    //                          }
+    //         ) != BAD_WORDS.end()) {
+    //     forbiddenCount++;
+    // }
+}
+
+AnalysisResult parseFile(const std::string_view &filename) {
+    std::wifstream file;
+
+    // surpress warning of deprecation
+#pragma warning(push)
+#pragma warning(suppress : 4996)
+    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
+#pragma warning(pop)
+
+    file.open(std::string(filename));
+    if (!file.is_open()) {
+        std::println("File doesn't exist: {}", filename);
+        return { };
+    }
+
+    AnalysisResult result{ };
+
+    bool inWord = false;
+    bool isDigit = false;
+    wchar_t c;
+
+    std::wstring word;
+    while (file.get(c)) {
+        if (c == '.') {
+            result.totalSentenceCount++;
+        }
+
+        if (std::isspace(c)) {
+            inWord = false;
+            isDigit = false;
+
+            if (!word.empty()) {
+                check_word(word, result.totalForbiddenCount);
+                word.clear();
+            }
+            continue;
+        } else {
+            if (!inWord) {
+                result.totalWordCount++;
+                if (std::isupper(c)) {
+                    result.totalCapitalizedCount++;
+                }
+            }
+            inWord = true;
+
+            if (std::isdigit(c) && !isDigit) {
+                result.totalNumberCount++;
+                isDigit = true;
+            }
+
+            word.push_back(c);
+        }
+    };
+
+    // std::cout << "File state: " << file.rdstate() << " EOF" << file.eof() << " Fail" << file.fail() << " Bad" << file.bad() << std::endl;
+
+    if (!word.empty()) {
+        check_word(word, result.totalForbiddenCount);
+    }
+
+    file.close();
+
+    if (file.fail() && !file.eof()) {
+        result.failCount++;
+    }
+
+    return result;
+}
+
+int main(const int argc, char *argv[]) {
+    if (argc < 2) {
+        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
+        return 1;
+    }
+
+    const AnalysisResult result = std::transform_reduce(std::execution::par_unseq, std::next(argv), argv + argc,
+                                                        AnalysisResult{.fileCount = 0},
+                                                        std::plus{ },
+                                                        parseFile
+        );
+
+    double capitalizedPercentage = (result.totalWordCount > 0)
+                                       ? static_cast<double>(result.totalCapitalizedCount) / result.totalWordCount * 100.0
+                                       : 0;
+    double forbiddenPercentage = (result.totalWordCount > 0)
+                                     ? static_cast<double>(result.totalForbiddenCount) / result.totalWordCount * 100.0
+                                     : 0;
+    double wordCountPerSentence = (result.totalSentenceCount > 0)
+                                      ? static_cast<double>(result.totalWordCount) / result.totalSentenceCount
+                                      : 0;
+
+    std::println("{}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}", std::string(result),
+                 capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
+        );
+
+    return 0;
+}
--- a/borded_cpp/src/main2.cpp
+++ b/borded_cpp/src/main2.cpp
@ -0,0 +1,195 @@
+#include <string>
+#include <string_view>
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <execution>
+#include <format>
+#include <codecvt>
+#include <ranges>
+
+#ifdef __cpp_lib_print
+#include <print>
+#else
+namespace std {
+template <typename T, typename... Args>
+inline void print(T format, Args &&... args) {
+    auto f = std::vformat(format, std::make_format_args(args...));
+    std::cout << f;
+}
+
+template <typename T, typename... Args>
+inline void println(T format, Args &&... args) {
+    auto f = std::vformat(format, std::make_format_args(args...));
+    std::cout << f << std::endl;
+}
+}
+#endif
+
+constexpr std::array<std::wstring_view, 35> BAD_WORDS = {
+    L"recovery",
+    L"techie",
+    L"http",
+    L"https",
+    L"digital",
+    L"hack",
+    L"::",
+    L"//",
+    L"com",
+    L"@",
+    L"crypto",
+    L"bitcoin",
+    L"wallet",
+    L"hacker",
+    L"welcome",
+    L"whatsapp",
+    L"email",
+    L"cryptocurrency",
+    L"stolen",
+    L"freeze",
+    L"quick",
+    L"crucial",
+    L"tracing",
+    L"scammers",
+    L"expers",
+    L"hire",
+    L"century",
+    L"transaction",
+    L"essential",
+    L"managing",
+    L"contact",
+    L"contacting",
+    L"understanding",
+    L"assets",
+    L"funds",
+};
+
+constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::max(),
+                                                          [](std::size_t current, const std::wstring_view &word) {
+                                                              return std::min(current, word.size());
+                                                          }
+    );
+constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::min(),
+                                                         [](std::size_t current, const std::wstring_view &word) {
+                                                             return std::max(current, word.size());
+                                                         }
+    );
+
+std::size_t totalWordCount = 0;
+std::size_t totalCapitalizedCount = 0;
+std::size_t totalSentenceCount = 0;
+std::size_t totalNumberCount = 0;
+std::size_t totalForbiddenCount = 0;
+std::size_t fileCount = 1;
+
+std::size_t failCount = 0;
+
+void check_word(std::wstring &word, std::size_t &forbiddenCount) {
+    if (word.size() < SHORTEST_BAD_WORD || word.size() > LONGEST_BAD_WORD) {
+        return;
+    }
+    std::ranges::transform(word, word.begin(), ::towlower);
+    if (std::ranges::find(BAD_WORDS, word) != BAD_WORDS.end()) {
+        forbiddenCount++;
+    }
+    // if (std::ranges::find_if(BAD_WORDS, [&word](const std::wstring_view &badWord) {
+    //                              return word.contains(badWord);
+    //                          }
+    //         ) != BAD_WORDS.end()) {
+    //     forbiddenCount++;
+    // }
+}
+
+void parseFile(const std::string_view &filename) {
+    std::wifstream file;
+
+    // surpress warning of deprecation
+#pragma warning(push)
+#pragma warning(suppress : 4996)
+    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
+#pragma warning(pop)
+
+    file.open(std::string(filename));
+    if (!file.is_open()) {
+        std::println("File doesn't exist: {}", filename);
+        return;
+    }
+
+    bool inWord = false;
+    bool isDigit = false;
+    wchar_t c;
+
+    std::wstring word;
+    while (file.get(c)) {
+        if (c == '.') {
+            totalSentenceCount++;
+        }
+
+        if (std::isspace(c)) {
+            inWord = false;
+            isDigit = false;
+
+            if (!word.empty()) {
+                check_word(word, totalForbiddenCount);
+                word.clear();
+            }
+            continue;
+        } else {
+            if (!inWord) {
+                totalWordCount++;
+                if (std::isupper(c)) {
+                    totalCapitalizedCount++;
+                }
+            }
+            inWord = true;
+
+            if (std::isdigit(c) && !isDigit) {
+                totalNumberCount++;
+                isDigit = true;
+            }
+
+            word.push_back(c);
+        }
+    };
+
+    // std::cout << "File state: " << file.rdstate() << " EOF" << file.eof() << " Fail" << file.fail() << " Bad" << file.bad() << std::endl;
+
+    if (!word.empty()) {
+        check_word(word, totalForbiddenCount);
+    }
+
+    file.close();
+
+    if (file.fail() && !file.eof()) {
+        failCount++;
+    }
+
+}
+
+int main(const int argc, char *argv[]) {
+    if (argc < 2) {
+        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
+        return 1;
+    }
+
+    std::for_each(std::execution::par_unseq, std::next(argv), argv + argc, parseFile);
+
+    double capitalizedPercentage = (totalWordCount > 0)
+                                       ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
+                                       : 0;
+    double forbiddenPercentage = (totalWordCount > 0)
+                                     ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
+                                     : 0;
+    double wordCountPerSentence = (totalSentenceCount > 0)
+                                      ? static_cast<double>(totalWordCount) / totalSentenceCount
+                                      : 0;
+
+    std::println(
+        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
+        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
+        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
+        );
+
+    return 0;
+}
--- a/borded_cpp/src/main3.cpp
+++ b/borded_cpp/src/main3.cpp
@ -0,0 +1,576 @@
+#include <string>
+#include <string_view>
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include <execution>
+#include <format>
+#include <cstdio>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <aio.h>
+#include <condition_variable>
+#include <unordered_set>
+#include <sys/signal.h>
+
+#ifdef __cpp_lib_print
+#include <print>
+#else
+namespace std {
+template <typename T, typename... Args>
+inline void print(T format, Args &&... args) {
+    auto f = std::vformat(format, std::make_format_args(args...));
+    std::cout << f;
+}
+
+template <typename T, typename... Args>
+inline void println(T format, Args &&... args) {
+    auto f = std::vformat(format, std::make_format_args(args...));
+    std::cout << f << std::endl;
+}
+}
+#endif
+
+constexpr std::array<std::string_view, 35> BAD_WORDS = {
+    "recovery",
+    "techie",
+    "http",
+    "https",
+    "digital",
+    "hack",
+    "::",
+    "//",
+    "com",
+    "@",
+    "crypto",
+    "bitcoin",
+    "wallet",
+    "hacker",
+    "welcome",
+    "whatsapp",
+    "email",
+    "cryptocurrency",
+    "stolen",
+    "freeze",
+    "quick",
+    "crucial",
+    "tracing",
+    "scammers",
+    "expers",
+    "hire",
+    "century",
+    "transaction",
+    "essential",
+    "managing",
+    "contact",
+    "contacting",
+    "understanding",
+    "assets",
+    "funds",
+};
+const std::unordered_set<std::string_view> BAD_WORDS_SET(BAD_WORDS.begin(), BAD_WORDS.end());
+static constexpr unsigned int crc_table[256] = {
+    0x00000000,
+    0x77073096,
+    0xee0e612c,
+    0x990951ba,
+    0x076dc419,
+    0x706af48f,
+    0xe963a535,
+    0x9e6495a3,
+    0x0edb8832,
+    0x79dcb8a4,
+    0xe0d5e91e,
+    0x97d2d988,
+    0x09b64c2b,
+    0x7eb17cbd,
+    0xe7b82d07,
+    0x90bf1d91,
+    0x1db71064,
+    0x6ab020f2,
+    0xf3b97148,
+    0x84be41de,
+    0x1adad47d,
+    0x6ddde4eb,
+    0xf4d4b551,
+    0x83d385c7,
+    0x136c9856,
+    0x646ba8c0,
+    0xfd62f97a,
+    0x8a65c9ec,
+    0x14015c4f,
+    0x63066cd9,
+    0xfa0f3d63,
+    0x8d080df5,
+    0x3b6e20c8,
+    0x4c69105e,
+    0xd56041e4,
+    0xa2677172,
+    0x3c03e4d1,
+    0x4b04d447,
+    0xd20d85fd,
+    0xa50ab56b,
+    0x35b5a8fa,
+    0x42b2986c,
+    0xdbbbc9d6,
+    0xacbcf940,
+    0x32d86ce3,
+    0x45df5c75,
+    0xdcd60dcf,
+    0xabd13d59,
+    0x26d930ac,
+    0x51de003a,
+    0xc8d75180,
+    0xbfd06116,
+    0x21b4f4b5,
+    0x56b3c423,
+    0xcfba9599,
+    0xb8bda50f,
+    0x2802b89e,
+    0x5f058808,
+    0xc60cd9b2,
+    0xb10be924,
+    0x2f6f7c87,
+    0x58684c11,
+    0xc1611dab,
+    0xb6662d3d,
+    0x76dc4190,
+    0x01db7106,
+    0x98d220bc,
+    0xefd5102a,
+    0x71b18589,
+    0x06b6b51f,
+    0x9fbfe4a5,
+    0xe8b8d433,
+    0x7807c9a2,
+    0x0f00f934,
+    0x9609a88e,
+    0xe10e9818,
+    0x7f6a0dbb,
+    0x086d3d2d,
+    0x91646c97,
+    0xe6635c01,
+    0x6b6b51f4,
+    0x1c6c6162,
+    0x856530d8,
+    0xf262004e,
+    0x6c0695ed,
+    0x1b01a57b,
+    0x8208f4c1,
+    0xf50fc457,
+    0x65b0d9c6,
+    0x12b7e950,
+    0x8bbeb8ea,
+    0xfcb9887c,
+    0x62dd1ddf,
+    0x15da2d49,
+    0x8cd37cf3,
+    0xfbd44c65,
+    0x4db26158,
+    0x3ab551ce,
+    0xa3bc0074,
+    0xd4bb30e2,
+    0x4adfa541,
+    0x3dd895d7,
+    0xa4d1c46d,
+    0xd3d6f4fb,
+    0x4369e96a,
+    0x346ed9fc,
+    0xad678846,
+    0xda60b8d0,
+    0x44042d73,
+    0x33031de5,
+    0xaa0a4c5f,
+    0xdd0d7cc9,
+    0x5005713c,
+    0x270241aa,
+    0xbe0b1010,
+    0xc90c2086,
+    0x5768b525,
+    0x206f85b3,
+    0xb966d409,
+    0xce61e49f,
+    0x5edef90e,
+    0x29d9c998,
+    0xb0d09822,
+    0xc7d7a8b4,
+    0x59b33d17,
+    0x2eb40d81,
+    0xb7bd5c3b,
+    0xc0ba6cad,
+    0xedb88320,
+    0x9abfb3b6,
+    0x03b6e20c,
+    0x74b1d29a,
+    0xead54739,
+    0x9dd277af,
+    0x04db2615,
+    0x73dc1683,
+    0xe3630b12,
+    0x94643b84,
+    0x0d6d6a3e,
+    0x7a6a5aa8,
+    0xe40ecf0b,
+    0x9309ff9d,
+    0x0a00ae27,
+    0x7d079eb1,
+    0xf00f9344,
+    0x8708a3d2,
+    0x1e01f268,
+    0x6906c2fe,
+    0xf762575d,
+    0x806567cb,
+    0x196c3671,
+    0x6e6b06e7,
+    0xfed41b76,
+    0x89d32be0,
+    0x10da7a5a,
+    0x67dd4acc,
+    0xf9b9df6f,
+    0x8ebeeff9,
+    0x17b7be43,
+    0x60b08ed5,
+    0xd6d6a3e8,
+    0xa1d1937e,
+    0x38d8c2c4,
+    0x4fdff252,
+    0xd1bb67f1,
+    0xa6bc5767,
+    0x3fb506dd,
+    0x48b2364b,
+    0xd80d2bda,
+    0xaf0a1b4c,
+    0x36034af6,
+    0x41047a60,
+    0xdf60efc3,
+    0xa867df55,
+    0x316e8eef,
+    0x4669be79,
+    0xcb61b38c,
+    0xbc66831a,
+    0x256fd2a0,
+    0x5268e236,
+    0xcc0c7795,
+    0xbb0b4703,
+    0x220216b9,
+    0x5505262f,
+    0xc5ba3bbe,
+    0xb2bd0b28,
+    0x2bb45a92,
+    0x5cb36a04,
+    0xc2d7ffa7,
+    0xb5d0cf31,
+    0x2cd99e8b,
+    0x5bdeae1d,
+    0x9b64c2b0,
+    0xec63f226,
+    0x756aa39c,
+    0x026d930a,
+    0x9c0906a9,
+    0xeb0e363f,
+    0x72076785,
+    0x05005713,
+    0x95bf4a82,
+    0xe2b87a14,
+    0x7bb12bae,
+    0x0cb61b38,
+    0x92d28e9b,
+    0xe5d5be0d,
+    0x7cdcefb7,
+    0x0bdbdf21,
+    0x86d3d2d4,
+    0xf1d4e242,
+    0x68ddb3f8,
+    0x1fda836e,
+    0x81be16cd,
+    0xf6b9265b,
+    0x6fb077e1,
+    0x18b74777,
+    0x88085ae6,
+    0xff0f6a70,
+    0x66063bca,
+    0x11010b5c,
+    0x8f659eff,
+    0xf862ae69,
+    0x616bffd3,
+    0x166ccf45,
+    0xa00ae278,
+    0xd70dd2ee,
+    0x4e048354,
+    0x3903b3c2,
+    0xa7672661,
+    0xd06016f7,
+    0x4969474d,
+    0x3e6e77db,
+    0xaed16a4a,
+    0xd9d65adc,
+    0x40df0b66,
+    0x37d83bf0,
+    0xa9bcae53,
+    0xdebb9ec5,
+    0x47b2cf7f,
+    0x30b5ffe9,
+    0xbdbdf21c,
+    0xcabac28a,
+    0x53b39330,
+    0x24b4a3a6,
+    0xbad03605,
+    0xcdd70693,
+    0x54de5729,
+    0x23d967bf,
+    0xb3667a2e,
+    0xc4614ab8,
+    0x5d681b02,
+    0x2a6f2b94,
+    0xb40bbe37,
+    0xc30c8ea1,
+    0x5a05df1b,
+    0x2d02ef8d
+};
+
+constexpr uint32_t crc32(std::string_view str) {
+    uint32_t crc = 0xffffffff;
+    for (auto c : str)
+        crc = (crc >> 8) ^ crc_table[(crc ^ c) & 0xff];
+    return crc ^ 0xffffffff;
+}
+
+constexpr uint32_t crc32(char const *str, const size_t size) {
+    uint32_t crc = 0xffffffff;
+    for (size_t i = 0; i < size; ++i)
+        crc = (crc >> 8) ^ crc_table[(crc ^ str[i]) & 0xff];
+    return crc ^ 0xffffffff;
+}
+
+constexpr std::array<uint32_t, 35> BAD_WORDS_HASH = {
+    crc32("recovery"),
+    crc32("techie"),
+    crc32("http"),
+    crc32("https"),
+    crc32("digital"),
+    crc32("hack"),
+    crc32("::"),
+    crc32("//"),
+    crc32("com"),
+    crc32("@"),
+    crc32("crypto"),
+    crc32("bitcoin"),
+    crc32("wallet"),
+    crc32("hacker"),
+    crc32("welcome"),
+    crc32("whatsapp"),
+    crc32("email"),
+    crc32("cryptocurrency"),
+    crc32("stolen"),
+    crc32("freeze"),
+    crc32("quick"),
+    crc32("crucial"),
+    crc32("tracing"),
+    crc32("scammers"),
+    crc32("expers"),
+    crc32("hire"),
+    crc32("century"),
+    crc32("transaction"),
+    crc32("essential"),
+    crc32("managing"),
+    crc32("contact"),
+    crc32("contacting"),
+    crc32("understanding"),
+    crc32("assets"),
+    crc32("funds")
+};
+const std::unordered_set BAD_WORDS_STR(BAD_WORDS.begin(), BAD_WORDS.end());
+
+constexpr auto SHORTEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::max(),
+                                                          [](std::size_t current, const std::string_view &word) {
+                                                              return std::min(current, word.size());
+                                                          }
+    );
+constexpr auto LONGEST_BAD_WORD = std::ranges::fold_left(BAD_WORDS, std::numeric_limits<std::size_t>::min(),
+                                                         [](std::size_t current, const std::string_view &word) {
+                                                             return std::max(current, word.size());
+                                                         }
+    );
+
+int totalWordCount = 0;
+int totalCapitalizedCount = 0;
+int totalSentenceCount = 0;
+int totalNumberCount = 0;
+int totalForbiddenCount = 0;
+int fileCount = 1;
+
+int failCount = 0;
+
+
+int done = 0;
+
+
+struct info {
+    std::string_view name;
+    aiocb *cb;
+    const std::chrono::time_point<std::chrono::steady_clock> start = std::chrono::steady_clock::now();
+};
+
+constexpr void check_word_simple(const char *word, const ssize_t size) {
+    if (size < SHORTEST_BAD_WORD || size > LONGEST_BAD_WORD) {
+        return;
+    }
+    // if (BAD_WORDS_SET.contains(word)) {
+    //     totalForbiddenCount++;
+    // }
+
+    const auto hs = crc32(word, size);
+
+    for (int i = 0; i < BAD_WORDS_HASH.size(); ++i) {
+        if (BAD_WORDS_HASH[i] == hs) {
+            totalForbiddenCount++;
+            return;
+        }
+    }
+}
+
+void read_str(char *str, ssize_t size) {
+    int mark = -1;
+
+    int fileWords = 0;
+    for (int pos = 0; pos <= size; ++pos) {
+        char *c = str + pos;
+
+        if (*c == '.') {
+            totalSentenceCount++;
+        }
+
+        if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
+            if (mark != -1) {
+                check_word_simple(str + mark, pos - mark);
+                mark = -1;
+            }
+        } else if (mark == -1) {
+            ++fileWords;
+            if (*c >= 'A' && *c <= 'Z') {
+                totalCapitalizedCount++;
+            }
+
+            mark = pos;
+        } else if (*c >= '0' && *c <= '9') {
+            totalNumberCount++;
+            for (; pos <= size; ++pos) {
+                c = str + pos;
+                if (*c == '.') {
+                    totalSentenceCount++;
+                }
+                if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
+                    break;
+                }
+            }
+            mark = -1;
+        }
+
+    }
+
+    if (mark != -1) {
+        check_word_simple(str + mark, size - mark);
+    }
+
+    totalWordCount += fileWords;
+}
+
+
+void aio_completion_handler(sigval_t sigval) {
+
+        fileCount++;
+        info *data = (info *)sigval.sival_ptr;
+        auto req = data->cb;
+        // auto req = (struct aiocb *)sigval.sival_ptr;
+        /* Did the request complete? */
+        auto error = aio_error(req);
+        if (error == 0) {
+
+            /* Request completed successfully, get the return status */
+            // const auto start{std::chrono::steady_clock::now()};
+            // const std::chrono::duration<double> start_seconds{start - (data->start)};
+            // std::println("File started {} in {}", data->name, start_seconds.count());
+            read_str((char *)req->aio_buf, aio_return(req));
+            // const auto finish{std::chrono::steady_clock::now()};
+            // const std::chrono::duration<double> elapsed_seconds{finish - (data->start)};
+            // std::println("File read {} in {}", data->name, elapsed_seconds.count());
+        } else {
+            std::println("Error at aio_error ({}): ", error);
+            failCount++;
+        }
+        --done;
+
+}
+
+int main(const int argc, char *argv[]) {
+    if (argc < 2) {
+        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
+        return 1;
+    }
+
+    done = argc - 1;
+
+    // lio_listio
+    auto aiocb_list = (struct aiocb *)malloc(sizeof(struct aiocb) * (argc - 1));
+    auto aiocb_list_ptr = (struct aiocb **)malloc(sizeof(struct aiocb *) * (argc - 1));
+
+    // char *memchnk = (char *)malloc(5 * 1024 * 1024 * (argc - 1));
+
+    for (std::size_t i = 0; i < argc - 1; i++) {
+        aiocb_list[i].aio_fildes = open(argv[i + 1], O_RDONLY);
+        aiocb_list[i].aio_offset = 0;
+        // 5mb
+        aiocb_list[i].aio_buf = malloc(5 * 1024 * 1024);
+        aiocb_list[i].aio_nbytes = (5 * 1024 * 1024);;
+
+        aiocb_list[i].aio_sigevent.sigev_notify = SIGEV_THREAD;
+        aiocb_list[i].aio_sigevent.sigev_notify_function = aio_completion_handler;
+        aiocb_list[i].aio_sigevent.sigev_notify_attributes = nullptr;
+        // aiocb_list[i].aio_sigevent.sigev_value.sival_ptr = &aiocb_list[i];
+        aiocb_list[i].aio_sigevent.sigev_value.sival_ptr = new info{
+            argv[i + 1],
+            &aiocb_list[i]};
+
+        // aiocb_list[i].aio_reqprio = SIGRTMIN;
+
+        aiocb_list_ptr[i] = &aiocb_list[i];
+    }
+
+    lio_listio(LIO_WAIT, aiocb_list_ptr, argc - 1, nullptr);
+
+    while (done > 0) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+
+    std::println("Done reading files, {} done", done);
+
+    double capitalizedPercentage = (totalWordCount > 0)
+                                       ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
+                                       : 0;
+    double forbiddenPercentage = (totalWordCount > 0)
+                                     ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
+                                     : 0;
+    double wordCountPerSentence = (totalSentenceCount > 0)
+                                      ? static_cast<double>(totalWordCount) / totalSentenceCount
+                                      : 0;
+
+    std::println(
+        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
+        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
+        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
+        );
+
+    for (std::size_t i = 0; i < argc - 1; i++) {
+        close(aiocb_list[i].aio_fildes);
+        free((void *)aiocb_list[i].aio_buf);
+    }
+
+    free(aiocb_list);
+    free(aiocb_list_ptr);
+    // free(memchnk);
+
+    if (failCount > 0) {
+        return 1;
+    }
+}
--- a/jest_rust/.gitignore
+++ b/jest_rust/.gitignore
@ -0,0 +1,2 @@
+/target
+/Cargo.lock
--- a/jest_rust/Cargo.toml
+++ b/jest_rust/Cargo.toml
@ -0,0 +1,14 @@
+[package]
+name = "jisspam"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+fxhash = "0.2.1"
+tokio = { version = "1.44.1", features = ["full"] }
+
+[profile.release]
+codegen-units = 1 # less means more compile work but better optimized
+lto = "thin"      # thin has best performance. fat the worst
+strip = true
+panic = "abort"
--- a/jest_rust/README.md
+++ b/jest_rust/README.md
@ -0,0 +1,195 @@
+for https://retoor.molodetz.nl/retoor/isspam
+
+https://snek.molodetz.nl/terminal.html ubuntu running thing instructions:
+```
+mkdir /project
+cd /project
+git clone https://retoor.molodetz.nl/retoor/isspam.git
+apt install valgrind curl
+export RUSTUP_HOME=/project/.rustup
+export CARGO_HOME=/project/.cargo
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+. "/project/.cargo/env"
+cd isspam
+rustup install nightly
+rustup default nightly
+make
+make benchmark
+python3 bench.py
+```
+
+clone: `git clone https://gitlab.com/jestdotty-group/draft/jisspam.git jest_rust`
+
+edit make: `vi makefile` and add build:
+```
+build_jest:
+	@echo "compiling jest_rust project"
+	cd jest_rust && cargo build --release && cp target/release/jisspam ..
+```
+append to all script:
+```
+all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest
+```
+
+add to bench: `vi bench.py`
+```py
+time_start = time.time()
+subprocess.check_output('./jisspam books/*.txt', shell=True)
+print("Time Jest Rust:", time.time() - time_start)
+```
+
+run: `python3 bench.py`
+output looks something like this:
+```
+***benchmarking***
+Time C: 31.315868377685547
+Time Rust: 41.232205867767334
+Time CPP: 20.1683189868927
+Time Borded CPP: 15.468477964401245
+Time Jest Rust: 54.74523115158081
+Time Retoor Python: 287.63036131858826
+***end benchmark***
+```
+
+add `/jisspam` to `.gitignore` to not commit the executable accidentally
+
+# local machine benchmarks
+
+single threaded:
+```
+***benchmarking***
+Time C: 2.4082751274108887
+Time Rust: 2.865687847137451
+Time CPP: 1.1568822860717773
+Time Borded CPP: 1.9657189846038818
+Time Jest Rust: 33.63373279571533
+Time Retoor Python: 133.92413425445557
+***end benchmark***
+```
+
+rayon:
+```
+***benchmarking***
+Time C: 2.457853317260742
+Time Rust: 3.0170154571533203
+Time CPP: 1.1482579708099365
+Time Borded CPP: 2.002591371536255
+Time Jest Rust: 4.294418811798096
+Time Retoor Python: 201.2997748851776
+***end benchmark***
+```
+
+tokio:
+```
+***benchmarking***
+Time C: 2.448648452758789
+Time Rust: 3.095592737197876
+Time CPP: 1.1662013530731201
+Time Borded CPP: 1.9207634925842285
+Time Jest Rust: 4.717588901519775
+Time Retoor Python: 139.8203284740448
+***end benchmark***
+```
+## compile options benchmarks
+lto not thin: `Time Jest Rust: 5.306957483291626` slower
+
+lto fat: `Time Jest Rust: 5.413678407669067` slower
+
+codegen-units 1: `Time Jest Rust: 4.451631546020508` faster
+
+opt-level z: `Time Jest Rust: 7.045313119888306` slower
+
+strip true: `Time Jest Rust: 4.337219476699829` faster
+
+lto true: `Time Jest Rust: 4.703521728515625` slower
+
+lto none: `Time Jest Rust: 4.817203998565674`
+
+lto thin: `Time Jest Rust: 4.429729223251343` faster
+
+# data integrity
+(this isn't tested, just guessed, and I don't have data to compare it with)
+for loops:
+```
+file count: 904
+failed file count: 0
+sentence count: 5602301
+word count: 81701260
+capitalized count: 1753639
+numeric count: 14981248
+forbidden count: 1237059
+words per sentence average: 14.6
+forbidden word percentage: 2%
+capitalized word percentage: 2%
+
+benchmark: 5033ms
+```
+
+muncher:
+```
+file count: 904
+failed file count: 0
+sentence count: 5338705
+word count: 86765116
+capitalized count: 13640820
+numeric count: 10902254
+forbidden count: 0
+words per sentence average: 16.3
+forbidden word percentage: 0%
+capitalized word percentage: 16%
+
+benchmark: 504ms
+```
+with forbidden words:
+```
+file count: 904
+failed file count: 0
+sentence count: 5338705
+word count: 86765116
+capitalized count: 13640820
+numeric count: 10902254
+forbidden count: 279717
+words per sentence average: 16.3
+forbidden word percentage: 0%
+capitalized word percentage: 16%
+
+benchmark: 6078ms
+```
+
+# forbidden words benchmarks
+seems they take up about 4000ms to churn through in the original version
+
+for loops count forbidden word once only:
+```
+file count: 904
+failed file count: 0
+sentence count: 5602301
+word count: 81701260
+capitalized count: 1753639
+numeric count: 14981248
+forbidden count: 1143234
+words per sentence average: 14.6
+forbidden word percentage: 1%
+capitalized word percentage: 2%
+
+benchmark: 4737ms
+```
+for loops with trie:
+```
+file count: 904
+failed file count: 0
+sentence count: 5602301
+word count: 81701260
+capitalized count: 1753639
+numeric count: 14981248
+forbidden count: 176528
+words per sentence average: 14.6
+forbidden word percentage: 0%
+capitalized word percentage: 2%
+
+benchmark: 1588ms
+```
+
+muncher with trie is 2600ms
+
+for loops with fxhash trie: 1200ms
--- a/jest_rust/src/main.rs
+++ b/jest_rust/src/main.rs
@ -0,0 +1,264 @@
+mod stats;
+mod trie;
+
+use stats::Stats;
+use std::{env, fs, sync::LazyLock};
+use tokio::sync::mpsc;
+use trie::Trie;
+
+static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
+	let mut trie = Trie::default();
+	for word in [
+		"recovery",
+		"techie",
+		"http",
+		"https",
+		"digital",
+		"hack",
+		"::",
+		"//",
+		"@",
+		"com",
+		"crypto",
+		"bitcoin",
+		"wallet",
+		"hacker",
+		"welcome",
+		"whatsapp",
+		"email",
+		"cryptocurrency",
+		"stolen",
+		"freeze",
+		"quick",
+		"crucial",
+		"tracing",
+		"scammers",
+		"expers",
+		"hire",
+		"century",
+		"transaction",
+		"essential",
+		"managing",
+		"contact",
+		"contacting",
+		"understanding",
+		"assets",
+		"funds",
+	] {
+		trie.insert(word);
+	}
+	trie
+});
+
+impl Stats {
+	pub fn process(&mut self, text: &str) {
+		// self.muncher(&text);
+		self.for_loops(&text);
+	}
+	#[allow(dead_code)]
+	/// probably buggy. for example, are new lines sentences? what if the text has no last period?
+	/// 500ms is without forbidden words check, but...
+	/// 6000ms if adding forbidden words.. so not faster
+	/// with trie this is 2600ms
+	fn muncher(&mut self, text: &str) {
+		let mut capitalized = true;
+		let mut whitespaced = false;
+		let mut dotted = false;
+		let mut word = String::new();
+		for char in text.chars() {
+			if whitespaced {
+				if !char.is_whitespace() {
+					whitespaced = false; //end whiteness
+				}
+				continue;
+			} else if char.is_whitespace() {
+				whitespaced = true;
+				self.word_count += 1; //end of word
+				if capitalized {
+					self.capitalized_count += 1;
+				} else {
+					//reset capitalized word
+					capitalized = true;
+				}
+				let lowercase_word = word.to_lowercase();
+				if FORBIDDEN_WORDS.contains(&lowercase_word) {
+					self.forbidden_count += 1;
+				}
+				word = String::new();
+				continue;
+			}
+			if dotted {
+				if char != '.' {
+					dotted = false; //end sentencing
+				}
+				continue;
+			} else if char == '.' {
+				dotted = true;
+				self.sentence_count += 1;
+				self.word_count += 1; //end of word
+				if capitalized {
+					self.capitalized_count += 1;
+				} else {
+					//reset capitalized word
+					capitalized = true;
+				}
+				let lowercase_word = word.to_lowercase();
+				if FORBIDDEN_WORDS.contains(&lowercase_word) {
+					self.forbidden_count += 1;
+				}
+				word = String::new();
+				continue;
+			}
+			word += &char.to_string();
+			if char.is_numeric() {
+				self.numeric_count += 1;
+				capitalized = false;
+			}
+			if !char.is_ascii_uppercase() {
+				capitalized = false;
+			}
+		}
+	}
+	#[allow(dead_code)]
+	/// typically 5000ms
+	/// with trie this is 1600ms
+	fn for_loops(&mut self, text: &str) {
+		for sentence in text
+			.split('.')
+			.map(|s| s.trim())
+			.filter(|s| !s.is_empty())
+		{
+			self.sentence_count += 1;
+			for word in sentence
+				.split_whitespace()
+				.map(|s| s.trim())
+				.filter(|s| !s.is_empty())
+			{
+				self.word_count += 1;
+				//get all numbers counted
+				let mut all_capitalized = true;
+				for char in word.chars() {
+					if char.is_numeric() {
+						self.numeric_count += 1;
+						//TODO are numbers capitalized or not? I don't know!
+					}
+					if !char.is_ascii_uppercase() {
+						all_capitalized = false;
+					}
+				}
+				if all_capitalized {
+					self.capitalized_count += 1;
+				}
+				let lowercase_word = word.to_lowercase();
+				if FORBIDDEN_WORDS.contains(&lowercase_word) {
+					self.forbidden_count += 1;
+				}
+			}
+		}
+	}
+}
+
+#[tokio::main]
+async fn main() {
+	let files = env::args().skip(1);
+	let mut stats = Stats::default();
+	let mut rx = {
+		let (tx, rx) = mpsc::unbounded_channel();
+		for file in files {
+			//reading files not sequentially average shaves 30ms (of 1250ms), and that's on a NVMe SSD so why not
+			let Ok(text) = fs::read_to_string(&file) else {
+				stats.failed_file_count += 1;
+				continue;
+			};
+			stats.file_count += 1;
+			let tx = tx.clone();
+			tokio::spawn(async move {
+				let mut stats = Stats::default();
+				stats.process(&text);
+				tx.send(stats).unwrap();
+			});
+		}
+		rx
+	};
+	while let Some(file_stat) = rx.recv().await {
+		stats += file_stat;
+	}
+	println!("{stats}");
+}
+
+#[test]
+fn test() {
+	use std::{env, fs, process::Command, time::Instant};
+	println!("cwd: {}", env::current_dir().unwrap().display());
+
+	//compile
+	let mut compile = Command::new("cargo");
+	let compile_arged = compile.arg("build").arg("--release");
+	match compile_arged.output() {
+		Ok(output) => println!("compiled {}", String::from_utf8_lossy(&output.stdout)),
+		Err(err) => eprintln!("compile failed: {err}"),
+	}
+
+	//get test files
+	let files = fs::read_dir("test_files")
+		.unwrap()
+		.map(|f| {
+			f.unwrap()
+				.path()
+				.canonicalize()
+				.unwrap()
+				.to_str()
+				.unwrap()
+				.to_string()
+		})
+		.collect::<Vec<_>>();
+	println!("test files found: {:#?}", files);
+
+	//benchmark run
+	let benchmark = Instant::now();
+	let mut run = Command::new("target/release/jisspam");
+	let run_arged = run.args(files);
+	match run_arged.output() {
+		Ok(output) => println!("{}", String::from_utf8_lossy(&output.stdout)),
+		Err(err) => eprintln!("run failed: {err}"),
+	}
+	println!("benchmark: {}ms", benchmark.elapsed().as_millis());
+}
+#[test]
+fn books_test() {
+	use std::{env, fs, process::Command, time::Instant};
+	println!("cwd: {}", env::current_dir().unwrap().display());
+
+	//compile
+	let mut compile = Command::new("cargo");
+	let compile_arged = compile.arg("build").arg("--release");
+	match compile_arged.output() {
+		Ok(output) => println!("compiled {}", String::from_utf8_lossy(&output.stdout)),
+		Err(err) => eprintln!("compile failed: {err}"),
+	}
+
+	//get test files
+	let files = fs::read_dir("../books")
+		.unwrap()
+		.map(|f| {
+			f.unwrap()
+				.path()
+				.canonicalize()
+				.unwrap()
+				.to_str()
+				.unwrap()
+				.to_string()
+		})
+		.collect::<Vec<_>>();
+	println!("test files found: {:#?}", files);
+
+	//benchmark run
+	let benchmark = Instant::now();
+	let mut run = Command::new("target/release/jisspam");
+	let run_arged = run.args(files);
+	match run_arged.output() {
+		Ok(output) => println!("{}", String::from_utf8_lossy(&output.stdout)),
+		Err(err) => eprintln!("run failed: {err}"),
+	}
+	println!("benchmark: {}ms", benchmark.elapsed().as_millis());
+}
--- a/jest_rust/src/stats.rs
+++ b/jest_rust/src/stats.rs
@ -0,0 +1,58 @@
+use std::{fmt::Display, ops::AddAssign};
+
+#[derive(Debug, Default)]
+pub struct Stats {
+	pub file_count: u32,
+	pub failed_file_count: u32,
+
+	pub sentence_count: u32,
+	pub word_count: u32,
+
+	pub capitalized_count: u32,
+	pub numeric_count: u32,
+	pub forbidden_count: u32,
+}
+
+impl AddAssign for Stats {
+	fn add_assign(&mut self, rhs: Self) {
+		self.file_count += rhs.file_count;
+		self.failed_file_count += rhs.failed_file_count;
+
+		self.sentence_count += rhs.sentence_count;
+		self.word_count += rhs.word_count;
+
+		self.capitalized_count += rhs.capitalized_count;
+		self.numeric_count += rhs.numeric_count;
+		self.forbidden_count += rhs.forbidden_count;
+	}
+}
+impl Display for Stats {
+	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		writeln!(f, "file count: {}", self.file_count)?;
+		writeln!(f, "failed file count: {}", self.failed_file_count)?;
+
+		writeln!(f, "sentence count: {}", self.sentence_count)?;
+		writeln!(f, "word count: {}", self.word_count)?;
+
+		writeln!(f, "capitalized count: {}", self.capitalized_count)?;
+		writeln!(f, "numeric count: {}", self.numeric_count)?;
+		writeln!(f, "forbidden count: {}", self.forbidden_count)?;
+
+		let word_count = self.word_count as f32;
+		writeln!(
+			f,
+			"words per sentence average: {:.1}",
+			word_count / self.sentence_count as f32
+		)?;
+		writeln!(
+			f,
+			"forbidden word percentage: {:.0}%",
+			(self.forbidden_count as f32 / word_count) * 100.0,
+		)?;
+		write!(
+			f,
+			"capitalized word percentage: {:.0}%",
+			(self.capitalized_count as f32 / word_count) * 100.0,
+		)
+	}
+}
--- a/jest_rust/src/trie.rs
+++ b/jest_rust/src/trie.rs
@ -0,0 +1,33 @@
+use fxhash::FxBuildHasher;
+use std::collections::HashMap;
+
+type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
+
+#[derive(Default, Debug, Clone)]
+struct Node {
+	end: bool,
+	children: FxHashMap<char, Node>,
+}
+#[derive(Default, Debug, Clone)]
+pub struct Trie {
+	root: Node,
+}
+impl Trie {
+	pub fn insert(&mut self, word: &str) {
+		let mut node = &mut self.root;
+		for char in word.chars() {
+			node = node.children.entry(char).or_default();
+		}
+		node.end = true;
+	}
+	pub fn contains(&self, word: &str) -> bool {
+		let mut current_node = &self.root;
+		for char in word.chars() {
+			match current_node.children.get(&char) {
+				Some(node) => current_node = node,
+				None => return false,
+			}
+		}
+		current_node.end
+	}
+}
--- a/jest_rust/test_files/not_spam.txt
+++ b/jest_rust/test_files/not_spam.txt
@ -0,0 +1 @@
+I am Nastya and downvoted this post because post is considered spam. Your message will be removed from this community site due too much downvotes. See my profile for more information. Read my source code mentioned on my profile to see what you did wrong. Should be no problem for a developer.
--- a/jest_rust/test_files/spam1.txt
+++ b/jest_rust/test_files/spam1.txt
@ -0,0 +1,20 @@
+HIRE Century Web Recovery TO RECOVER YOUR LOST BITCOIN
+
+	If you’ve lost your Bitcoin to an online scam, hiring a professional recovery service can significantly improve your chances of getting your funds back. Century Web Recovery specializes in Bitcoin recovery, helping victims reclaim their stolen assets. Here’s what you need to know:
+
+Understanding the Recovery Process
+The recovery process begins with contacting Century Web Recovery. Their team will guide you through the steps necessary to initiate an investigation into your case. Understanding the process is key to managing your expectations.
+
+Documenting Your Case
+	To facilitate recovery, it’s essential to document all relevant information regarding the scam. This includes transaction records, wallet addresses, and any communications with the scammer. Century Web Recovery will help you gather this information to build a strong case.
+
+Investigation and Tracking
+Once you hire Century Web Recovery, their experts will begin investigating your case. They use sophisticated tools to track the stolen Bitcoin, identifying the paths taken by the scammers. This tracing is crucial for successful recovery.
+
+Freezing Stolen Assets
+Quick action is vital in recovering stolen Bitcoin.Century Web Recovery works directly with cryptocurrency exchanges to freeze any stolen assets, preventing the scammers from cashing out your funds. This collaboration is essential for a successful recovery.
+
+Legal Support and Guidance
+If necessary, Century Web Recovery can provide legal support. They will guide you on reporting the scam to law enforcement and assist in filing any legal claims. Their expertise in crypto-related cases ensures you receive the best advice on how to proceed.
+
+	If you’ve lost Bitcoin to an online scam, don’t hesitate. Hire Century Web Recovery to recover your lost assets and regain your financial security.
--- a/jest_rust/test_files/spam2.txt
+++ b/jest_rust/test_files/spam2.txt
@ -0,0 +1,3 @@
+Email; digital hack recovery @ techie . com
+WhatsApp +19152151930
+Website; https : // digital hack recovery . com
--- a/jest_rust/test_files/spam3.txt
+++ b/jest_rust/test_files/spam3.txt
@ -0,0 +1,7 @@
+	TESTED CRYPTOCURRENCY RECOVERY SERVICE \\ DIGITAL HACK RECOVERY
+
+	When the devastating reality of lost or stolen Bitcoin strikes, the path to recovery can seem bleak and hopeless. However, the story of DIGITAL HACK RECOVERY stands as a shining beacon of hope, demonstrating the power of perseverance in the face of seemingly insurmountable odds. This specialized service, founded by a team of tenacious experts, has made it their mission to reunite people with their rightful digital assets, no matter how complex or convoluted the situation may be. Through their unwavering dedication and meticulous investigative techniques, DIGITAL HACK RECOVERY has time and again succeeded in tracking down lost Bitcoin, navigating the labyrinthine world of blockchain technology and leveraging their deep understanding of crypto ecosystems. Their success stories are a testament to the resilience of the human spirit, as they've helped individuals regain access to life-changing sums of money that had been presumed lost forever. In an industry rife with uncertainty and risk, DIGITAL HACK RECOVERY has emerged as a trusted ally, guiding clients through the darkness with a steadfast commitment to recovery. By combining cutting-edge digital forensics, strategic partnerships, and a relentless determination to leave no stone unturned, this remarkable organization has earned the gratitude of countless individuals who had resigned themselves to the permanent disappearance of their hard-earned Bitcoin. In a world where the digital landscape can feel overwhelming and unpredictable, DIGITAL HACK RECOVERY stands as a shining example of what can be achieved through perseverance, expertise, and an unwavering belief in the possibility of redemption. I tried everything I could think of. I contacted support forums, tried password recovery tools, scoured Reddit for advice, and spent countless hours following step-by-step guides. Every lead I followed seemed to end in disappointment. I felt like I was chasing an illusion—getting closer, but never quite reaching it. With every attempt that failed, my hope dwindled further. It was an overwhelming feeling, knowing that I had lost something irreplaceable, something I had worked so hard for, and worse—something I had no way of recovering. Months passed, and I was ready to give up. I had accepted that my Bitcoin was gone, lost forever. But that feeling of helplessness lingered, gnawing at me in the back of my mind but DIGITAL HACK RECOVERY made the change of my life when I got the news of the recovery. Thank you very much. Contact them via contact details bellow⁚
+
+Email; digital hack recovery @ techie . com
+WhatsApp +19152151930
+Website; https : // digital hack recovery . com
--- a/retoor_c/isspam.py
+++ b/retoor_c/isspam.py
@ -0,0 +1,85 @@
+import os
+import sys
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+MAX_TEXT_LENGTH = 1024
+FORBIDDEN_WORDS_COUNT = 40
+
+forbidden_words = set([
+    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
+    "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
+    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
+    "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds",
+])
+
+class AnalysisResult:
+    def __init__(self, filename):
+        self.filename = filename
+        self.total_word_count = 0
+        self.total_capitalized_count = 0
+        self.total_sentence_count = 0
+        self.total_number_count = 0
+        self.total_forbidden_count = 0
+
+def is_forbidden(word):
+    return word in forbidden_words
+
+def read_file(filename):
+    if not os.path.exists(filename):
+        print(f"File doesn't exist: {filename}")
+        return None
+
+    with open(filename, 'r') as file:
+        return file.read()
+
+def analyze_file(result):
+    text = read_file(result.filename)
+    if text:
+        result.total_sentence_count = text.count('.')
+        tokens = text.split()
+
+        result.total_word_count = len(tokens)
+        result.total_capitalized_count = sum(1 for token in tokens if token[0].isupper())
+        result.total_number_count = sum(1 for token in tokens if any(char.isdigit() for char in token))
+        result.total_forbidden_count = sum(1 for token in tokens if is_forbidden(token))
+
+def main():
+    if len(sys.argv) < 2:
+        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
+        return
+
+    results = []
+
+    with ThreadPoolExecutor() as executor:
+        futures = []
+        for filename in sys.argv[1:]:
+            result = AnalysisResult(filename)
+            results.append(result)
+            futures.append(executor.submit(analyze_file, result))
+
+        for future in futures:
+            future.result()
+
+    total_word_count = sum(result.total_word_count for result in results)
+    total_capitalized_count = sum(result.total_capitalized_count for result in results)
+    total_sentence_count = sum(result.total_sentence_count for result in results)
+    total_number_count = sum(result.total_number_count for result in results)
+    total_forbidden_count = sum(result.total_forbidden_count for result in results)
+
+    capitalized_percentage = (total_word_count > 0) * (total_capitalized_count / total_word_count * 100.0)
+    forbidden_percentage = (total_word_count > 0) * (total_forbidden_count / total_word_count * 100.0)
+    word_count_per_sentence = (total_sentence_count > 0) * (total_word_count / total_sentence_count)
+
+    print(f"\nTotal Words: {total_word_count}")
+    print(f"Total Capitalized words: {total_capitalized_count}")
+    print(f"Total Sentences: {total_sentence_count}")
+    print(f"Total Numbers: {total_number_count}")
+    print(f"Total Forbidden words: {total_forbidden_count}")
+    print(f"Capitalized percentage: {capitalized_percentage:.6f}%")
+    print(f"Forbidden percentage: {forbidden_percentage:.6f}%")
+    print(f"Word count per sentence: {word_count_per_sentence:.6f}")
+    print(f"Total files read: {len(sys.argv) - 1}")
+
+if __name__ == "__main__":
+    main()