Compare commits

...

24 Commits

Author SHA1 Message Date
JestDotty
4415dd26ae sequential file read slightly faster
Some checks failed
isspam build / build (push) Failing after 2m24s
2025-03-24 16:03:36 -04:00
JestDotty
f9115a9e40 clean up README
Some checks failed
isspam build / build (push) Failing after 2m7s
2025-03-24 00:48:04 -04:00
JestDotty
b711d5a908 fxhash and I remembered how to static lazy right
Some checks failed
isspam build / build (push) Failing after 2m33s
2025-03-24 00:20:15 -04:00
JestDotty
5b8dd08348 do or do not, there is always a trie organization
Some checks failed
isspam build / build (push) Failing after 2m23s
2025-03-23 23:58:34 -04:00
JestDotty
12f2494411 if a word has forbidden only count it once. uses for loops again whoops
Some checks failed
isspam build / build (push) Failing after 2m3s
2025-03-23 23:23:40 -04:00
JestDotty
854a1c3991 muncher benchmark and data
Some checks failed
isspam build / build (push) Failing after 2m23s
2025-03-23 23:14:30 -04:00
JestDotty
ab32a81d9c don't track
Some checks failed
isspam build / build (push) Failing after 2m5s
2025-03-23 22:31:14 -04:00
JestDotty
019970b2aa optimization benchmarks. switched to tokio
Some checks failed
isspam build / build (push) Has been cancelled
2025-03-23 22:29:54 -04:00
JestDotty
5eded442b7 sync. rayon and build options, README
Some checks failed
isspam build / build (push) Failing after 2m2s
2025-03-23 21:39:12 -04:00
e4354a7645 updates.
Some checks failed
isspam build / build (push) Failing after 2m12s
2025-03-24 02:31:31 +01:00
Jest Dotty
a41f819cdf flatten jest_rust
Some checks failed
isspam build / build (push) Failing after 2m8s
2025-03-23 20:23:38 -04:00
Jest Dotty
c2fae46865 jest_rust, build, bench
Some checks failed
isspam build / build (push) Failing after 2m31s
2025-03-23 20:03:09 -04:00
BordedDev
c1a97c01f5
Removed TBB from default makefile
Some checks failed
isspam build / build (push) Failing after 2m2s
2025-03-23 23:37:31 +01:00
BordedDev
4f873d02ef
Fixed horrible mistake ;P
Some checks failed
isspam build / build (push) Failing after 2m39s
2025-03-23 22:40:43 +01:00
BordedDev
75092cb738
Updated file io perf
Some checks failed
isspam build / build (push) Failing after 2m35s
2025-03-23 22:06:48 +01:00
BordedDev
a08c181872
Tweaked some variables, add main2.cpp which removes the struct
Some checks failed
isspam build / build (push) Failing after 2m7s
2025-03-23 03:36:29 +01:00
fe0357b5d5 Added py
Some checks failed
isspam build / build (push) Failing after 2m9s
2025-03-23 03:25:51 +01:00
BordedDev
611ef8d124
Fixed issues with utf8
Some checks failed
isspam build / build (push) Failing after 1m58s
2025-03-23 02:13:38 +01:00
2f2710df41 Added python version.
Some checks failed
isspam build / build (push) Failing after 2m27s
2025-03-22 23:55:48 +01:00
BordedDev
fbb1db28d7
Updated Dockerfile to use gcc
Some checks failed
isspam build / build (push) Failing after 1m58s
Added borded cpp to bench
Made borded part of make all
2025-03-20 23:32:24 +01:00
BordedDev
b24b1b933c
Added borded cpp to make file
Some checks failed
isspam build / build (push) Failing after 2m5s
2025-03-20 23:21:33 +01:00
77926649ba Docker setup.
Some checks failed
isspam build / build (push) Failing after 2m0s
2025-03-20 22:52:53 +01:00
ce4997317a Retoorii fixes.
Some checks failed
isspam build / build (push) Failing after 2m4s
2025-03-20 22:23:41 +01:00
BordedDev
1fb6481f2b
Setup v1 of borded spam parser
Some checks failed
isspam build / build (push) Failing after 2m20s
2025-03-20 21:44:22 +01:00
22 changed files with 1837 additions and 1 deletions

3
.gitignore vendored
View File

@ -5,7 +5,10 @@ publish
books
__pycache__
target
# Copy of retoor_c/isspam.py that `make build_py` writes into the repository root.
# gitignore patterns are not shell paths: a leading "./" never matches anything,
# so anchor the pattern to this directory with a leading "/" instead.
/isspam.py
isspam
risspam
/jisspam
isspam_cpp
.build-trigger-2014-12-02 15:26
borded_cpp_exec

View File

@ -1,20 +1,32 @@
CC = gcc
CFLAGS = -Ofast

# The targets below are command names, not files produced under those names.
# Declaring them phony keeps a stray file or directory called e.g. `build`
# from making the target look permanently up to date.
.PHONY: all build build_py build_cpp build_borded_cpp build_risspam build_jest

# Default goal: build every implementation and run the C / Rust checks.
# (The shorter pre-change `all:` header listed a subset of these prerequisites,
# so a single header is equivalent to the two merged ones.)
all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest

# retoor's C implementation -> ./isspam
build:
	@echo "Compiling retoor_c project.".
	@$(CC) $(CFLAGS) retoor_c/isspam.c -o isspam

# retoor's Python implementation is only copied next to the other executables.
build_py:
	@echo "Copying py file"
	@cp retoor_c/isspam.py isspam.py

# retoor's C++ implementation -> ./isspam_cpp
# $(CXX) defaults to g++ in GNU Make and can be overridden from the command line.
build_cpp:
	@echo "Compiling C++ version of isspam."
	@$(CXX) -Ofast retoor_c/isspam.cpp -o isspam_cpp

# Borded's C++ implementation (POSIX AIO variant, main3.cpp) -> ./borded_cpp_exec
build_borded_cpp:
	@echo "Compiling Borded C++ version of isspam."
	@$(CXX) -std=c++23 -Ofast borded_cpp/src/main3.cpp -o borded_cpp_exec

# 12bitfloat's Rust implementation -> ./risspam
# NOTE(review): this uses `cargo run`, which also executes the binary without
# arguments, whereas build_jest uses `cargo build` - confirm that is intended.
build_risspam:
	@echo "Compiling 12bitfloat_risspam project."
	cd 12bitfloat_rust/risspam && cargo run --release && cp target/release/risspam ../../

# Jest's Rust implementation -> ./jisspam
build_jest:
	@echo "compiling jest_rust project"
	cd jest_rust && cargo build --release && cp target/release/jisspam ..
run: run_spam wl run_not_spam
run_risspam: run_spam_risspam run_not_spam_risspam

View File

@ -11,4 +11,13 @@ print("Time Rust:",time.time() - time_start)
time_start = time.time()
subprocess.check_output('./isspam_cpp books/*.txt', shell=True)
print("Time CPP:",time.time() - time_start)
# Time the remaining implementations: run each command through the shell over
# every book and print the elapsed wall-clock seconds next to its label.
for label, command in (
    ("Time Borded CPP:", './borded_cpp_exec books/*.txt'),
    ("Time Jest Rust:", './jisspam books/*.txt'),
    ("Time Retoor Python:", 'python3 isspam.py books/*.txt'),
):
    time_start = time.time()
    subprocess.check_output(command, shell=True)
    print(label, time.time() - time_start)
print("***end benchmark***")

97
borded_cpp/.gitignore vendored Normal file
View File

@ -0,0 +1,97 @@
*.d
*.slo
*.lo
*.o
*.obj
*.gch
*.pch
*.so
*.dylib
*.dll
*.mod
*.smod
*.lai
*.la
*.a
*.lib
*.exe
*.out
*.app
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
.idea/**/aws.xml
.idea/**/contentModel.xml
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
.idea/**/gradle.xml
.idea/**/libraries
.idea
cmake-build-*/
.idea/**/mongoSettings.xml
*.iws
out/
.idea_modules/
atlassian-ide-plugin.xml
.idea/replstate.xml
.idea/sonarlint/
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
.idea/httpRequests
.idea/caches/build_file_checksums.ser
*~
.fuse_hidden*
.directory
.Trash-*
.nfs*
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
CMakeUserPresets.json
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
*.stackdump
[Dd]esktop.ini
$RECYCLE.BIN/
*.cab
*.msi
*.msix
*.msm
*.msp
*.lnk
.DS_Store
.AppleDouble
.LSOverride
Icon
._*
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

27
borded_cpp/CMakeLists.txt Normal file
View File

@ -0,0 +1,27 @@
cmake_minimum_required(VERSION 3.25)
project(isspam)

set(CMAKE_CXX_STANDARD 26)

# Warning configuration, selected per compiler family.
if (MSVC)
    add_compile_options(
        /W4
        /WX
        /external:anglebrackets
        /external:W0
        /wd4100
        /wd5050
    )
    add_definitions(-DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN)
    add_compile_definitions(WIN32_LEAN_AND_MEAN NOMINMAX)
else ()
    add_compile_options(-Wall -Wextra -Wpedantic)
    # add_compile_options(-Werror)
endif ()

# Two executables are built: "isspam" from main.cpp and "isspam3" from main3.cpp.
add_executable(${PROJECT_NAME} src/main.cpp)
add_executable(${PROJECT_NAME}3 src/main3.cpp)

# On Linux both executables are linked against the tbb library.
if (LINUX)
    foreach (exe IN ITEMS ${PROJECT_NAME} ${PROJECT_NAME}3)
        target_link_libraries(${exe} tbb)
    endforeach ()
endif ()

3
borded_cpp/Dockerfile Normal file
View File

@ -0,0 +1,3 @@
FROM gcc:latest
# apt-get is the script-stable front end (plain `apt` warns when used non-interactively).
# libtbb-dev is required because CMakeLists.txt links both executables against
# `tbb` on Linux; without it the link step inside the container cannot succeed.
# The package lists are removed afterwards to keep the image layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends cmake gdb libtbb-dev \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /home

9
borded_cpp/compose.yml Normal file
View File

@ -0,0 +1,9 @@
services:
  cpp:
    # Image is built from the Dockerfile in this directory.
    build: .
    # Run the build script that is mounted into the working directory.
    command:
      - sh
      - doit.sh
    tty: true
    stdin_open: true
    volumes:
      # This directory is mounted at /home, the sibling books directory at /books.
      - ./:/home
      - ../books:/books

2
borded_cpp/doit.sh Executable file
View File

@ -0,0 +1,2 @@
# Start from a clean build tree. `rm -rf` already succeeds when the directory
# does not exist, so no error suppression is needed; the previous `| true`
# piped rm's (empty) output into `true` instead of handling its exit status.
rm -rf build
# Configure and build out of tree; each step only runs if the previous one succeeded.
mkdir build && cd build && cmake .. && make

221
borded_cpp/src/main.cpp Normal file
View File

@ -0,0 +1,221 @@
#include <string>
#include <string_view>
#include <fstream>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <execution>
#include <format>
#include <codecvt>
#include <ranges>
// <version> defines the library feature-test macros. Without it (or <print>
// itself) __cpp_lib_print is not guaranteed to be visible at this point, so the
// fallback below could be selected even when the real std::print is available.
#include <version>
#ifdef __cpp_lib_print
#include <print>
#else
// Minimal stand-ins for std::print / std::println for standard libraries that
// do not provide <print>: format with std::vformat and write to std::cout.
// NOTE(review): adding templates to namespace std is formally undefined
// behaviour; kept so that call sites stay spelled std::println.
namespace std {
    template <typename T, typename... Args>
    inline void print(T format, Args &&... args) {
        auto f = std::vformat(format, std::make_format_args(args...));
        std::cout << f;
    }

    template <typename T, typename... Args>
    inline void println(T format, Args &&... args) {
        auto f = std::vformat(format, std::make_format_args(args...));
        std::cout << f << std::endl;
    }
}
#endif
// Lower-case tokens that count as "forbidden"; check_word compares whole,
// lower-cased words against these entries.
constexpr std::array<std::wstring_view, 35> BAD_WORDS = {
    L"recovery", L"techie", L"http", L"https", L"digital",
    L"hack", L"::", L"//", L"com", L"@",
    L"crypto", L"bitcoin", L"wallet", L"hacker", L"welcome",
    L"whatsapp", L"email", L"cryptocurrency", L"stolen", L"freeze",
    L"quick", L"crucial", L"tracing", L"scammers", L"expers",
    L"hire", L"century", L"transaction", L"essential", L"managing",
    L"contact", L"contacting", L"understanding", L"assets", L"funds",
};

// Length of the shortest entry in BAD_WORDS, computed at compile time.
constexpr auto SHORTEST_BAD_WORD = std::ranges::min(
    BAD_WORDS, {}, [](const std::wstring_view &word) { return word.size(); }
).size();

// Length of the longest entry in BAD_WORDS, computed at compile time.
constexpr auto LONGEST_BAD_WORD = std::ranges::max(
    BAD_WORDS, {}, [](const std::wstring_view &word) { return word.size(); }
).size();
// Counters collected for one file, or the sum of several such results.
// A default-constructed value represents one file (fileCount == 1).
struct AnalysisResult {
    std::size_t totalWordCount = 0;
    std::size_t totalCapitalizedCount = 0;
    std::size_t totalSentenceCount = 0;
    std::size_t totalNumberCount = 0;
    std::size_t totalForbiddenCount = 0;
    std::size_t fileCount = 1;
    std::size_t failCount = 0;

    // Renders the seven counters as a multi-line report, one "Label: value" per line.
    operator std::string() const {
        return std::format(
            "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}",
            totalWordCount,
            totalCapitalizedCount,
            totalSentenceCount,
            totalNumberCount,
            totalForbiddenCount,
            fileCount,
            failCount
        );
    }

    // Field-wise sum of two results.
    friend AnalysisResult operator+(const AnalysisResult &lhs, const AnalysisResult &rhs) {
        AnalysisResult sum = lhs;
        sum.totalWordCount += rhs.totalWordCount;
        sum.totalCapitalizedCount += rhs.totalCapitalizedCount;
        sum.totalSentenceCount += rhs.totalSentenceCount;
        sum.totalNumberCount += rhs.totalNumberCount;
        sum.totalForbiddenCount += rhs.totalForbiddenCount;
        sum.fileCount += rhs.fileCount;
        sum.failCount += rhs.failCount;
        return sum;
    }
};
// Increments forbiddenCount when the lower-cased form of `word` equals one of
// the BAD_WORDS entries. `word` is lower-cased in place as a side effect.
// (A substring-based variant using word.contains(badWord) was tried and left disabled.)
void check_word(std::wstring &word, std::size_t &forbiddenCount) {
    const std::size_t length = word.size();
    // Cheap reject: no entry is shorter or longer than the precomputed bounds.
    if (length < SHORTEST_BAD_WORD || length > LONGEST_BAD_WORD) {
        return;
    }

    for (wchar_t &ch : word) {
        ch = ::towlower(ch);
    }

    const bool forbidden = std::ranges::any_of(BAD_WORDS, [&word](const std::wstring_view &badWord) {
        return badWord == word;
    });
    if (forbidden) {
        forbiddenCount++;
    }
}
// Reads `filename` as UTF-8 and returns its counters: words (runs of
// non-whitespace), words starting with an upper-case letter, '.' characters,
// words containing at least one digit, and words matching BAD_WORDS.
// If the file cannot be opened a message is printed and a default result is returned.
AnalysisResult parseFile(const std::string_view &filename) {
    std::wifstream file;
    // std::codecvt_utf8 is deprecated; suppress the deprecation warning (4996)
    // for this one statement.
#pragma warning(push)
#pragma warning(suppress : 4996)
    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
#pragma warning(pop)
    file.open(std::string(filename));
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return { };
    }

    AnalysisResult result{ };
    bool inWord = false;   // currently inside a run of non-whitespace characters
    bool isDigit = false;  // the current word has already been counted as a number
    wchar_t c;
    std::wstring word;
    while (file.get(c)) {
        if (c == L'.') {
            result.totalSentenceCount++;
        }
        // The wide-character classifiers are used on purpose: the <cctype>
        // functions (std::isspace/isupper/isdigit) take an int that must be
        // representable as unsigned char, so passing a decoded wchar_t outside
        // that range is undefined behaviour.
        if (::iswspace(c)) {
            inWord = false;
            isDigit = false;
            if (!word.empty()) {
                check_word(word, result.totalForbiddenCount);
                word.clear();
            }
            continue;
        }

        if (!inWord) {
            // First character of a new word.
            result.totalWordCount++;
            if (::iswupper(c)) {
                result.totalCapitalizedCount++;
            }
        }
        inWord = true;
        if (::iswdigit(c) && !isDigit) {
            result.totalNumberCount++;
            isDigit = true;
        }
        word.push_back(c);
    }

    // The file may end without trailing whitespace; check the last word too.
    if (!word.empty()) {
        check_word(word, result.totalForbiddenCount);
    }

    file.close();
    // Reaching end-of-file sets failbit together with eofbit; only a failure
    // without EOF is counted as a failed file.
    if (file.fail() && !file.eof()) {
        result.failCount++;
    }
    return result;
}
// Entry point: analyses every file named on the command line in parallel,
// sums the per-file results and prints the totals plus three derived ratios.
// Returns 1 when no file argument was given, 0 otherwise.
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    // `par`, not `par_unseq`: parseFile performs file I/O, allocates memory and
    // may print, none of which is allowed in callables run under the
    // unsequenced policy. The initial value carries fileCount 0 because every
    // per-file result already counts itself as one file.
    const AnalysisResult result = std::transform_reduce(std::execution::par, std::next(argv), argv + argc,
                                                        AnalysisResult{.fileCount = 0},
                                                        std::plus{ },
                                                        parseFile
    );

    // Ratios are reported as 0 when their denominator is 0.
    double capitalizedPercentage = (result.totalWordCount > 0)
                                       ? static_cast<double>(result.totalCapitalizedCount) / result.totalWordCount * 100.0
                                       : 0;
    double forbiddenPercentage = (result.totalWordCount > 0)
                                     ? static_cast<double>(result.totalForbiddenCount) / result.totalWordCount * 100.0
                                     : 0;
    double wordCountPerSentence = (result.totalSentenceCount > 0)
                                      ? static_cast<double>(result.totalWordCount) / result.totalSentenceCount
                                      : 0;

    std::println("{}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}", std::string(result),
                 capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
    );
    return 0;
}

195
borded_cpp/src/main2.cpp Normal file
View File

@ -0,0 +1,195 @@
#include <string>
#include <string_view>
#include <fstream>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <execution>
#include <format>
#include <codecvt>
#include <ranges>
// <version> defines the library feature-test macros. Without it (or <print>
// itself) __cpp_lib_print is not guaranteed to be visible at this point, so the
// fallback below could be selected even when the real std::print is available.
#include <version>
#ifdef __cpp_lib_print
#include <print>
#else
// Minimal stand-ins for std::print / std::println for standard libraries that
// do not provide <print>: format with std::vformat and write to std::cout.
// NOTE(review): adding templates to namespace std is formally undefined
// behaviour; kept so that call sites stay spelled std::println.
namespace std {
    template <typename T, typename... Args>
    inline void print(T format, Args &&... args) {
        auto f = std::vformat(format, std::make_format_args(args...));
        std::cout << f;
    }

    template <typename T, typename... Args>
    inline void println(T format, Args &&... args) {
        auto f = std::vformat(format, std::make_format_args(args...));
        std::cout << f << std::endl;
    }
}
#endif
// Lower-case tokens that count as "forbidden"; check_word compares whole,
// lower-cased words against these entries.
constexpr std::array<std::wstring_view, 35> BAD_WORDS = {
    L"recovery", L"techie", L"http", L"https", L"digital",
    L"hack", L"::", L"//", L"com", L"@",
    L"crypto", L"bitcoin", L"wallet", L"hacker", L"welcome",
    L"whatsapp", L"email", L"cryptocurrency", L"stolen", L"freeze",
    L"quick", L"crucial", L"tracing", L"scammers", L"expers",
    L"hire", L"century", L"transaction", L"essential", L"managing",
    L"contact", L"contacting", L"understanding", L"assets", L"funds",
};

// Length of the shortest entry in BAD_WORDS, computed at compile time.
constexpr auto SHORTEST_BAD_WORD = std::ranges::min(
    BAD_WORDS, {}, [](const std::wstring_view &word) { return word.size(); }
).size();

// Length of the longest entry in BAD_WORDS, computed at compile time.
constexpr auto LONGEST_BAD_WORD = std::ranges::max(
    BAD_WORDS, {}, [](const std::wstring_view &word) { return word.size(); }
).size();
// Program-wide counters: parseFile adds to them for every file it reads and
// main() prints them at the end.
// NOTE(review): these are plain (non-atomic) variables - confirm they are never
// updated from more than one thread at a time.
std::size_t totalWordCount = 0;
std::size_t totalCapitalizedCount = 0;
std::size_t totalSentenceCount = 0;
std::size_t totalNumberCount = 0;
std::size_t totalForbiddenCount = 0;
// Printed by main() as "File Count".
std::size_t fileCount = 1;
// Incremented by parseFile when a stream ends in a failed state without EOF.
std::size_t failCount = 0;
// Increments forbiddenCount when the lower-cased form of `word` equals one of
// the BAD_WORDS entries. `word` is lower-cased in place as a side effect.
// (A substring-based variant using word.contains(badWord) was tried and left disabled.)
void check_word(std::wstring &word, std::size_t &forbiddenCount) {
    const std::size_t length = word.size();
    // Cheap reject: no entry is shorter or longer than the precomputed bounds.
    if (length < SHORTEST_BAD_WORD || length > LONGEST_BAD_WORD) {
        return;
    }

    for (wchar_t &ch : word) {
        ch = ::towlower(ch);
    }

    const bool forbidden = std::ranges::any_of(BAD_WORDS, [&word](const std::wstring_view &badWord) {
        return badWord == word;
    });
    if (forbidden) {
        forbiddenCount++;
    }
}
// Reads `filename` as UTF-8 and adds its counts to the global counters: words
// (runs of non-whitespace), words starting with an upper-case letter, '.'
// characters, words containing at least one digit, and words matching BAD_WORDS.
// If the file cannot be opened a message is printed and nothing is counted.
void parseFile(const std::string_view &filename) {
    std::wifstream file;
    // std::codecvt_utf8 is deprecated; suppress the deprecation warning (4996)
    // for this one statement.
#pragma warning(push)
#pragma warning(suppress : 4996)
    file.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
#pragma warning(pop)
    file.open(std::string(filename));
    if (!file.is_open()) {
        std::println("File doesn't exist: {}", filename);
        return;
    }

    bool inWord = false;   // currently inside a run of non-whitespace characters
    bool isDigit = false;  // the current word has already been counted as a number
    wchar_t c;
    std::wstring word;
    while (file.get(c)) {
        if (c == L'.') {
            totalSentenceCount++;
        }
        // The wide-character classifiers are used on purpose: the <cctype>
        // functions (std::isspace/isupper/isdigit) take an int that must be
        // representable as unsigned char, so passing a decoded wchar_t outside
        // that range is undefined behaviour.
        if (::iswspace(c)) {
            inWord = false;
            isDigit = false;
            if (!word.empty()) {
                check_word(word, totalForbiddenCount);
                word.clear();
            }
            continue;
        }

        if (!inWord) {
            // First character of a new word.
            totalWordCount++;
            if (::iswupper(c)) {
                totalCapitalizedCount++;
            }
        }
        inWord = true;
        if (::iswdigit(c) && !isDigit) {
            totalNumberCount++;
            isDigit = true;
        }
        word.push_back(c);
    }

    // The file may end without trailing whitespace; check the last word too.
    if (!word.empty()) {
        check_word(word, totalForbiddenCount);
    }

    file.close();
    // Reaching end-of-file sets failbit together with eofbit; only a failure
    // without EOF is counted as a failed file.
    if (file.fail() && !file.eof()) {
        failCount++;
    }
}
// Entry point: analyses every file named on the command line and prints the
// accumulated global counters plus three derived ratios.
// Returns 1 when no file argument was given, 0 otherwise.
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    // parseFile updates plain global counters, so the files are processed one
    // after another. Running it under a parallel execution policy (as before)
    // is a data race on every counter, and par_unseq additionally forbids the
    // I/O and allocation parseFile performs.
    std::for_each(std::next(argv), argv + argc, parseFile);

    // One file per command-line argument; nothing else updates this counter.
    fileCount = static_cast<std::size_t>(argc - 1);

    // Ratios are reported as 0 when their denominator is 0.
    double capitalizedPercentage = (totalWordCount > 0)
                                       ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
                                       : 0;
    double forbiddenPercentage = (totalWordCount > 0)
                                     ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
                                     : 0;
    double wordCountPerSentence = (totalSentenceCount > 0)
                                      ? static_cast<double>(totalWordCount) / totalSentenceCount
                                      : 0;

    std::println(
        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
    );
    return 0;
}

576
borded_cpp/src/main3.cpp Normal file
View File

@ -0,0 +1,576 @@
#include <algorithm>
#include <array>
#include <atomic>
#include <cerrno>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <execution>
#include <format>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <string_view>
#include <thread>
#include <unordered_set>

#include <aio.h>
#include <fcntl.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
// <version> defines the library feature-test macros. Without it (or <print>
// itself) __cpp_lib_print is not guaranteed to be visible at this point, so the
// fallback below could be selected even when the real std::print is available.
#include <version>
#ifdef __cpp_lib_print
#include <print>
#else
// Minimal stand-ins for std::print / std::println for standard libraries that
// do not provide <print>: format with std::vformat and write to std::cout.
// NOTE(review): adding templates to namespace std is formally undefined
// behaviour; kept so that call sites stay spelled std::println.
namespace std {
    template <typename T, typename... Args>
    inline void print(T format, Args &&... args) {
        auto f = std::vformat(format, std::make_format_args(args...));
        std::cout << f;
    }

    template <typename T, typename... Args>
    inline void println(T format, Args &&... args) {
        auto f = std::vformat(format, std::make_format_args(args...));
        std::cout << f << std::endl;
    }
}
#endif
// Tokens that count as "forbidden". The order matters to any table that is
// meant to stay index-aligned with this one.
constexpr std::array<std::string_view, 35> BAD_WORDS = {
    "recovery", "techie", "http", "https", "digital",
    "hack", "::", "//", "com", "@",
    "crypto", "bitcoin", "wallet", "hacker", "welcome",
    "whatsapp", "email", "cryptocurrency", "stolen", "freeze",
    "quick", "crucial", "tracing", "scammers", "expers",
    "hire", "century", "transaction", "essential", "managing",
    "contact", "contacting", "understanding", "assets", "funds",
};
// Hash set built from the BAD_WORDS entries. Its only reference in the code
// shown here is the commented-out lookup inside check_word_simple.
const std::unordered_set<std::string_view> BAD_WORDS_SET(BAD_WORDS.begin(), BAD_WORDS.end());
// 256-entry lookup table, indexed by a byte value, used by the crc32() overloads.
// NOTE(review): the values look like the standard reflected CRC-32 table
// (polynomial 0xEDB88320) - confirm before relying on interoperability.
static constexpr unsigned int crc_table[256] = {
0x00000000,
0x77073096,
0xee0e612c,
0x990951ba,
0x076dc419,
0x706af48f,
0xe963a535,
0x9e6495a3,
0x0edb8832,
0x79dcb8a4,
0xe0d5e91e,
0x97d2d988,
0x09b64c2b,
0x7eb17cbd,
0xe7b82d07,
0x90bf1d91,
0x1db71064,
0x6ab020f2,
0xf3b97148,
0x84be41de,
0x1adad47d,
0x6ddde4eb,
0xf4d4b551,
0x83d385c7,
0x136c9856,
0x646ba8c0,
0xfd62f97a,
0x8a65c9ec,
0x14015c4f,
0x63066cd9,
0xfa0f3d63,
0x8d080df5,
0x3b6e20c8,
0x4c69105e,
0xd56041e4,
0xa2677172,
0x3c03e4d1,
0x4b04d447,
0xd20d85fd,
0xa50ab56b,
0x35b5a8fa,
0x42b2986c,
0xdbbbc9d6,
0xacbcf940,
0x32d86ce3,
0x45df5c75,
0xdcd60dcf,
0xabd13d59,
0x26d930ac,
0x51de003a,
0xc8d75180,
0xbfd06116,
0x21b4f4b5,
0x56b3c423,
0xcfba9599,
0xb8bda50f,
0x2802b89e,
0x5f058808,
0xc60cd9b2,
0xb10be924,
0x2f6f7c87,
0x58684c11,
0xc1611dab,
0xb6662d3d,
0x76dc4190,
0x01db7106,
0x98d220bc,
0xefd5102a,
0x71b18589,
0x06b6b51f,
0x9fbfe4a5,
0xe8b8d433,
0x7807c9a2,
0x0f00f934,
0x9609a88e,
0xe10e9818,
0x7f6a0dbb,
0x086d3d2d,
0x91646c97,
0xe6635c01,
0x6b6b51f4,
0x1c6c6162,
0x856530d8,
0xf262004e,
0x6c0695ed,
0x1b01a57b,
0x8208f4c1,
0xf50fc457,
0x65b0d9c6,
0x12b7e950,
0x8bbeb8ea,
0xfcb9887c,
0x62dd1ddf,
0x15da2d49,
0x8cd37cf3,
0xfbd44c65,
0x4db26158,
0x3ab551ce,
0xa3bc0074,
0xd4bb30e2,
0x4adfa541,
0x3dd895d7,
0xa4d1c46d,
0xd3d6f4fb,
0x4369e96a,
0x346ed9fc,
0xad678846,
0xda60b8d0,
0x44042d73,
0x33031de5,
0xaa0a4c5f,
0xdd0d7cc9,
0x5005713c,
0x270241aa,
0xbe0b1010,
0xc90c2086,
0x5768b525,
0x206f85b3,
0xb966d409,
0xce61e49f,
0x5edef90e,
0x29d9c998,
0xb0d09822,
0xc7d7a8b4,
0x59b33d17,
0x2eb40d81,
0xb7bd5c3b,
0xc0ba6cad,
0xedb88320,
0x9abfb3b6,
0x03b6e20c,
0x74b1d29a,
0xead54739,
0x9dd277af,
0x04db2615,
0x73dc1683,
0xe3630b12,
0x94643b84,
0x0d6d6a3e,
0x7a6a5aa8,
0xe40ecf0b,
0x9309ff9d,
0x0a00ae27,
0x7d079eb1,
0xf00f9344,
0x8708a3d2,
0x1e01f268,
0x6906c2fe,
0xf762575d,
0x806567cb,
0x196c3671,
0x6e6b06e7,
0xfed41b76,
0x89d32be0,
0x10da7a5a,
0x67dd4acc,
0xf9b9df6f,
0x8ebeeff9,
0x17b7be43,
0x60b08ed5,
0xd6d6a3e8,
0xa1d1937e,
0x38d8c2c4,
0x4fdff252,
0xd1bb67f1,
0xa6bc5767,
0x3fb506dd,
0x48b2364b,
0xd80d2bda,
0xaf0a1b4c,
0x36034af6,
0x41047a60,
0xdf60efc3,
0xa867df55,
0x316e8eef,
0x4669be79,
0xcb61b38c,
0xbc66831a,
0x256fd2a0,
0x5268e236,
0xcc0c7795,
0xbb0b4703,
0x220216b9,
0x5505262f,
0xc5ba3bbe,
0xb2bd0b28,
0x2bb45a92,
0x5cb36a04,
0xc2d7ffa7,
0xb5d0cf31,
0x2cd99e8b,
0x5bdeae1d,
0x9b64c2b0,
0xec63f226,
0x756aa39c,
0x026d930a,
0x9c0906a9,
0xeb0e363f,
0x72076785,
0x05005713,
0x95bf4a82,
0xe2b87a14,
0x7bb12bae,
0x0cb61b38,
0x92d28e9b,
0xe5d5be0d,
0x7cdcefb7,
0x0bdbdf21,
0x86d3d2d4,
0xf1d4e242,
0x68ddb3f8,
0x1fda836e,
0x81be16cd,
0xf6b9265b,
0x6fb077e1,
0x18b74777,
0x88085ae6,
0xff0f6a70,
0x66063bca,
0x11010b5c,
0x8f659eff,
0xf862ae69,
0x616bffd3,
0x166ccf45,
0xa00ae278,
0xd70dd2ee,
0x4e048354,
0x3903b3c2,
0xa7672661,
0xd06016f7,
0x4969474d,
0x3e6e77db,
0xaed16a4a,
0xd9d65adc,
0x40df0b66,
0x37d83bf0,
0xa9bcae53,
0xdebb9ec5,
0x47b2cf7f,
0x30b5ffe9,
0xbdbdf21c,
0xcabac28a,
0x53b39330,
0x24b4a3a6,
0xbad03605,
0xcdd70693,
0x54de5729,
0x23d967bf,
0xb3667a2e,
0xc4614ab8,
0x5d681b02,
0x2a6f2b94,
0xb40bbe37,
0xc30c8ea1,
0x5a05df1b,
0x2d02ef8d
};
// Table-driven checksum of the bytes in `str`: the running value starts at
// 0xffffffff, consumes one byte per step through crc_table, and is XORed with
// 0xffffffff at the end.
constexpr uint32_t crc32(std::string_view str) {
    uint32_t state = 0xffffffff;
    for (std::size_t i = 0; i < str.size(); ++i) {
        const auto slot = (state ^ str[i]) & 0xff;  // low byte selects the table entry
        state = crc_table[slot] ^ (state >> 8);
    }
    return state ^ 0xffffffff;
}
// Same checksum as the string_view overload, for `size` bytes starting at `str`
// (no terminator is required or looked at).
constexpr uint32_t crc32(char const *str, const size_t size) {
    uint32_t state = 0xffffffff;
    char const *const stop = str + size;
    for (char const *cursor = str; cursor != stop; ++cursor) {
        state = crc_table[(state ^ *cursor) & 0xff] ^ (state >> 8);
    }
    return state ^ 0xffffffff;
}
constexpr std::array<uint32_t, 35> BAD_WORDS_HASH = {
crc32("recovery"),
crc32("techie"),
crc32("http"),
crc32("https"),
crc32("digital"),
crc32("hack"),
crc32("::"),
crc32("//"),
crc32("com"),
crc32("@"),
crc32("crypto"),
crc32("bitcoin"),
crc32("wallet"),
crc32("hacker"),
crc32("welcome"),
crc32("whatsapp"),
crc32("email"),
crc32("cryptocurrency"),
crc32("stolen"),
crc32("freeze"),
crc32("quick"),
crc32("crucial"),
crc32("tracing"),
crc32("scammers"),
crc32("expers"),
crc32("hire"),
crc32("century"),
crc32("transaction"),
crc32("essential"),
crc32("managing"),
crc32("contact"),
crc32("contacting"),
crc32("understanding"),
crc32("assets"),
crc32("funds")
};
// Second hash set built from the BAD_WORDS entries (element type deduced from
// the iterators). No use of it is visible in the code shown here.
const std::unordered_set BAD_WORDS_STR(BAD_WORDS.begin(), BAD_WORDS.end());
// Length of the shortest entry in BAD_WORDS, computed at compile time.
constexpr auto SHORTEST_BAD_WORD = std::ranges::min(
    BAD_WORDS, {}, [](const std::string_view &word) { return word.size(); }
).size();

// Length of the longest entry in BAD_WORDS, computed at compile time.
constexpr auto LONGEST_BAD_WORD = std::ranges::max(
    BAD_WORDS, {}, [](const std::string_view &word) { return word.size(); }
).size();
// Program-wide counters, updated while files are processed and printed by main().
int totalWordCount = 0;
int totalCapitalizedCount = 0;
int totalSentenceCount = 0;
int totalNumberCount = 0;
int totalForbiddenCount = 0;
// Starts at 0: aio_completion_handler adds one per processed file, so an
// initial value of 1 made the printed "File Count" one higher than the number
// of files.
int fileCount = 0;
int failCount = 0;
// Number of files whose completion handler has not finished yet; main() sets it
// to the number of inputs and waits until it reaches 0.
int done = 0;
// Per-request context: main() allocates one for every input file and passes it
// to aio_completion_handler through sigev_value.sival_ptr.
struct info {
// File path; main() stores the corresponding argv entry here.
std::string_view name;
// The asynchronous read request this context belongs to.
aiocb *cb;
// Time at which this object was created (only referenced by commented-out timing output).
const std::chrono::time_point<std::chrono::steady_clock> start = std::chrono::steady_clock::now();
};
constexpr void check_word_simple(const char *word, const ssize_t size) {
if (size < SHORTEST_BAD_WORD || size > LONGEST_BAD_WORD) {
return;
}
// if (BAD_WORDS_SET.contains(word)) {
// totalForbiddenCount++;
// }
const auto hs = crc32(word, size);
for (int i = 0; i < BAD_WORDS_HASH.size(); ++i) {
if (BAD_WORDS_HASH[i] == hs) {
totalForbiddenCount++;
return;
}
}
}
void read_str(char *str, ssize_t size) {
int mark = -1;
int fileWords = 0;
for (int pos = 0; pos <= size; ++pos) {
char *c = str + pos;
if (*c == '.') {
totalSentenceCount++;
}
if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
if (mark != -1) {
check_word_simple(str + mark, pos - mark);
mark = -1;
}
} else if (mark == -1) {
++fileWords;
if (*c >= 'A' && *c <= 'Z') {
totalCapitalizedCount++;
}
mark = pos;
} else if (*c >= '0' && *c <= '9') {
totalNumberCount++;
for (; pos <= size; ++pos) {
c = str + pos;
if (*c == '.') {
totalSentenceCount++;
}
if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
break;
}
}
mark = -1;
}
}
if (mark != -1) {
check_word_simple(str + mark, size - mark);
}
totalWordCount += fileWords;
}
// SIGEV_THREAD notification callback for one finished read request.
// sigval.sival_ptr is the `info` object main() allocated for the request. On
// success the bytes that were read are analysed; otherwise the error is printed
// and the file is counted as failed. In both cases the file is counted and
// `done` is decremented so main() can stop waiting.
void aio_completion_handler(sigval_t sigval) {
    // Notifications may run on different threads at the same time, so every
    // shared counter is updated atomically.
    std::atomic_ref<int>(fileCount).fetch_add(1, std::memory_order_relaxed);

    info *data = (info *)sigval.sival_ptr;
    auto req = data->cb;

    /* Did the request complete? */
    auto error = aio_error(req);
    if (error == 0) {
        /* Request completed successfully, aio_return yields the number of bytes read */
        read_str((char *)req->aio_buf, aio_return(req));
    } else {
        std::println("Error at aio_error ({}): ", error);
        std::atomic_ref<int>(failCount).fetch_add(1, std::memory_order_relaxed);
    }

    // main() never touches the context again, so it is released here
    // (it was leaked before).
    delete data;

    // Release ordering publishes the counter updates above to the thread that
    // observes `done` reaching 0.
    std::atomic_ref<int>(done).fetch_sub(1, std::memory_order_release);
}
// Entry point: submits one asynchronous read (first 5 MiB) per file named on
// the command line, waits until every completion handler has run, then prints
// the global counters and three derived ratios.
// Returns 1 for a usage error, an allocation failure, or when any file failed;
// 0 otherwise.
int main(const int argc, char *argv[]) {
    if (argc < 2) {
        std::println("Usage: {} <file1> <file2> ... <fileN>", argv[0]);
        return 1;
    }

    const int fileTotal = argc - 1;
    // Only this many bytes of each file are read and analysed.
    constexpr std::size_t BUFFER_SIZE = 5 * 1024 * 1024;

    done = fileTotal;

    // calloc instead of malloc: every aiocb field that is not set explicitly
    // below must be zero, and unused list slots must be null pointers.
    auto aiocb_list = (struct aiocb *)calloc(fileTotal, sizeof(struct aiocb));
    auto aiocb_list_ptr = (struct aiocb **)calloc(fileTotal, sizeof(struct aiocb *));
    if (aiocb_list == nullptr || aiocb_list_ptr == nullptr) {
        std::println("Out of memory");
        free(aiocb_list);
        free(aiocb_list_ptr);
        return 1;
    }

    for (int i = 0; i < fileTotal; i++) {
        const int fd = open(argv[i + 1], O_RDONLY);
        void *buffer = (fd == -1) ? nullptr : malloc(BUFFER_SIZE);
        aiocb_list[i].aio_fildes = fd;
        aiocb_list[i].aio_buf = buffer;

        if (fd == -1 || buffer == nullptr) {
            // No request is submitted for this file, so no completion handler
            // will ever run for it: account for it here, otherwise the wait
            // loop below would never finish. The list slot stays null, which
            // lio_listio ignores. No other thread exists yet at this point.
            std::println("File doesn't exist: {}", argv[i + 1]);
            ++fileCount;
            ++failCount;
            --done;
            continue;
        }

        // lio_listio only performs a read when the opcode says so.
        aiocb_list[i].aio_lio_opcode = LIO_READ;
        aiocb_list[i].aio_offset = 0;
        aiocb_list[i].aio_nbytes = BUFFER_SIZE;
        aiocb_list[i].aio_sigevent.sigev_notify = SIGEV_THREAD;
        aiocb_list[i].aio_sigevent.sigev_notify_function = aio_completion_handler;
        aiocb_list[i].aio_sigevent.sigev_notify_attributes = nullptr;
        aiocb_list[i].aio_sigevent.sigev_value.sival_ptr = new info{
            argv[i + 1],
            &aiocb_list[i]};
        aiocb_list_ptr[i] = &aiocb_list[i];
    }

    if (lio_listio(LIO_WAIT, aiocb_list_ptr, fileTotal, nullptr) == -1) {
        const int submitError = errno;
        std::println("lio_listio reported an error (errno {})", submitError);
    }

    // The reads are finished, but the notification handlers may still be
    // running; wait for all of them. The acquire load pairs with the handlers'
    // update of `done`.
    while (std::atomic_ref<int>(done).load(std::memory_order_acquire) > 0) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
    std::println("Done reading files, {} done", done);

    // Ratios are reported as 0 when their denominator is 0.
    double capitalizedPercentage = (totalWordCount > 0)
                                       ? static_cast<double>(totalCapitalizedCount) / totalWordCount * 100.0
                                       : 0;
    double forbiddenPercentage = (totalWordCount > 0)
                                     ? static_cast<double>(totalForbiddenCount) / totalWordCount * 100.0
                                     : 0;
    double wordCountPerSentence = (totalSentenceCount > 0)
                                      ? static_cast<double>(totalWordCount) / totalSentenceCount
                                      : 0;

    std::println(
        "Word Count: {}\nCapitalized Count: {}\nSentence Count: {}\nNumber Count: {}\nForbidden Count: {}\nFile Count: {}\nFail Count: {}\nCapitalized Percentage: {}%\nForbidden Percentage: {}%\nWord Count Per Sentence: {}",
        totalWordCount, totalCapitalizedCount, totalSentenceCount, totalNumberCount, totalForbiddenCount, fileCount, failCount,
        capitalizedPercentage, forbiddenPercentage, wordCountPerSentence
    );

    for (int i = 0; i < fileTotal; i++) {
        if (aiocb_list[i].aio_fildes != -1) {
            close(aiocb_list[i].aio_fildes);
        }
        free((void *)aiocb_list[i].aio_buf);
    }
    free(aiocb_list);
    free(aiocb_list_ptr);

    if (failCount > 0) {
        return 1;
    }
    return 0;
}

2
jest_rust/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
/Cargo.lock

14
jest_rust/Cargo.toml Normal file
View File

@ -0,0 +1,14 @@
[package]
name = "jisspam"
version = "0.1.0"
edition = "2024"
[dependencies]
fxhash = "0.2.1"
tokio = { version = "1.44.1", features = ["full"] }
[profile.release]
codegen-units = 1 # less means more compile work but better optimized
lto = "thin" # thin has best performance. fat the worst
strip = true
panic = "abort"

195
jest_rust/README.md Normal file
View File

@ -0,0 +1,195 @@
for https://retoor.molodetz.nl/retoor/isspam
Instructions for running it on the Ubuntu terminal at https://snek.molodetz.nl/terminal.html:
```
mkdir /project
cd /project
git clone https://retoor.molodetz.nl/retoor/isspam.git
apt install valgrind curl
export RUSTUP_HOME=/project/.rustup
export CARGO_HOME=/project/.cargo
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
. "/project/.cargo/env"
cd isspam
rustup install nightly
rustup default nightly
make
make benchmark
python3 bench.py
```
clone: `git clone https://gitlab.com/jestdotty-group/draft/jisspam.git jest_rust`
edit make: `vi makefile` and add build:
```
build_jest:
@echo "compiling jest_rust project"
cd jest_rust && cargo build --release && cp target/release/jisspam ..
```
append `build_jest` to the `all` target:
```
all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest
```
add to bench: `vi bench.py`
```py
time_start = time.time()
subprocess.check_output('./jisspam books/*.txt', shell=True)
print("Time Jest Rust:", time.time() - time_start)
```
run: `python3 bench.py`
output looks something like this:
```
***benchmarking***
Time C: 31.315868377685547
Time Rust: 41.232205867767334
Time CPP: 20.1683189868927
Time Borded CPP: 15.468477964401245
Time Jest Rust: 54.74523115158081
Time Retoor Python: 287.63036131858826
***end benchmark***
```
add `/jisspam` to `.gitignore` to not commit the executable accidentally
# local machine benchmarks
single threaded:
```
***benchmarking***
Time C: 2.4082751274108887
Time Rust: 2.865687847137451
Time CPP: 1.1568822860717773
Time Borded CPP: 1.9657189846038818
Time Jest Rust: 33.63373279571533
Time Retoor Python: 133.92413425445557
***end benchmark***
```
rayon:
```
***benchmarking***
Time C: 2.457853317260742
Time Rust: 3.0170154571533203
Time CPP: 1.1482579708099365
Time Borded CPP: 2.002591371536255
Time Jest Rust: 4.294418811798096
Time Retoor Python: 201.2997748851776
***end benchmark***
```
tokio:
```
***benchmarking***
Time C: 2.448648452758789
Time Rust: 3.095592737197876
Time CPP: 1.1662013530731201
Time Borded CPP: 1.9207634925842285
Time Jest Rust: 4.717588901519775
Time Retoor Python: 139.8203284740448
***end benchmark***
```
## compile options benchmarks
lto not thin: `Time Jest Rust: 5.306957483291626` slower
lto fat: `Time Jest Rust: 5.413678407669067` slower
codegen-units 1: `Time Jest Rust: 4.451631546020508` faster
opt-level z: `Time Jest Rust: 7.045313119888306` slower
strip true: `Time Jest Rust: 4.337219476699829` faster
lto true: `Time Jest Rust: 4.703521728515625` slower
lto none: `Time Jest Rust: 4.817203998565674`
lto thin: `Time Jest Rust: 4.429729223251343` faster
# data integrity
(this isn't tested, just guessed, and I don't have data to compare it with)
for loops:
```
file count: 904
failed file count: 0
sentence count: 5602301
word count: 81701260
capitalized count: 1753639
numeric count: 14981248
forbidden count: 1237059
words per sentence average: 14.6
forbidden word percentage: 2%
capitalized word percentage: 2%
benchmark: 5033ms
```
muncher:
```
file count: 904
failed file count: 0
sentence count: 5338705
word count: 86765116
capitalized count: 13640820
numeric count: 10902254
forbidden count: 0
words per sentence average: 16.3
forbidden word percentage: 0%
capitalized word percentage: 16%
benchmark: 504ms
```
with forbidden words:
```
file count: 904
failed file count: 0
sentence count: 5338705
word count: 86765116
capitalized count: 13640820
numeric count: 10902254
forbidden count: 279717
words per sentence average: 16.3
forbidden word percentage: 0%
capitalized word percentage: 16%
benchmark: 6078ms
```
# forbidden words benchmarks
seems they take up about 4000ms to churn through in the original version
for loops count forbidden word once only:
```
file count: 904
failed file count: 0
sentence count: 5602301
word count: 81701260
capitalized count: 1753639
numeric count: 14981248
forbidden count: 1143234
words per sentence average: 14.6
forbidden word percentage: 1%
capitalized word percentage: 2%
benchmark: 4737ms
```
for loops with trie:
```
file count: 904
failed file count: 0
sentence count: 5602301
word count: 81701260
capitalized count: 1753639
numeric count: 14981248
forbidden count: 176528
words per sentence average: 14.6
forbidden word percentage: 0%
capitalized word percentage: 2%
benchmark: 1588ms
```
muncher with trie is 2600ms
for loops with fxhash trie: 1200ms

264
jest_rust/src/main.rs Normal file
View File

@ -0,0 +1,264 @@
mod stats;
mod trie;
use stats::Stats;
use std::{env, fs, sync::LazyLock};
use tokio::sync::mpsc;
use trie::Trie;
/// Lower-case tokens that mark a word as "forbidden".
///
/// The trie is built the first time the static is dereferenced and reused afterwards.
static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
    // Listed in their original order; lookups do not depend on insertion order.
    const WORDS: &[&str] = &[
        "recovery",
        "techie",
        "http",
        "https",
        "digital",
        "hack",
        "::",
        "//",
        "@",
        "com",
        "crypto",
        "bitcoin",
        "wallet",
        "hacker",
        "welcome",
        "whatsapp",
        "email",
        "cryptocurrency",
        "stolen",
        "freeze",
        "quick",
        "crucial",
        "tracing",
        "scammers",
        // NOTE(review): possibly meant "experts"; kept because isspam.py uses the same spelling.
        "expers",
        "hire",
        "century",
        "transaction",
        "essential",
        "managing",
        "contact",
        "contacting",
        "understanding",
        "assets",
        "funds",
    ];
    let mut forbidden = Trie::default();
    for &entry in WORDS {
        forbidden.insert(entry);
    }
    forbidden
});
impl Stats {
    /// Analyses `text` and adds its sentence, word, capitalized, numeric and
    /// forbidden counts to `self`.
    pub fn process(&mut self, text: &str) {
        // self.muncher(text);
        self.for_loops(text);
    }

    /// Accounts for one complete, non-empty word.
    ///
    /// Shared by `muncher` and `for_loops` so both strategies apply the same rules:
    /// - every numeric character bumps `numeric_count` (per character, not per word);
    /// - a word is capitalized only if all of its characters are ASCII uppercase,
    ///   so a digit or punctuation mark disqualifies it;
    /// - a word is forbidden if its lowercase form is in `FORBIDDEN_WORDS`.
    fn count_word(&mut self, word: &str) {
        self.word_count += 1;
        let mut all_capitalized = true;
        for c in word.chars() {
            if c.is_numeric() {
                self.numeric_count += 1;
                //TODO are numbers capitalized or not? I don't know!
            }
            if !c.is_ascii_uppercase() {
                all_capitalized = false;
            }
        }
        if all_capitalized {
            self.capitalized_count += 1;
        }
        if FORBIDDEN_WORDS.contains(&word.to_lowercase()) {
            self.forbidden_count += 1;
        }
    }

    #[allow(dead_code)]
    /// Single-pass variant of `for_loops`; produces the same counts.
    ///
    /// A word is a maximal run of characters that are neither whitespace nor `.`.
    /// A sentence is a `.`-delimited stretch containing at least one word, so runs
    /// of dots and blank stretches add nothing, and text without a final period
    /// still closes its last sentence.
    ///
    /// The earlier version dropped the first character of a word when leaving its
    /// "whitespaced"/"dotted" states, counted empty words and never flushed a
    /// trailing word. The timings recorded for it (500ms without the forbidden
    /// check, 6000ms with it, 2600ms with the trie) therefore need re-measuring.
    fn muncher(&mut self, text: &str) {
        let mut word = String::new();
        let mut sentence_has_word = false;
        for c in text.chars() {
            let ends_sentence = c == '.';
            if !ends_sentence && !c.is_whitespace() {
                word.push(c);
                continue;
            }
            //end of word
            if !word.is_empty() {
                self.count_word(&word);
                word.clear();
                sentence_has_word = true;
            }
            //end of sentence, unless nothing was said since the previous dot
            if ends_sentence && sentence_has_word {
                self.sentence_count += 1;
                sentence_has_word = false;
            }
        }
        //flush whatever the text ended on
        if !word.is_empty() {
            self.count_word(&word);
            sentence_has_word = true;
        }
        if sentence_has_word {
            self.sentence_count += 1;
        }
    }

    #[allow(dead_code)]
    /// Splits on `.` into sentences, then on whitespace into words.
    ///
    /// typically 5000ms
    /// with trie this is 1600ms
    fn for_loops(&mut self, text: &str) {
        for sentence in text.split('.').map(str::trim).filter(|s| !s.is_empty()) {
            self.sentence_count += 1;
            // `split_whitespace` never yields empty or padded items, so the words
            // need no further trimming or filtering.
            for word in sentence.split_whitespace() {
                self.count_word(word);
            }
        }
    }
}
/// Reads every file named on the command line, analyses each one in its own
/// tokio task and prints the combined statistics.
#[tokio::main]
async fn main() {
    let mut totals = Stats::default();
    let (sender, mut receiver) = mpsc::unbounded_channel();
    for path in env::args().skip(1) {
        // Files are read one after another here; only the analysis is spawned.
        match fs::read_to_string(&path) {
            Ok(contents) => {
                totals.file_count += 1;
                let sender = sender.clone();
                tokio::spawn(async move {
                    let mut partial = Stats::default();
                    partial.process(&contents);
                    sender.send(partial).unwrap();
                });
            }
            Err(_) => totals.failed_file_count += 1,
        }
    }
    // Only the clones held by the tasks may remain, otherwise `recv` below would
    // never return `None` and the loop would not end.
    drop(sender);
    while let Some(partial) = receiver.recv().await {
        totals += partial;
    }
    println!("{totals}");
}
/// Builds the release binary, runs it on every entry of `dir` and prints its
/// output together with the wall-clock time of the run.
///
/// This is a benchmark harness rather than an assertion-based test: build and
/// run problems are reported on stderr, and only a missing or unreadable `dir`
/// (or a non-UTF-8 path) panics through the `unwrap`s.
#[cfg(test)]
fn run_benchmark(dir: &str) {
    use std::{env, fs, process::Command, time::Instant};
    println!("cwd: {}", env::current_dir().unwrap().display());
    //compile
    match Command::new("cargo").arg("build").arg("--release").output() {
        Ok(output) => {
            println!("compiled {}", String::from_utf8_lossy(&output.stdout));
            // `output()` is `Ok` even when cargo exits non-zero, so surface its
            // diagnostics instead of silently benchmarking a stale binary.
            if !output.status.success() {
                eprintln!(
                    "compile failed: {}",
                    String::from_utf8_lossy(&output.stderr)
                );
            }
        }
        Err(err) => eprintln!("compile failed: {err}"),
    }
    //get test files
    let files = fs::read_dir(dir)
        .unwrap()
        .map(|f| {
            f.unwrap()
                .path()
                .canonicalize()
                .unwrap()
                .to_str()
                .unwrap()
                .to_string()
        })
        .collect::<Vec<_>>();
    println!("test files found: {:#?}", files);
    //benchmark run
    let benchmark = Instant::now();
    match Command::new("target/release/jisspam").args(files).output() {
        Ok(output) => println!("{}", String::from_utf8_lossy(&output.stdout)),
        Err(err) => eprintln!("run failed: {err}"),
    }
    println!("benchmark: {}ms", benchmark.elapsed().as_millis());
}

/// Benchmarks the binary on the files under `test_files`.
#[test]
fn test() {
    run_benchmark("test_files");
}

/// Benchmarks the binary on the files under `../books`.
#[test]
fn books_test() {
    run_benchmark("../books");
}

58
jest_rust/src/stats.rs Normal file
View File

@ -0,0 +1,58 @@
use std::{fmt::Display, ops::AddAssign};
/// Counters collected while analysing text files. `Default` starts them all at zero.
#[derive(Debug, Default)]
pub struct Stats {
/// Files whose contents were read successfully (incremented in `main`).
pub file_count: u32,
/// Files that could not be read into a string (incremented in `main`).
pub failed_file_count: u32,
/// Sentences found by `Stats::process`.
pub sentence_count: u32,
/// Words found by `Stats::process`.
pub word_count: u32,
/// Words made up entirely of ASCII uppercase characters.
pub capitalized_count: u32,
/// Numeric characters; incremented once per character, not once per word.
pub numeric_count: u32,
/// Words whose lowercase form is in the forbidden-word trie.
pub forbidden_count: u32,
}
impl AddAssign for Stats {
fn add_assign(&mut self, rhs: Self) {
self.file_count += rhs.file_count;
self.failed_file_count += rhs.failed_file_count;
self.sentence_count += rhs.sentence_count;
self.word_count += rhs.word_count;
self.capitalized_count += rhs.capitalized_count;
self.numeric_count += rhs.numeric_count;
self.forbidden_count += rhs.forbidden_count;
}
}
impl Display for Stats {
    /// Writes one `label: value` line per counter, followed by the derived
    /// words-per-sentence average and the forbidden/capitalized percentages.
    /// The last line carries no trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A zero denominator (no words or no sentences, e.g. when no file could
        // be read) yields 0 rather than printing `NaN` or `inf`. `f64::from` is
        // lossless for `u32`, whereas `as f32` rounds counts above 2^24.
        let ratio = |numerator: u32, denominator: u32| -> f64 {
            if denominator == 0 {
                0.0
            } else {
                f64::from(numerator) / f64::from(denominator)
            }
        };
        writeln!(f, "file count: {}", self.file_count)?;
        writeln!(f, "failed file count: {}", self.failed_file_count)?;
        writeln!(f, "sentence count: {}", self.sentence_count)?;
        writeln!(f, "word count: {}", self.word_count)?;
        writeln!(f, "capitalized count: {}", self.capitalized_count)?;
        writeln!(f, "numeric count: {}", self.numeric_count)?;
        writeln!(f, "forbidden count: {}", self.forbidden_count)?;
        writeln!(
            f,
            "words per sentence average: {:.1}",
            ratio(self.word_count, self.sentence_count)
        )?;
        writeln!(
            f,
            "forbidden word percentage: {:.0}%",
            ratio(self.forbidden_count, self.word_count) * 100.0,
        )?;
        write!(
            f,
            "capitalized word percentage: {:.0}%",
            ratio(self.capitalized_count, self.word_count) * 100.0,
        )
    }
}

33
jest_rust/src/trie.rs Normal file
View File

@ -0,0 +1,33 @@
use fxhash::FxBuildHasher;
use std::collections::HashMap;
type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
/// One node of the trie, reached by following one character per level from the root.
#[derive(Default, Debug, Clone)]
struct Node {
// true when an inserted word ends exactly at this node
end: bool,
// child nodes keyed by the next character of the word
children: FxHashMap<char, Node>,
}
/// Set of words stored character by character, supporting exact-match lookup.
#[derive(Default, Debug, Clone)]
pub struct Trie {
// node for the empty prefix; every inserted word starts from here
root: Node,
}
impl Trie {
    /// Adds `word` to the set, creating any missing nodes along its path.
    pub fn insert(&mut self, word: &str) {
        let terminal = word
            .chars()
            .fold(&mut self.root, |node, ch| node.children.entry(ch).or_default());
        terminal.end = true;
    }

    /// Returns `true` only if exactly `word` was inserted; a mere prefix of an
    /// inserted word does not match.
    pub fn contains(&self, word: &str) -> bool {
        // `try_fold` stops at the first character that has no child node.
        word.chars()
            .try_fold(&self.root, |node, ch| node.children.get(&ch))
            .is_some_and(|node| node.end)
    }
}

View File

@ -0,0 +1 @@
I am Nastya and downvoted this post because post is considered spam. Your message will be removed from this community site due too much downvotes. See my profile for more information. Read my source code mentioned on my profile to see what you did wrong. Should be no problem for a developer.

View File

@ -0,0 +1,20 @@
HIRE Century Web Recovery TO RECOVER YOUR LOST BITCOIN
If youve lost your Bitcoin to an online scam, hiring a professional recovery service can significantly improve your chances of getting your funds back. Century Web Recovery specializes in Bitcoin recovery, helping victims reclaim their stolen assets. Heres what you need to know:
Understanding the Recovery Process
The recovery process begins with contacting Century Web Recovery. Their team will guide you through the steps necessary to initiate an investigation into your case. Understanding the process is key to managing your expectations.
Documenting Your Case
To facilitate recovery, its essential to document all relevant information regarding the scam. This includes transaction records, wallet addresses, and any communications with the scammer. Century Web Recovery will help you gather this information to build a strong case.
Investigation and Tracking
Once you hire Century Web Recovery, their experts will begin investigating your case. They use sophisticated tools to track the stolen Bitcoin, identifying the paths taken by the scammers. This tracing is crucial for successful recovery.
Freezing Stolen Assets
Quick action is vital in recovering stolen Bitcoin.Century Web Recovery works directly with cryptocurrency exchanges to freeze any stolen assets, preventing the scammers from cashing out your funds. This collaboration is essential for a successful recovery.
Legal Support and Guidance
If necessary, Century Web Recovery can provide legal support. They will guide you on reporting the scam to law enforcement and assist in filing any legal claims. Their expertise in crypto-related cases ensures you receive the best advice on how to proceed.
If youve lost Bitcoin to an online scam, dont hesitate. Hire Century Web Recovery to recover your lost assets and regain your financial security.

View File

@ -0,0 +1,3 @@
Email; digital hack recovery @ techie . com
WhatsApp +19152151930
Website; https : // digital hack recovery . com

View File

@ -0,0 +1,7 @@
TESTED CRYPTOCURRENCY RECOVERY SERVICE \\ DIGITAL HACK RECOVERY
When the devastating reality of lost or stolen Bitcoin strikes, the path to recovery can seem bleak and hopeless. However, the story of DIGITAL HACK RECOVERY stands as a shining beacon of hope, demonstrating the power of perseverance in the face of seemingly insurmountable odds. This specialized service, founded by a team of tenacious experts, has made it their mission to reunite people with their rightful digital assets, no matter how complex or convoluted the situation may be. Through their unwavering dedication and meticulous investigative techniques, DIGITAL HACK RECOVERY has time and again succeeded in tracking down lost Bitcoin, navigating the labyrinthine world of blockchain technology and leveraging their deep understanding of crypto ecosystems. Their success stories are a testament to the resilience of the human spirit, as they've helped individuals regain access to life-changing sums of money that had been presumed lost forever. In an industry rife with uncertainty and risk, DIGITAL HACK RECOVERY has emerged as a trusted ally, guiding clients through the darkness with a steadfast commitment to recovery. By combining cutting-edge digital forensics, strategic partnerships, and a relentless determination to leave no stone unturned, this remarkable organization has earned the gratitude of countless individuals who had resigned themselves to the permanent disappearance of their hard-earned Bitcoin. In a world where the digital landscape can feel overwhelming and unpredictable, DIGITAL HACK RECOVERY stands as a shining example of what can be achieved through perseverance, expertise, and an unwavering belief in the possibility of redemption. I tried everything I could think of. I contacted support forums, tried password recovery tools, scoured Reddit for advice, and spent countless hours following step-by-step guides. Every lead I followed seemed to end in disappointment. I felt like I was chasing an illusion—getting closer, but never quite reaching it. 
With every attempt that failed, my hope dwindled further. It was an overwhelming feeling, knowing that I had lost something irreplaceable, something I had worked so hard for, and worse—something I had no way of recovering. Months passed, and I was ready to give up. I had accepted that my Bitcoin was gone, lost forever. But that feeling of helplessness lingered, gnawing at me in the back of my mind but DIGITAL HACK RECOVERY made the change of my life when I got the news of the recovery. Thank you very much. Contact them via contact details bellow
Email; digital hack recovery @ techie . com
WhatsApp +19152151930
Website; https : // digital hack recovery . com

85
retoor_c/isspam.py Normal file
View File

@ -0,0 +1,85 @@
import os
import sys
import threading
from concurrent.futures import ThreadPoolExecutor
# NOTE(review): neither of these two constants is referenced by the code visible
# in this file, and the set below holds 35 entries rather than 40.
MAX_TEXT_LENGTH = 1024
FORBIDDEN_WORDS_COUNT = 40

# Lower-case tokens that mark a word as forbidden.
forbidden_words = {
    "recovery",
    "techie",
    "http",
    "https",
    "digital",
    "hack",
    "::",
    "//",
    "com",
    "@",
    "crypto",
    "bitcoin",
    "wallet",
    "hacker",
    "welcome",
    "whatsapp",
    "email",
    "cryptocurrency",
    "stolen",
    "freeze",
    "quick",
    "crucial",
    "tracing",
    "scammers",
    "expers",
    "hire",
    "century",
    "transaction",
    "essential",
    "managing",
    "contact",
    "contacting",
    "understanding",
    "assets",
    "funds",
}
class AnalysisResult:
    """Counters for a single input file, filled in by `analyze_file`."""

    def __init__(self, filename):
        # Path of the file these counters describe.
        self.filename = filename
        # All counters start at zero and stay there if the file cannot be read.
        self.total_word_count = \
            self.total_capitalized_count = \
            self.total_sentence_count = \
            self.total_number_count = \
            self.total_forbidden_count = 0
def is_forbidden(word):
    """Return True when `word` is one of the forbidden words, ignoring case.

    Every entry of `forbidden_words` is lower case, so the token is lowered
    before the lookup; otherwise "Bitcoin" or "RECOVERY" would never match.
    """
    return word.lower() in forbidden_words
def read_file(filename):
    """Return the text of `filename`, or None when it cannot be read.

    The file is opened directly instead of being checked with `os.path.exists`
    first, so a file that disappears in between, a directory or a permission
    problem is reported and skipped rather than raising in the worker thread.
    The text is decoded as UTF-8 whatever the platform locale is; undecodable
    bytes become U+FFFD so that one bad byte does not abort the run.
    """
    try:
        with open(filename, 'r', encoding='utf-8', errors='replace') as file:
            return file.read()
    except FileNotFoundError:
        print(f"File doesn't exist: {filename}")
    except OSError as error:
        print(f"Could not read file: {filename} ({error})")
    return None
def analyze_file(result):
    """Read `result.filename` and store its counts on `result`.

    `result` is left untouched when the file is missing or empty.
    """
    contents = read_file(result.filename)
    if not contents:
        return

    words = contents.split()
    capitalized = with_digit = forbidden = 0
    for word in words:
        # Capitalized means the first character is upper case.
        if word[0].isupper():
            capitalized += 1
        # A token counts once however many digits it contains.
        if any(ch.isdigit() for ch in word):
            with_digit += 1
        if is_forbidden(word):
            forbidden += 1

    # Sentences are approximated by the number of periods in the text.
    result.total_sentence_count = contents.count('.')
    result.total_word_count = len(words)
    result.total_capitalized_count = capitalized
    result.total_number_count = with_digit
    result.total_forbidden_count = forbidden
def main():
    """Analyse every file named on the command line and print the totals.

    Each file is analysed on a thread-pool worker; the per-file results are
    summed once all workers have finished.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file1> <file2> ... <fileN>")
        return
    results = []
    with ThreadPoolExecutor() as executor:
        futures = []
        for filename in sys.argv[1:]:
            result = AnalysisResult(filename)
            results.append(result)
            futures.append(executor.submit(analyze_file, result))
        # Wait for every worker and re-raise anything one of them raised.
        for future in futures:
            future.result()
    total_word_count = sum(result.total_word_count for result in results)
    total_capitalized_count = sum(result.total_capitalized_count for result in results)
    total_sentence_count = sum(result.total_sentence_count for result in results)
    total_number_count = sum(result.total_number_count for result in results)
    total_forbidden_count = sum(result.total_forbidden_count for result in results)
    # `(count > 0) * (a / count)` still evaluates the division and raises
    # ZeroDivisionError when count is 0; a conditional expression does not.
    capitalized_percentage = (
        total_capitalized_count / total_word_count * 100.0 if total_word_count > 0 else 0.0
    )
    forbidden_percentage = (
        total_forbidden_count / total_word_count * 100.0 if total_word_count > 0 else 0.0
    )
    word_count_per_sentence = (
        total_word_count / total_sentence_count if total_sentence_count > 0 else 0.0
    )
    print(f"\nTotal Words: {total_word_count}")
    print(f"Total Capitalized words: {total_capitalized_count}")
    print(f"Total Sentences: {total_sentence_count}")
    print(f"Total Numbers: {total_number_count}")
    print(f"Total Forbidden words: {total_forbidden_count}")
    print(f"Capitalized percentage: {capitalized_percentage:.6f}%")
    print(f"Forbidden percentage: {forbidden_percentage:.6f}%")
    print(f"Word count per sentence: {word_count_per_sentence:.6f}")
    # Counts the files named on the command line, readable or not.
    print(f"Total files read: {len(sys.argv) - 1}")


if __name__ == "__main__":
    main()