940 lines
28 KiB
Rust
Raw Normal View History

2025-10-06 13:39:29 +02:00
#![feature(likely_unlikely)]
#![feature(rust_cold_cc)]
2024-11-30 20:58:19 +01:00
2025-10-06 13:45:18 +02:00
#![allow(dead_code)]
//mod books;
2025-10-06 13:39:29 +02:00
use rayon::prelude::*;
2025-10-06 13:39:29 +02:00
use std::cell::RefCell;
2025-10-06 13:45:18 +02:00
use std::ffi::OsStr;
use std::fs::OpenOptions;
2025-10-06 13:39:29 +02:00
use std::io::Read;
use std::sync::Mutex;
use std::thread::available_parallelism;
use std::time::{Duration, Instant};
2025-10-06 13:45:18 +02:00
use std::{array, env, hint, process};
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
/// Returns `true` if `b` is one of the ASCII whitespace bytes used to separate
/// words: tab, line feed, form feed, carriage return or space.
///
/// Vertical tab (`0x0B`) is deliberately *not* treated as whitespace; this is
/// the same set that `u8::is_ascii_whitespace` accepts.
#[inline]
fn is_ascii_whitespace(b: u8) -> bool {
    b.is_ascii_whitespace()
}
2025-10-06 13:39:29 +02:00
/// Returns `true` if `b` is an ASCII uppercase letter (`A`..=`Z`).
#[inline]
fn is_ascii_upper(b: u8) -> bool {
    b.is_ascii_uppercase()
}
2025-10-06 13:39:29 +02:00
/// Returns `true` if `b` is an ASCII decimal digit (`0`..=`9`).
#[inline]
fn is_ascii_digit(b: u8) -> bool {
    b.is_ascii_digit()
}
/// Per-file counters produced by `analyze`; `main` keeps one entry per input file.
///
/// NOTE(review): the 128-byte alignment presumably keeps entries written by
/// different worker threads on separate cache lines — confirm.
#[repr(align(128))]
#[derive(Copy, Clone)]
struct Stats {
/// Number of `.` bytes found inside words.
pub sentences: u32,
/// Number of whitespace-separated words.
pub words: u32,
/// Number of words consisting only of ASCII uppercase letters.
pub capitalizeds: u32,
/// Number of ASCII digit bytes (every digit counts, not every numeric word).
pub numbers: u32,
/// Number of words for which the forbidden-word table lookup succeeded.
pub forbiddens: u32,
}
// Accumulated file-reading time, printed at the end of `main`. The code that
// adds to it is currently commented out, so it stays at zero.
static TIME_SPENT_READING_FILES: Mutex<Duration> = Mutex::new(Duration::from_secs(0));
// Number of bytes (6 MiB) that `WorkState::new` allocates for `work_mem`.
const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024;
thread_local! {
// Every thread gets its own `WorkState`, so the read buffer is reused across
// the files a thread processes without any sharing between threads.
static WORK_STATE: RefCell<WorkState> = RefCell::new(WorkState::new());
}
/// Scratch state owned by one thread (see `WORK_STATE`).
pub struct WorkState {
/// Buffer the current file's contents are read into before being analyzed.
pub work_mem: Box<[u8]>,
// pub io_mem: Box<[u8]>,
// pub curr_read: Option<aiocb>,
// pub had_first_load: bool,
}
impl WorkState {
    /// Creates a state whose `work_mem` is a zero-filled buffer of
    /// `TEMP_MEM_SIZE` bytes.
    pub fn new() -> Self {
        Self {
            work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
        }
    }
}
2025-10-06 13:39:29 +02:00
#[cold]
#[inline(never)]
extern "rust-cold" fn die() -> ! {
println!("Something went wrong! I'm going to die now");
process::abort()
2024-11-30 20:58:19 +01:00
}
2025-10-06 13:39:29 +02:00
fn work(file_path: &OsStr, stats: &mut Stats) {
WORK_STATE.with_borrow_mut(|state: &mut WorkState| {
// // Load file
// let start_time = Instant::now();
// let Ok(text) = fs::read(file_path) else {
// eprintln!("invalid file!");
// process::abort();
// };
// NOTE: Reading the file like this is noticeably faster!
let mut file = OpenOptions::new()
.read(true)
// .custom_flags(libc::O_DIRECT) // O_DIRECT is A LOT slower!!
.open(file_path)
.unwrap_or_else(|_| die());
let mut read_offset = 0;
loop {
// let rb = file.read_at(&mut state.work_mem[read_offset..], read_offset as u64)
let rb = file.read(&mut state.work_mem[read_offset..])
.unwrap_or_else(|_| die());
if hint::unlikely(rb == 0) {
break;
}
read_offset += rb;
}
let text = &state.work_mem[..read_offset];
// file.read_exact(&mut state.work_mem[..file_len]).unwrap();
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
// let time_reading = start_time.elapsed();
// {
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
// *guard += time_reading;
// }
analyze(&text, stats);
});
2024-11-30 20:58:19 +01:00
}
2025-10-06 13:39:29 +02:00
fn analyze(text: &[u8], stats: &mut Stats) {
// // NOTE: mmap is quite a bit slower
// // Load file
// let Ok(file) = File::open(file_path) else {
// eprintln!("invalid file!");
// std::process::abort();
// };
// let mmap = unsafe {
// Mmap::map(&file).unwrap()
// };
// mem::forget(file);
// let text = &*mmap;
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
// // Load file
// let start_time = Instant::now();
// let Ok(text) = fs::read(file_path) else {
// eprintln!("invalid file!");
// process::abort();
// };
// let time_reading = start_time.elapsed();
// {
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
// *guard += time_reading;
// }
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
let mut sentences = 0;
2024-12-01 23:23:32 +01:00
let mut words = 0;
2025-10-06 13:39:29 +02:00
let mut capitalizeds = 0;
let mut numbers = 0;
let mut forbiddens = 0;
2024-12-01 23:23:32 +01:00
2025-10-06 13:39:29 +02:00
let mut idx = 0;
'full_loop: loop {
// Skip whitespace
while is_ascii_whitespace(text[idx]) {
idx += 1;
if hint::unlikely(idx >= text.len()) {
break 'full_loop;
}
}
// Find end of word
let word_start = idx;
let mut has_non_upper = false;
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
idx += 1;
if hint::unlikely(idx >= text.len()) {
break 'find_word_end;
}
// Per-char logic
if !is_ascii_upper(b) {
has_non_upper = true;
}
if b == b'.' {
sentences += 1;
}
if is_ascii_digit(b) {
numbers += 1;
}
// sentences += (b == b'.') as u32;
// numbers += is_ascii_digit(b) as u32;
}
let word = &text[word_start..idx];
// let word = unsafe { &text.get_unchecked(word_start..idx) };
2025-10-06 13:39:29 +02:00
// dbg!(str::from_utf8(word).unwrap());
words += 1;
if !has_non_upper {
capitalizeds += 1;
}
// Check forbidden
if unsafe { FW_TAB.lookup(word) } {
// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab
forbiddens += 1;
}
}
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
/*
for token in text.split(|&b| is_ascii_whitespace(b)) {
if token.is_empty() {
continue;
}
words += 1;
// Sentence count, folded into this loop
// instead of another loop (better cache usage)
for &b in token {
if b == b'.' {
sentences += 1;
}
}
// Check if upper
if token.iter().all(|&b| is_ascii_upper(b)) {
capitalizeds += 1;
}
// Check digits
for &b in token {
if is_ascii_digit(b) {
numbers += 1;
}
}
// Check if words
// if FORBIDDEN_WORDS.contains(&token) {
// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
if unsafe { FW_TAB.lookup(token) } {
forbiddens += 1;
}
}
*/
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
/*
// NOTE: This is pretty slow:
let mut idx = 0;
let mut word_start = 0;
let mut is_in_word = false;
let mut has_non_upper = false;
loop {
let b = unsafe { *text.get_unchecked(idx) };
let mut process_word = false;
if is_ascii_whitespace(b) {
if is_in_word {
process_word = true;
// Reset state for next word
is_in_word = false;
has_non_upper = false;
}
} else {
if !is_in_word {
word_start = idx;
is_in_word = true;
}
has_non_upper |= !is_ascii_upper(b);
}
// Check digits
if is_ascii_digit(b) {
numbers += 1;
}
// Check sentences
if b == b'.' {
sentences += 1;
}
let word = &text[word_start..idx];
idx += 1;
if process_word || idx >= text.len() {
words += 1;
if !has_non_upper {
capitalizeds += 1;
}
// // DEBUG:
// println!("'{}'", str::from_utf8(word).unwrap());
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } {
forbiddens += 1;
}
}
if idx >= text.len() {
break;
}
}
*/
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
stats.sentences = sentences;
stats.words = words;
stats.capitalizeds = capitalizeds;
stats.numbers = numbers;
stats.forbiddens = forbiddens;
}
/*
fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
// Load file
let Ok(text) = fs::read(file_path) else {
eprintln!("invalid file!");
std::process::abort();
};
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
let mut sentences = 0;
let mut words = 0;
let mut capitalizeds = 0;
let mut numbers = 0;
let mut forbiddens = 0;
2024-11-30 20:58:19 +01:00
2025-10-06 13:39:29 +02:00
for token in text.split(|&b| is_ascii_whitespace(b)) {
if token.is_empty() {
continue;
}
words += 1;
// Sentence count, folded into this loop
// instead of another loop (better cache usage)
for &b in token {
if b == b'.' {
sentences += 1;
}
}
// Check if upper
if token.iter().all(|&b| is_ascii_upper(b)) {
capitalizeds += 1;
}
// Check digits
for &b in token {
if is_ascii_digit(b) {
numbers += 1;
}
}
// Check if words
// if FORBIDDEN_WORDS.contains(&token) {
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
forbiddens += 1;
}
}
2025-10-06 13:39:29 +02:00
stats.sentences = sentences;
stats.words = words;
stats.capitalizeds = capitalizeds;
stats.numbers = numbers;
stats.forbiddens = forbiddens;
2024-11-30 20:58:19 +01:00
}
2025-10-06 13:39:29 +02:00
*/
2024-11-30 20:58:19 +01:00
fn main() {
2025-10-06 13:39:29 +02:00
// Read in files from args
let mut files = Vec::with_capacity(env::args().len());
2025-10-06 13:45:18 +02:00
// let mut do_parallel = false;
2025-10-06 13:39:29 +02:00
let start_time = Instant::now();
for arg in env::args_os().skip(1) {
// skip program arg
if arg == "-p" {
2025-10-06 13:45:18 +02:00
// do_parallel = true;
} else {
files.push(arg);
}
}
2025-10-06 13:39:29 +02:00
println!("[PROFILE] taking args took {:?}", start_time.elapsed());
// env::args_os().
// let files = FULL_BOOK_PATHS;
// // Build table
// let tab = FwTab::build();
// Do the work
2025-10-06 13:45:18 +02:00
#[allow(unused_mut)] // SAFETY: We (unsafely) write to this via raw ptrs, it NEEDS to be mut!
2025-10-06 13:39:29 +02:00
let mut stats = vec![Stats {
sentences: 0,
words: 0,
capitalizeds: 0,
numbers: 0,
forbiddens: 0,
}; files.len()];
let start_time = Instant::now();
let num_cores = available_parallelism().unwrap().get();
let num_threads = num_cores * 1;
// // DEBUG:
// dbg!(num_threads);
// dbg!(num_cores);
rayon::ThreadPoolBuilder::new()
2025-10-06 13:45:18 +02:00
.num_threads(num_threads)
2025-10-06 13:39:29 +02:00
.build_global()
.unwrap();
files.par_iter()
.enumerate()
.for_each(|(idx, p)| {
let s = unsafe {
&mut *stats.as_ptr()
.offset(idx as isize)
.cast_mut()
};
// let mut path = OsString::from("../../");
// path.push(p);
let path = p;
work(path, s);
});
// thread::scope(|scope| {
// let files_per_thread = files.len() / num_threads;
//
// for thread_idx in 0..num_threads {
// let capture_files = &files;
// let capture_stats = &stats;
// thread::Builder::new().spawn_scoped(scope, move || {
// let files = capture_files;
// let stats = capture_stats;
//
// // Set thread affinity
// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
//
// // Do work
// let thread_start = thread_idx * files_per_thread;
// for i in 0..files_per_thread {
// let real_idx = thread_start + i;
// let file_path = &files[real_idx];
// let st = unsafe {
// &mut *stats.as_ptr()
// .offset(real_idx as isize)
// .cast_mut()
// };
//
// work(&file_path, st);
// }
// }).unwrap();
// }
// });
println!("[PROFILE] processing text took {:?}", start_time.elapsed());
// Accumulate stats
let start_time = Instant::now();
let mut total_words = 0;
let mut total_capitalizeds = 0;
let mut total_sentences = 0;
let mut total_numbers = 0;
let mut total_forbiddens = 0;
for stat in &stats {
total_words += stat.words;
total_capitalizeds += stat.capitalizeds;
total_sentences += stat.sentences;
total_numbers += stat.numbers;
total_forbiddens += stat.forbiddens;
}
let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0;
let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0;
let word_count_per_sentence = total_words as f32 / total_sentences as f32;
println!();
println!("Total Words: {total_words}");
println!("Total Capitalized words: {total_capitalizeds}");
println!("Total Sentences: {total_sentences}");
println!("Total Numbers: {total_numbers}");
println!("Total Forbidden words: {total_forbiddens}");
println!("Capitalized percentage: {capitalized_percentage:.6}");
println!("Forbidden percentage: {forbidden_percentage:.6}");
println!("Word count per sentence: {word_count_per_sentence:.6}");
println!("Total files read: {}", files.len());
println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed());
println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap());
// Exit process to avoid running drops
process::exit(0);
}
/// Lookup table for the forbidden words, indexed by a word's first byte.
#[repr(C)]
struct FwTab {
// pub dir_and_len_bits: [u32; 256],
/// For each first byte: bit `n` is set if some forbidden word starting with
/// that byte is `n` bytes long.
pub dir_len_bits: [u16; 256],
/// For each first byte: offset into `strs` of that byte's word sublist, or
/// 0 if no forbidden word starts with that byte.
pub dir: [u8; 256],
/// Packed sublists. Each word is stored as a length byte (full word length)
/// followed by the word's bytes without its first byte; a 0 length byte ends
/// a sublist.
pub strs: [u8; 256],
/// For each first byte: the forbidden words starting with it, each packed
/// little-endian into a zero-padded `u128`.
pub long_strs: [&'static [u128]; 256],
}
impl FwTab {
pub fn build() -> Self {
// Sort fws by first char
let mut sorted_fws: Vec<Vec<&'static [u8]>> = vec![vec![]; 256];
for word in FORBIDDEN_WORDS {
sorted_fws[word[0] as usize].push(&word);
}
for i in 0..256 {
sorted_fws[i].sort()
}
// // DEBUG:
// println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
// Build str tab
let mut fw_dir = [0u8; 256];
// let mut fw_dir_len_bits = [0u32; 256];
let mut fw_dir_len_bits = [0u16; 256];
let mut fw_strs: Vec<u8> = vec![];
let mut fw_long_strs: [Vec<u128>; 256] = array::from_fn(|_| vec![]);
fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries
for c in 0..256 {
for fw in FORBIDDEN_WORDS {
if c == fw[0] as usize {
// Add to len bits
fw_dir_len_bits[c] |= 0x1 << fw.len();
}
}
if !sorted_fws[c].is_empty() {
let sublist_start_offset = fw_strs.len().try_into().unwrap();
fw_dir[c] = sublist_start_offset;
// DEBUG:
println!("{c} start offset: {}", sublist_start_offset);
println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
// Push strings
for fw in &sorted_fws[c] {
fw_strs.push(fw.len().try_into().unwrap());
for &c in &fw[1..] {
fw_strs.push(c);
}
}
// Mark end of per-char word sublist
fw_strs.push(b'\0');
}
}
// Calc long strs
for c in 0..256 {
for fw in &sorted_fws[c] {
let mut buf = [0u8; 16];
buf[..fw.len()].copy_from_slice(fw);
let val = u128::from_le_bytes(buf);
fw_long_strs[c].push(val);
}
}
// DEBUG:
println!("strs len: {}", fw_strs.len());
assert_eq!(fw_dir.len(), 256);
assert!(fw_strs.len() <= 256);
fw_strs.resize(256, 0);
let tab = FwTab {
dir: fw_dir,
dir_len_bits: fw_dir_len_bits,
// dir_and_len_bits: array::from_fn(|idx| {
// (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24)
// }),
strs: fw_strs.try_into().unwrap(),
long_strs: fw_long_strs.map(|vec| &*vec.leak()), // Too lazy, not needed anyways
};
// DEBUG: Test some strings
unsafe {
dbg!(tab.lookup(b"cpm"));
dbg!(tab.lookup(b"com"));
dbg!(tab.lookup(b"coma"));
dbg!(tab.lookup(b"co"));
dbg!(tab.lookup(b"cam"));
dbg!(tab.lookup(b"crypto"));
dbg!(tab.lookup(b"@"));
// dbg!(tab.lookup(b"")); // we require that words must be non-empty!
dbg!(tab.lookup(b" "));
dbg!(tab.lookup(b"test"));
dbg!(tab.lookup(b"expers"));
}
println!("static FW_TAB: FwTab = FwTab {{");
// println!("\tdir_and_len_bits: [");
// for chunk in self.dir_and_len_bits.chunks(16) {
// print!("\t\t");
// for &b in chunk {
// print!("0x{b:08x}, ");
// }
// println!();
// }
// println!("\t],");
println!("\tdir: [");
for chunk in tab.dir.chunks(16) {
print!("\t\t");
for &b in chunk {
print!("0x{b:02x}, ");
}
println!();
}
println!("\t],");
println!("\tdir_len_bits: [");
for chunk in tab.dir_len_bits.chunks(16) {
print!("\t\t");
for &b in chunk {
print!("0x{b:04x}, ");
}
println!();
}
println!("\t],");
println!("\tstrs: [");
for chunk in tab.strs.chunks(16) {
print!("\t\t");
for &b in chunk {
print!("0x{b:02x}, ");
}
println!();
}
println!("\t],");
print!("\tlong_strs: [");
for c in 0..256 {
// print!("\t\t");
print!("&[");
for &fw_val in tab.long_strs[c] {
print!("0x{:x}, ", fw_val);
}
print!("],");
// println!();
}
println!("],");
println!("}};");
tab
}
2025-10-06 13:39:29 +02:00
#[inline]
pub unsafe fn lookup_b(&self, word: &[u8]) -> bool {
let first_char = unsafe { *word.get_unchecked(0) };
let strs = self.long_strs[first_char as usize];
if strs.len() == 0 || word.len() >= 16 {
return false;
}
// Mask word
let mask = !(u128::MAX << (word.len() * 8));
let word_int = unsafe {
word.as_ptr()
.cast::<u128>()
.read_unaligned() & mask
};
let mut i = 0;
while i < strs.len() {
if strs[i] == word_int {
return true;
}
i += 1;
}
false
}
#[inline]
pub unsafe fn lookup(&self, word: &[u8]) -> bool {
// let &[first_char, ..] = word else {
// return false;
// };
let first_char = unsafe { *word.get_unchecked(0) };
// let dir_and_len_bits = unsafe {
// *self.dir_and_len_bits.get_unchecked(first_char as usize)
// };
// if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 {
// return false;
// }
let len_bits = unsafe {
*self.dir_len_bits.get_unchecked(first_char as usize)
};
if hint::likely(word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0) {
return false;
}
// let mut str_offset = (dir_and_len_bits >> 24) as usize;
let mut str_offset = unsafe {
*self.dir.get_unchecked(first_char as usize) as usize
};
// Char doesn't have any strings in the table
if str_offset == 0 {
return false;
}
// Iterate over strs
loop {
// let fw_len = u16::from_le_bytes([
// self.strs[str_offset],
// self.strs[str_offset+1]
// ]);
let fw_len: u8 = unsafe {
*self.strs.get_unchecked(str_offset)
};
if fw_len == 0 {
// We've reached the end of the word sublist
return false;
}
// Only compare words if they are the same length
if hint::unlikely(word.len() == fw_len as usize) {
// Compare strs
let mut char_offset = 1usize;
loop {
// Found the word!
if char_offset == word.len() {
return true;
}
let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) };
let word_char = unsafe { *word.get_unchecked(char_offset) };
if fw_char > word_char {
// Word can't possible be in the sorted list, return
return false;
}
if fw_char < word_char {
// Try next word
break;
}
char_offset += 1;
}
}
// Advance to next word
// let str_len_bytes = 2;
let str_len_bytes = 1;
str_offset += (fw_len as usize - 1) + str_len_bytes;
}
}
2024-11-30 20:58:19 +01:00
}
2025-10-06 13:39:29 +02:00
/// The words counted as "forbidden", as raw bytes.
///
/// `FwTab::build` turns this list into the lookup table; matching is exact
/// and case-sensitive because whole words are compared byte by byte.
const FORBIDDEN_WORDS: [&'static [u8]; 35] = [
    b"recovery",
    b"techie",
    b"http",
    b"https",
    b"digital",
    b"hack",
    b"::",
    b"//",
    b"com",
    b"@",
    b"crypto",
    b"bitcoin",
    b"wallet",
    b"hacker",
    b"welcome",
    b"whatsapp",
    b"email",
    b"cryptocurrency",
    b"stolen",
    b"freeze",
    b"quick",
    b"crucial",
    b"tracing",
    b"scammers",
    b"expers",
    b"hire",
    b"century",
    b"transaction",
    b"essential",
    b"managing",
    b"contact",
    b"contacting",
    b"understanding",
    b"assets",
    b"funds",
];
2025-10-06 13:39:29 +02:00
static FW_TAB: FwTab = FwTab {
dir: [
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
],
dir_len_bits: [
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000,
0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
],
strs: [
0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
],
long_strs: [&[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x2f2f, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x3a3a, ], &[], &[], &[], &[], &[], &[0x40, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x737465737361, ], &[0x6e696f63746962, ], &[0x797275746e6563, 0x6d6f63, 0x746361746e6f63, 0x676e69746361746e6f63, 0x6c616963757263, 0x6f7470797263, 0x79636e65727275636f7470797263, ], &[0x6c617469676964, ], &[0x6c69616d65, 0x6c6169746e65737365, 0x737265707865, ], &[0x657a65657266, 0x73646e7566, ], &[], &[0x6b636168, 0x72656b636168, 0x65726968, 0x70747468, 0x7370747468, ], &[], &[], &[], &[], &[0x676e6967616e616d, ], &[], &[], &[], &[0x6b63697571, ], &[0x797265766f636572, ], &[0x7372656d6d616373, 0x6e656c6f7473, ], &[0x656968636574, 0x676e6963617274, 0x6e6f69746361736e617274, ], &[0x676e69646e6174737265646e75, ], &[], &[0x74656c6c6177, 0x656d6f636c6577, 0x7070617374616877, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], ],
};
// NOTE: perfect hashing was kinda slow
//static FW_PHF: phf::Set<&'static [u8]> = phf::phf_set! {
// b"recovery",
// b"techie",
// b"http",
// b"https",
// b"digital",
// b"hack",
// b"::",
// b"//",
// b"com",
// b"@",
// b"crypto",
// b"bitcoin",
// b"wallet",
// b"hacker",
// b"welcome",
// b"whatsapp",
// b"email",
// b"cryptocurrency",
// b"stolen",
// b"freeze",
// b"quick",
// b"crucial",
// b"tracing",
// b"scammers",
// b"expers",
// b"hire",
// b"century",
// b"transaction",
// b"essential",
// b"managing",
// b"contact",
// b"contacting",
// b"understanding",
// b"assets",
// b"funds",
//};
2024-12-01 23:23:32 +01:00
2025-10-06 13:39:29 +02:00
//static FW_TAB_DIR: [u8; 256] = [
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
//];
//static FW_TAB_STRS: [u8; 244] = [
// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
// 0x61, 0x70, 0x70, 0x00,
//];