#![ feature(likely_unlikely) ]
#![ feature(rust_cold_cc) ]
#![ allow(dead_code) ]
//mod books;
use rayon ::prelude ::* ;
use std ::cell ::RefCell ;
use std ::ffi ::OsStr ;
use std ::fs ::OpenOptions ;
use std ::io ::Read ;
use std ::sync ::Mutex ;
use std ::thread ::available_parallelism ;
use std ::time ::{ Duration , Instant } ;
use std ::{ array , env , hint , process } ;
/// Returns `true` when `b` is tab, line feed, form feed, carriage return or space.
#[inline]
fn is_ascii_whitespace(b: u8) -> bool {
    // The standard-library predicate accepts exactly these five bytes.
    b.is_ascii_whitespace()
}
/// Returns `true` when `b` is one of the ASCII capital letters `A` through `Z`.
#[inline]
fn is_ascii_upper(b: u8) -> bool {
    b.is_ascii_uppercase()
}
/// Returns `true` when `b` is one of the ASCII decimal digits `0` through `9`.
#[inline]
fn is_ascii_digit(b: u8) -> bool {
    b.is_ascii_digit()
}
/// Counters collected for a single input file by `analyze`.
///
/// The 128-byte alignment gives every element of a `Vec<Stats>` its own
/// 128-byte slot.
// NOTE(review): presumably chosen so that entries written by different worker
// threads never share a cache line — confirm.
#[repr(align(128))]
#[derive(Copy, Clone, Debug, Default)]
struct Stats {
    /// Number of `.` bytes seen inside words.
    pub sentences: u32,
    /// Number of whitespace-separated words.
    pub words: u32,
    /// Number of words made up solely of the bytes `A..=Z`.
    pub capitalizeds: u32,
    /// Number of ASCII digit bytes (individual digits, not numeric tokens).
    pub numbers: u32,
    /// Number of words found in the forbidden-word table.
    pub forbiddens: u32,
}
// Accumulated wall-clock time spent reading input files. `main` prints it at
// the end; the only code that adds to it is currently commented out, so the
// reported value stays at zero.
static TIME_SPENT_READING_FILES : Mutex < Duration > = Mutex ::new ( Duration ::from_secs ( 0 ) ) ;
// Size in bytes (6 MiB) of the scratch buffer allocated by `WorkState::new`.
const TEMP_MEM_SIZE : usize = 6 * 1024 * 1024 ;
// One `WorkState` per thread, created on first access, so each thread that
// calls `work` reuses its own buffer across files.
thread_local! {
static WORK_STATE : RefCell < WorkState > = RefCell ::new ( WorkState ::new ( ) ) ;
}
/// Scratch state owned by one worker thread and reused for every file it handles.
pub struct WorkState {
    /// Zero-initialised buffer of `TEMP_MEM_SIZE` bytes that file contents are read into.
    pub work_mem: Box<[u8]>,
    // Fields of an abandoned asynchronous-read variant:
    // pub io_mem: Box<[u8]>,
    // pub curr_read: Option<aiocb>,
    // pub had_first_load: bool,
}

impl WorkState {
    /// Allocates a fresh state whose buffer holds `TEMP_MEM_SIZE` zero bytes.
    pub fn new() -> Self {
        let zeroed: Vec<u8> = vec![0u8; TEMP_MEM_SIZE];
        let work_mem: Box<[u8]> = Box::from(zeroed);
        Self {
            work_mem,
            // io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
            // curr_read: None,
            // had_first_load: false,
        }
    }
}
#[ cold ]
#[ inline(never) ]
extern " rust-cold " fn die ( ) -> ! {
println! ( " Something went wrong! I'm going to die now " ) ;
process ::abort ( )
}
fn work ( file_path : & OsStr , stats : & mut Stats ) {
WORK_STATE . with_borrow_mut ( | state : & mut WorkState | {
// // Load file
// let start_time = Instant::now();
// let Ok(text) = fs::read(file_path) else {
// eprintln!("invalid file!");
// process::abort();
// };
// NOTE: Reading the file like this is noticeably faster!
let mut file = OpenOptions ::new ( )
. read ( true )
// .custom_flags(libc::O_DIRECT) // O_DIRECT is A LOT slower!!
. open ( file_path )
. unwrap_or_else ( | _ | die ( ) ) ;
let mut read_offset = 0 ;
loop {
// let rb = file.read_at(&mut state.work_mem[read_offset..], read_offset as u64)
let rb = file . read ( & mut state . work_mem [ read_offset .. ] )
. unwrap_or_else ( | _ | die ( ) ) ;
if hint ::unlikely ( rb = = 0 ) {
break ;
}
read_offset + = rb ;
}
let text = & state . work_mem [ .. read_offset ] ;
// file.read_exact(&mut state.work_mem[..file_len]).unwrap();
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
// let time_reading = start_time.elapsed();
// {
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
// *guard += time_reading;
// }
analyze ( & text , stats ) ;
} ) ;
}
fn analyze ( text : & [ u8 ] , stats : & mut Stats ) {
// // NOTE: mmap is quite a bit slower
// // Load file
// let Ok(file) = File::open(file_path) else {
// eprintln!("invalid file!");
// std::process::abort();
// };
// let mmap = unsafe {
// Mmap::map(&file).unwrap()
// };
// mem::forget(file);
// let text = &*mmap;
// // Load file
// let start_time = Instant::now();
// let Ok(text) = fs::read(file_path) else {
// eprintln!("invalid file!");
// process::abort();
// };
// let time_reading = start_time.elapsed();
// {
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
// *guard += time_reading;
// }
let mut sentences = 0 ;
let mut words = 0 ;
let mut capitalizeds = 0 ;
let mut numbers = 0 ;
let mut forbiddens = 0 ;
let mut idx = 0 ;
' full_loop : loop {
// Skip whitespace
while is_ascii_whitespace ( text [ idx ] ) {
idx + = 1 ;
if hint ::unlikely ( idx > = text . len ( ) ) {
break 'full_loop ;
}
}
// Find end of word
let word_start = idx ;
let mut has_non_upper = false ;
' find_word_end : while let b = text [ idx ] & & ! is_ascii_whitespace ( b ) {
idx + = 1 ;
if hint ::unlikely ( idx > = text . len ( ) ) {
break 'find_word_end ;
}
// Per-char logic
if ! is_ascii_upper ( b ) {
has_non_upper = true ;
}
if b = = b '.' {
sentences + = 1 ;
}
if is_ascii_digit ( b ) {
numbers + = 1 ;
}
// sentences += (b == b'.') as u32;
// numbers += is_ascii_digit(b) as u32;
}
let word = & text [ word_start .. idx ] ;
// let word = unsafe { &text.get_unchecked(word_start..idx) };
// dbg!(str::from_utf8(word).unwrap());
words + = 1 ;
if ! has_non_upper {
capitalizeds + = 1 ;
}
// Check forbidden
if unsafe { FW_TAB . lookup ( word ) } {
// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab
forbiddens + = 1 ;
}
}
/*
for token in text.split(|&b| is_ascii_whitespace(b)) {
if token.is_empty() {
continue;
}
words += 1;
/ / Sentence count, folded into this loop
/ / instead of another loop (better cache usage)
for &b in token {
if b == b'.' {
sentences += 1;
}
}
/ / Check if upper
if token.iter().all(|&b| is_ascii_upper(b)) {
capitalizeds += 1;
}
/ / Check digits
for &b in token {
if is_ascii_digit(b) {
numbers += 1;
}
}
/ / Check if words
/ / if FORBIDDEN_WORDS.contains(&token) {
/ / if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
if unsafe { FW_TAB.lookup(token) } {
forbiddens += 1;
}
}
*/
/*
/ / NOTE: This is pretty slow:
let mut idx = 0;
let mut word_start = 0;
let mut is_in_word = false;
let mut has_non_upper = false;
loop {
let b = unsafe { * text.get_unchecked(idx) };
let mut process_word = false;
if is_ascii_whitespace(b) {
if is_in_word {
process_word = true;
/ / Reset state for next word
is_in_word = false;
has_non_upper = false;
}
} else {
if !is_in_word {
word_start = idx;
is_in_word = true;
}
has_non_upper |= !is_ascii_upper(b);
}
/ / Check digits
if is_ascii_digit(b) {
numbers += 1;
}
/ / Check sentences
if b == b'.' {
sentences += 1;
}
let word = &text[word_start..idx];
idx += 1;
if process_word || idx >= text.len() {
words += 1;
if !has_non_upper {
capitalizeds += 1;
}
/ / / / DEBUG:
/ / println!("'{}'", str::from_utf8(word).unwrap());
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } {
forbiddens += 1;
}
}
if idx >= text.len() {
break;
}
}
*/
stats . sentences = sentences ;
stats . words = words ;
stats . capitalizeds = capitalizeds ;
stats . numbers = numbers ;
stats . forbiddens = forbiddens ;
}
/*
fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
/ / Load file
let Ok(text) = fs::read(file_path) else {
eprintln!("invalid file!");
std::process::abort();
};
let mut sentences = 0;
let mut words = 0;
let mut capitalizeds = 0;
let mut numbers = 0;
let mut forbiddens = 0;
for token in text.split(|&b| is_ascii_whitespace(b)) {
if token.is_empty() {
continue;
}
words += 1;
/ / Sentence count, folded into this loop
/ / instead of another loop (better cache usage)
for &b in token {
if b == b'.' {
sentences += 1;
}
}
/ / Check if upper
if token.iter().all(|&b| is_ascii_upper(b)) {
capitalizeds += 1;
}
/ / Check digits
for &b in token {
if is_ascii_digit(b) {
numbers += 1;
}
}
/ / Check if words
/ / if FORBIDDEN_WORDS.contains(&token) {
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
forbiddens += 1;
}
}
stats.sentences = sentences;
stats.words = words;
stats.capitalizeds = capitalizeds;
stats.numbers = numbers;
stats.forbiddens = forbiddens;
}
*/
fn main ( ) {
// Read in files from args
let mut files = Vec ::with_capacity ( env ::args ( ) . len ( ) ) ;
// let mut do_parallel = false;
let start_time = Instant ::now ( ) ;
for arg in env ::args_os ( ) . skip ( 1 ) {
// skip program arg
if arg = = " -p " {
// do_parallel = true;
} else {
files . push ( arg ) ;
}
}
println! ( " [PROFILE] taking args took {:?} " , start_time . elapsed ( ) ) ;
// env::args_os().
// let files = FULL_BOOK_PATHS;
// // Build table
// let tab = FwTab::build();
// Do the work
#[ allow(unused_mut) ] // SAFETY: We (unsafely) write to this via raw ptrs, it NEEDS to be mut!
let mut stats = vec! [ Stats {
sentences : 0 ,
words : 0 ,
capitalizeds : 0 ,
numbers : 0 ,
forbiddens : 0 ,
} ; files . len ( ) ] ;
let start_time = Instant ::now ( ) ;
let num_cores = available_parallelism ( ) . unwrap ( ) . get ( ) ;
let num_threads = num_cores * 1 ;
// // DEBUG:
// dbg!(num_threads);
// dbg!(num_cores);
rayon ::ThreadPoolBuilder ::new ( )
. num_threads ( num_threads )
. build_global ( )
. unwrap ( ) ;
files . par_iter ( )
. enumerate ( )
. for_each ( | ( idx , p ) | {
let s = unsafe {
& mut * stats . as_ptr ( )
. offset ( idx as isize )
. cast_mut ( )
} ;
// let mut path = OsString::from("../../");
// path.push(p);
let path = p ;
work ( path , s ) ;
} ) ;
// thread::scope(|scope| {
// let files_per_thread = files.len() / num_threads;
//
// for thread_idx in 0..num_threads {
// let capture_files = &files;
// let capture_stats = &stats;
// thread::Builder::new().spawn_scoped(scope, move || {
// let files = capture_files;
// let stats = capture_stats;
//
// // Set thread affinity
// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
//
// // Do work
// let thread_start = thread_idx * files_per_thread;
// for i in 0..files_per_thread {
// let real_idx = thread_start + i;
// let file_path = &files[real_idx];
// let st = unsafe {
// &mut *stats.as_ptr()
// .offset(real_idx as isize)
// .cast_mut()
// };
//
// work(&file_path, st);
// }
// }).unwrap();
// }
// });
println! ( " [PROFILE] processing text took {:?} " , start_time . elapsed ( ) ) ;
// Accumulate stats
let start_time = Instant ::now ( ) ;
let mut total_words = 0 ;
let mut total_capitalizeds = 0 ;
let mut total_sentences = 0 ;
let mut total_numbers = 0 ;
let mut total_forbiddens = 0 ;
for stat in & stats {
total_words + = stat . words ;
total_capitalizeds + = stat . capitalizeds ;
total_sentences + = stat . sentences ;
total_numbers + = stat . numbers ;
total_forbiddens + = stat . forbiddens ;
}
let capitalized_percentage = ( total_capitalizeds as f32 / total_words as f32 ) * 100.0 ;
let forbidden_percentage = ( total_forbiddens as f32 / total_words as f32 ) * 100.0 ;
let word_count_per_sentence = total_words as f32 / total_sentences as f32 ;
println! ( ) ;
println! ( " Total Words: {total_words} " ) ;
println! ( " Total Capitalized words: {total_capitalizeds} " ) ;
println! ( " Total Sentences: {total_sentences} " ) ;
println! ( " Total Numbers: {total_numbers} " ) ;
println! ( " Total Forbidden words: {total_forbiddens} " ) ;
println! ( " Capitalized percentage: {capitalized_percentage:.6} " ) ;
println! ( " Forbidden percentage: {forbidden_percentage:.6} " ) ;
println! ( " Word count per sentence: {word_count_per_sentence:.6} " ) ;
println! ( " Total files read: {} " , files . len ( ) ) ;
println! ( " [PROFILE] accumulating stats took {:?} " , start_time . elapsed ( ) ) ;
println! ( " [PROFILE] total file reading took {:?} " , & * TIME_SPENT_READING_FILES . lock ( ) . unwrap ( ) ) ;
// Exit process to avoid running drops
process ::exit ( 0 ) ;
}
/// Lookup table for the forbidden words, indexed by a word's first byte.
///
/// `build` constructs it from `FORBIDDEN_WORDS`; `lookup` and `lookup_b` query it.
#[ repr(C) ]
struct FwTab {
// pub dir_and_len_bits: [u32; 256],
/// Per first byte: bit `n` is set when some forbidden word starting with that
/// byte has length `n` (only lengths below 16 fit into the `u16`).
pub dir_len_bits : [ u16 ; 256 ] ,
/// Per first byte: offset into `strs` where that byte's word sublist starts,
/// or 0 when no forbidden word starts with that byte.
pub dir : [ u8 ; 256 ] ,
/// Concatenated sublists. Each entry is the word length followed by the
/// word's bytes after the first one; a 0 length byte ends a sublist.
/// Index 0 holds a dummy 0 so that offset 0 can mean "no entries".
pub strs : [ u8 ; 256 ] ,
/// Per first byte: the forbidden words packed little-endian into `u128`
/// values, zero-padded; used by `lookup_b`.
pub long_strs : [ & 'static [ u128 ] ; 256 ] ,
}
impl FwTab {
/// Builds the table from `FORBIDDEN_WORDS` and returns it.
///
/// Besides constructing the value, this prints debug information, runs a few
/// sample lookups through `dbg!`, and prints the finished table to stdout as
/// Rust source text for a `static FW_TAB` item.
///
/// # Panics
/// Panics when the string table would exceed 256 bytes, or when an offset or
/// word length does not fit into a `u8`.
pub fn build ( ) -> Self {
// Sort fws by first char
let mut sorted_fws : Vec < Vec < & 'static [ u8 ] > > = vec! [ vec! [ ] ; 256 ] ;
for word in FORBIDDEN_WORDS {
sorted_fws [ word [ 0 ] as usize ] . push ( & word ) ;
}
// Each sublist is sorted bytewise; `lookup` relies on this order for its early exit.
for i in 0 .. 256 {
sorted_fws [ i ] . sort ( )
}
// // DEBUG:
// println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
// Build str tab
let mut fw_dir = [ 0 u8 ; 256 ] ;
// let mut fw_dir_len_bits = [0u32; 256];
let mut fw_dir_len_bits = [ 0 u16 ; 256 ] ;
let mut fw_strs : Vec < u8 > = vec! [ ] ;
let mut fw_long_strs : [ Vec < u128 > ; 256 ] = array ::from_fn ( | _ | vec! [ ] ) ;
fw_strs . push ( b '\0' ) ; // push dummy value so that 0 in the dir means no-entries
for c in 0 .. 256 {
for fw in FORBIDDEN_WORDS {
if c = = fw [ 0 ] as usize {
// Add to len bits
// NOTE(review): the shift only fits the u16 for word lengths below 16.
fw_dir_len_bits [ c ] | = 0x1 < < fw . len ( ) ;
}
}
if ! sorted_fws [ c ] . is_empty ( ) {
let sublist_start_offset = fw_strs . len ( ) . try_into ( ) . unwrap ( ) ;
fw_dir [ c ] = sublist_start_offset ;
// DEBUG:
println! ( " {c} start offset: {} " , sublist_start_offset ) ;
println! ( " {:#?} " , sorted_fws [ c ] . iter ( ) . map ( | s | str ::from_utf8 ( s ) . unwrap ( ) ) . collect ::< Vec < _ > > ( ) ) ;
// Push strings
// Each entry is the full word length followed by the bytes after the first
// one (the first byte is implied by the directory index).
for fw in & sorted_fws [ c ] {
fw_strs . push ( fw . len ( ) . try_into ( ) . unwrap ( ) ) ;
for & c in & fw [ 1 .. ] {
fw_strs . push ( c ) ;
}
}
// Mark end of per-char word sublist
fw_strs . push ( b '\0' ) ;
}
}
// Calc long strs
// Each word is zero-padded to 16 bytes and packed little-endian into a u128.
for c in 0 .. 256 {
for fw in & sorted_fws [ c ] {
let mut buf = [ 0 u8 ; 16 ] ;
buf [ .. fw . len ( ) ] . copy_from_slice ( fw ) ;
let val = u128 ::from_le_bytes ( buf ) ;
fw_long_strs [ c ] . push ( val ) ;
}
}
// DEBUG:
println! ( " strs len: {} " , fw_strs . len ( ) ) ;
assert_eq! ( fw_dir . len ( ) , 256 ) ;
assert! ( fw_strs . len ( ) < = 256 ) ;
// Zero-pad the string table to exactly 256 bytes so it converts into the fixed-size array.
fw_strs . resize ( 256 , 0 ) ;
let tab = FwTab {
dir : fw_dir ,
dir_len_bits : fw_dir_len_bits ,
// dir_and_len_bits: array::from_fn(|idx| {
// (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24)
// }),
strs : fw_strs . try_into ( ) . unwrap ( ) ,
long_strs : fw_long_strs . map ( | vec | & * vec . leak ( ) ) , // Too lazy, not needed anyways
} ;
// DEBUG: Test some strings
unsafe {
dbg! ( tab . lookup ( b " cpm " ) ) ;
dbg! ( tab . lookup ( b " com " ) ) ;
dbg! ( tab . lookup ( b " coma " ) ) ;
dbg! ( tab . lookup ( b " co " ) ) ;
dbg! ( tab . lookup ( b " cam " ) ) ;
dbg! ( tab . lookup ( b " crypto " ) ) ;
dbg! ( tab . lookup ( b " @ " ) ) ;
// dbg!(tab.lookup(b"")); // we require that words must be non-empty!
dbg! ( tab . lookup ( b " " ) ) ;
dbg! ( tab . lookup ( b " test " ) ) ;
dbg! ( tab . lookup ( b " expers " ) ) ;
}
// Print the table as Rust source for a `static FW_TAB` item.
println! ( " static FW_TAB: FwTab = FwTab {{ " ) ;
// println!("\tdir_and_len_bits: [");
// for chunk in self.dir_and_len_bits.chunks(16) {
// print!("\t\t");
// for &b in chunk {
// print!("0x{b:08x}, ");
// }
// println!();
// }
// println!("\t],");
println! ( " \t dir: [ " ) ;
for chunk in tab . dir . chunks ( 16 ) {
print! ( " \t \t " ) ;
for & b in chunk {
print! ( " 0x {b:02x} , " ) ;
}
println! ( ) ;
}
println! ( " \t ], " ) ;
println! ( " \t dir_len_bits: [ " ) ;
for chunk in tab . dir_len_bits . chunks ( 16 ) {
print! ( " \t \t " ) ;
for & b in chunk {
print! ( " 0x {b:04x} , " ) ;
}
println! ( ) ;
}
println! ( " \t ], " ) ;
println! ( " \t strs: [ " ) ;
for chunk in tab . strs . chunks ( 16 ) {
print! ( " \t \t " ) ;
for & b in chunk {
print! ( " 0x {b:02x} , " ) ;
}
println! ( ) ;
}
println! ( " \t ], " ) ;
print! ( " \t long_strs: [ " ) ;
for c in 0 .. 256 {
// print!("\t\t");
print! ( " &[ " ) ;
for & fw_val in tab . long_strs [ c ] {
print! ( " 0x {:x} , " , fw_val ) ;
}
print! ( " ], " ) ;
// println!();
}
println! ( " ], " ) ;
println! ( " }} ; " ) ;
tab
}
#[ inline ]
pub unsafe fn lookup_b ( & self , word : & [ u8 ] ) -> bool {
let first_char = unsafe { * word . get_unchecked ( 0 ) } ;
let strs = self . long_strs [ first_char as usize ] ;
if strs . len ( ) = = 0 | | word . len ( ) > = 16 {
return false ;
}
// Mask word
let mask = ! ( u128 ::MAX < < ( word . len ( ) * 8 ) ) ;
let word_int = unsafe {
word . as_ptr ( )
. cast ::< u128 > ( )
. read_unaligned ( ) & mask
} ;
let mut i = 0 ;
while i < strs . len ( ) {
if strs [ i ] = = word_int {
return true ;
}
i + = 1 ;
}
false
}
/// Returns `true` when `word` is one of the forbidden words, using the
/// `dir_len_bits` / `dir` / `strs` representation.
///
/// Steps:
/// 1. For words shorter than 16 bytes, reject immediately when no forbidden
///    word with the same first byte has the same length (`dir_len_bits`).
/// 2. Find the sublist for the first byte through `dir`; offset 0 means none.
/// 3. Walk the sublist; entries of equal length are compared byte by byte
///    starting at index 1, because the first byte is implied by the sublist.
///
/// # Safety
/// * `word` must be non-empty: its first byte is read without a bounds check.
/// * The table must be well-formed: every sublist reachable through `dir` must
///   end with a 0 length byte inside `strs`, since `strs` is read without
///   bounds checks.
#[ inline ]
pub unsafe fn lookup ( & self , word : & [ u8 ] ) -> bool {
// let &[first_char, ..] = word else {
// return false;
// };
let first_char = unsafe { * word . get_unchecked ( 0 ) } ;
// let dir_and_len_bits = unsafe {
// *self.dir_and_len_bits.get_unchecked(first_char as usize)
// };
// if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 {
// return false;
// }
// A `u8` index is always in bounds for the 256-entry arrays.
let len_bits = unsafe {
* self . dir_len_bits . get_unchecked ( first_char as usize )
} ;
// Length filter; skipped for words of 16+ bytes, which the u16 mask cannot describe.
if hint ::likely ( word . len ( ) < 16 & & ( ( len_bits > > word . len ( ) ) & 0x1 ) = = 0 ) {
return false ;
}
// let mut str_offset = (dir_and_len_bits >> 24) as usize;
let mut str_offset = unsafe {
* self . dir . get_unchecked ( first_char as usize ) as usize
} ;
// Char doesn't have any strings in the table
if str_offset = = 0 {
return false ;
}
// Iterate over strs
loop {
// let fw_len = u16::from_le_bytes([
// self.strs[str_offset],
// self.strs[str_offset+1]
// ]);
let fw_len : u8 = unsafe {
* self . strs . get_unchecked ( str_offset )
} ;
if fw_len = = 0 {
// We've reached the end of the word sublist
return false ;
}
// Only compare words if they are the same length
if hint ::unlikely ( word . len ( ) = = fw_len as usize ) {
// Compare strs
// Start at 1: byte 0 already matched by selecting this sublist.
let mut char_offset = 1 usize ;
loop {
// Found the word!
if char_offset = = word . len ( ) {
return true ;
}
let fw_char = unsafe { * self . strs . get_unchecked ( str_offset + char_offset ) } ;
let word_char = unsafe { * word . get_unchecked ( char_offset ) } ;
if fw_char > word_char {
// Word can't possibly be in the sorted list, return
return false ;
}
if fw_char < word_char {
// Try next word
break ;
}
char_offset + = 1 ;
}
}
// Advance to next word
// An entry occupies one length byte plus `fw_len - 1` stored bytes.
// let str_len_bytes = 2;
let str_len_bytes = 1 ;
str_offset + = ( fw_len as usize - 1 ) + str_len_bytes ;
}
}
}
/// The words counted as "forbidden"; this is the input `FwTab::build`
/// turns into a lookup table.
///
/// `build` indexes each word by its first byte, so every entry must be non-empty.
// NOTE(review): `FW_TAB` looks like pre-generated output for exactly this
// list; presumably it has to be regenerated whenever the list changes — confirm.
const FORBIDDEN_WORDS : [ & 'static [ u8 ] ; 35 ] = [
b " recovery " ,
b " techie " ,
b " http " ,
b " https " ,
b " digital " ,
b " hack " ,
b " :: " ,
b " // " ,
b " com " ,
b " @ " ,
b " crypto " ,
b " bitcoin " ,
b " wallet " ,
b " hacker " ,
b " welcome " ,
b " whatsapp " ,
b " email " ,
b " cryptocurrency " ,
b " stolen " ,
b " freeze " ,
b " quick " ,
b " crucial " ,
b " tracing " ,
b " scammers " ,
b " expers " ,
b " hire " ,
b " century " ,
b " transaction " ,
b " essential " ,
b " managing " ,
b " contact " ,
b " contacting " ,
b " understanding " ,
b " assets " ,
b " funds " ,
] ;
// Precomputed forbidden-word table queried by `analyze` through `FwTab::lookup`.
// The fields appear in the same order in which `FwTab::build` prints them.
// NOTE(review): this looks like pasted output of `FwTab::build`; presumably it
// must be regenerated, not hand-edited, when `FORBIDDEN_WORDS` changes — confirm.
static FW_TAB : FwTab = FwTab {
dir : [
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x01 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x04 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x07 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x09 , 0x10 , 0x18 , 0x4f , 0x57 , 0x6c , 0x00 , 0x78 , 0x00 , 0x00 , 0x00 , 0x00 , 0x90 , 0x00 , 0x00 ,
0x00 , 0x99 , 0x9f , 0xa8 , 0xb7 , 0xd0 , 0x00 , 0xde , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
] ,
dir_len_bits : [
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0004 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0004 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0002 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0040 , 0x0080 , 0x44c8 , 0x0080 , 0x0260 , 0x0060 , 0x0000 , 0x0070 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0100 , 0x0000 , 0x0000 ,
0x0000 , 0x0020 , 0x0100 , 0x0140 , 0x08c0 , 0x2000 , 0x0000 , 0x01c0 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 , 0x0000 ,
] ,
strs : [
0x00 , 0x02 , 0x2f , 0x00 , 0x02 , 0x3a , 0x00 , 0x01 , 0x00 , 0x06 , 0x73 , 0x73 , 0x65 , 0x74 , 0x73 , 0x00 ,
0x07 , 0x69 , 0x74 , 0x63 , 0x6f , 0x69 , 0x6e , 0x00 , 0x07 , 0x65 , 0x6e , 0x74 , 0x75 , 0x72 , 0x79 , 0x03 ,
0x6f , 0x6d , 0x07 , 0x6f , 0x6e , 0x74 , 0x61 , 0x63 , 0x74 , 0x0a , 0x6f , 0x6e , 0x74 , 0x61 , 0x63 , 0x74 ,
0x69 , 0x6e , 0x67 , 0x07 , 0x72 , 0x75 , 0x63 , 0x69 , 0x61 , 0x6c , 0x06 , 0x72 , 0x79 , 0x70 , 0x74 , 0x6f ,
0x0e , 0x72 , 0x79 , 0x70 , 0x74 , 0x6f , 0x63 , 0x75 , 0x72 , 0x72 , 0x65 , 0x6e , 0x63 , 0x79 , 0x00 , 0x07 ,
0x69 , 0x67 , 0x69 , 0x74 , 0x61 , 0x6c , 0x00 , 0x05 , 0x6d , 0x61 , 0x69 , 0x6c , 0x09 , 0x73 , 0x73 , 0x65 ,
0x6e , 0x74 , 0x69 , 0x61 , 0x6c , 0x06 , 0x78 , 0x70 , 0x65 , 0x72 , 0x73 , 0x00 , 0x06 , 0x72 , 0x65 , 0x65 ,
0x7a , 0x65 , 0x05 , 0x75 , 0x6e , 0x64 , 0x73 , 0x00 , 0x04 , 0x61 , 0x63 , 0x6b , 0x06 , 0x61 , 0x63 , 0x6b ,
0x65 , 0x72 , 0x04 , 0x69 , 0x72 , 0x65 , 0x04 , 0x74 , 0x74 , 0x70 , 0x05 , 0x74 , 0x74 , 0x70 , 0x73 , 0x00 ,
0x08 , 0x61 , 0x6e , 0x61 , 0x67 , 0x69 , 0x6e , 0x67 , 0x00 , 0x05 , 0x75 , 0x69 , 0x63 , 0x6b , 0x00 , 0x08 ,
0x65 , 0x63 , 0x6f , 0x76 , 0x65 , 0x72 , 0x79 , 0x00 , 0x08 , 0x63 , 0x61 , 0x6d , 0x6d , 0x65 , 0x72 , 0x73 ,
0x06 , 0x74 , 0x6f , 0x6c , 0x65 , 0x6e , 0x00 , 0x06 , 0x65 , 0x63 , 0x68 , 0x69 , 0x65 , 0x07 , 0x72 , 0x61 ,
0x63 , 0x69 , 0x6e , 0x67 , 0x0b , 0x72 , 0x61 , 0x6e , 0x73 , 0x61 , 0x63 , 0x74 , 0x69 , 0x6f , 0x6e , 0x00 ,
0x0d , 0x6e , 0x64 , 0x65 , 0x72 , 0x73 , 0x74 , 0x61 , 0x6e , 0x64 , 0x69 , 0x6e , 0x67 , 0x00 , 0x06 , 0x61 ,
0x6c , 0x6c , 0x65 , 0x74 , 0x07 , 0x65 , 0x6c , 0x63 , 0x6f , 0x6d , 0x65 , 0x08 , 0x68 , 0x61 , 0x74 , 0x73 ,
0x61 , 0x70 , 0x70 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 ,
] ,
long_strs : [ & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ 0x2f2f , ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ 0x3a3a , ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ 0x40 , ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ 0x737465737361 , ] , & [ 0x6e696f63746962 , ] , & [ 0x797275746e6563 , 0x6d6f63 , 0x746361746e6f63 , 0x676e69746361746e6f63 , 0x6c616963757263 , 0x6f7470797263 , 0x79636e65727275636f7470797263 , ] , & [ 0x6c617469676964 , ] , & [ 0x6c69616d65 , 0x6c6169746e65737365 , 0x737265707865 , ] , & [ 0x657a65657266 , 0x73646e7566 , ] , & [ ] , & [ 0x6b636168 , 0x72656b636168 , 0x65726968 , 0x70747468 , 0x7370747468 , ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ 0x676e6967616e616d , ] , & [ ] , & [ ] , & [ ] , & [ 0x6b63697571 , ] , & [ 0x797265766f636572 , ] , & [ 0x7372656d6d616373 , 0x6e656c6f7473 , ] , & [ 0x656968636574 , 0x676e6963617274 , 0x6e6f69746361736e617274 , ] , & [ 0x676e69646e6174737265646e75 , ] , & [ ] , & [ 0x74656c6c6177 , 0x656d6f636c6577 , 0x7070617374616877 , ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] 
, & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , & [ ] , ] ,
} ;
// NOTE: perfect hashing was kinda slow
//static FW_PHF: phf::Set<&'static [u8]> = phf::phf_set! {
// b"recovery",
// b"techie",
// b"http",
// b"https",
// b"digital",
// b"hack",
// b"::",
// b"//",
// b"com",
// b"@",
// b"crypto",
// b"bitcoin",
// b"wallet",
// b"hacker",
// b"welcome",
// b"whatsapp",
// b"email",
// b"cryptocurrency",
// b"stolen",
// b"freeze",
// b"quick",
// b"crucial",
// b"tracing",
// b"scammers",
// b"expers",
// b"hire",
// b"century",
// b"transaction",
// b"essential",
// b"managing",
// b"contact",
// b"contacting",
// b"understanding",
// b"assets",
// b"funds",
//};
//static FW_TAB_DIR: [u8; 256] = [
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
//];
//static FW_TAB_STRS: [u8; 244] = [
// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
// 0x61, 0x70, 0x70, 0x00,
//];