#![feature(likely_unlikely)] #![feature(rust_cold_cc)] #![allow(dead_code)] mod books; use rayon::prelude::*; use std::cell::RefCell; use std::ffi::OsStr; use std::fs::OpenOptions; use std::io::Read; use std::sync::Mutex; use std::thread::available_parallelism; use std::time::{Duration, Instant}; use std::{array, env, hint, process}; #[inline] fn is_ascii_whitespace(b: u8) -> bool { matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') } #[inline] fn is_ascii_upper(b: u8) -> bool { matches!(b, b'A'..=b'Z') } #[inline] fn is_ascii_digit(b: u8) -> bool { matches!(b, b'0'..=b'9') } #[repr(align(128))] #[derive(Copy, Clone)] struct Stats { pub sentences: u32, pub words: u32, pub capitalizeds: u32, pub numbers: u32, pub forbiddens: u32, } static TIME_SPENT_READING_FILES: Mutex = Mutex::new(Duration::from_secs(0)); const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024; thread_local! { static WORK_STATE: RefCell = RefCell::new(WorkState::new()); } pub struct WorkState { pub work_mem: Box<[u8]>, // pub io_mem: Box<[u8]>, // pub curr_read: Option, // pub had_first_load: bool, } impl WorkState { pub fn new() -> Self { Self { work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), // io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), // curr_read: None, // had_first_load: false, } } } #[cold] #[inline(never)] extern "rust-cold" fn die() -> ! { println!("Something went wrong! I'm going to die now"); process::abort() } fn work(file_path: &OsStr, stats: &mut Stats) { WORK_STATE.with_borrow_mut(|state: &mut WorkState| { // // Load file // let start_time = Instant::now(); // let Ok(text) = fs::read(file_path) else { // eprintln!("invalid file!"); // process::abort(); // }; // NOTE: Reading the file like this is noticeably faster! let mut file = OpenOptions::new() .read(true) // .custom_flags(libc::O_DIRECT) // O_DIRECT is A LOT slower!! .open(file_path) .unwrap_or_else(|_| die()); let mut read_offset = 0; loop { // let rb = file.read_at(&mut state.work_mem[read_offset..], read_offset as u64) let rb = file.read(&mut state.work_mem[read_offset..]) .unwrap_or_else(|_| die()); if hint::unlikely(rb == 0) { break; } read_offset += rb; } let text = &state.work_mem[..read_offset]; // file.read_exact(&mut state.work_mem[..file_len]).unwrap(); // let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice(); // let time_reading = start_time.elapsed(); // { // let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); // *guard += time_reading; // } analyze(&text, stats); }); } fn analyze(text: &[u8], stats: &mut Stats) { // // NOTE: mmap is quite a bit slower // // Load file // let Ok(file) = File::open(file_path) else { // eprintln!("invalid file!"); // std::process::abort(); // }; // let mmap = unsafe { // Mmap::map(&file).unwrap() // }; // mem::forget(file); // let text = &*mmap; // // Load file // let start_time = Instant::now(); // let Ok(text) = fs::read(file_path) else { // eprintln!("invalid file!"); // process::abort(); // }; // let time_reading = start_time.elapsed(); // { // let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); // *guard += time_reading; // } let mut sentences = 0; let mut words = 0; let mut capitalizeds = 0; let mut numbers = 0; let mut forbiddens = 0; let mut idx = 0; 'full_loop: loop { // Skip whitespace while is_ascii_whitespace(text[idx]) { idx += 1; if hint::unlikely(idx >= text.len()) { break 'full_loop; } } // Find end of word let word_start = idx; let mut has_non_upper = false; 'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) { idx += 1; if hint::unlikely(idx >= text.len()) { break 'find_word_end; } // Per-char logic if !is_ascii_upper(b) { has_non_upper = true; } if b == b'.' { sentences += 1; } if is_ascii_digit(b) { numbers += 1; } // sentences += (b == b'.') as u32; // numbers += is_ascii_digit(b) as u32; } let word = &text[word_start..idx]; // dbg!(str::from_utf8(word).unwrap()); words += 1; if !has_non_upper { capitalizeds += 1; } // Check forbidden if unsafe { FW_TAB.lookup(word) } { // if FW_PHF.contains(word) { // phf is a lot slower than my FwTab forbiddens += 1; } } /* for token in text.split(|&b| is_ascii_whitespace(b)) { if token.is_empty() { continue; } words += 1; // Sentence count, folded into this loop // instead of another loop (better cache usage) for &b in token { if b == b'.' { sentences += 1; } } // Check if upper if token.iter().all(|&b| is_ascii_upper(b)) { capitalizeds += 1; } // Check digits for &b in token { if is_ascii_digit(b) { numbers += 1; } } // Check if words // if FORBIDDEN_WORDS.contains(&token) { // if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { if unsafe { FW_TAB.lookup(token) } { forbiddens += 1; } } */ /* // NOTE: This is pretty slow: let mut idx = 0; let mut word_start = 0; let mut is_in_word = false; let mut has_non_upper = false; loop { let b = unsafe { *text.get_unchecked(idx) }; let mut process_word = false; if is_ascii_whitespace(b) { if is_in_word { process_word = true; // Reset state for next word is_in_word = false; has_non_upper = false; } } else { if !is_in_word { word_start = idx; is_in_word = true; } has_non_upper |= !is_ascii_upper(b); } // Check digits if is_ascii_digit(b) { numbers += 1; } // Check sentences if b == b'.' { sentences += 1; } let word = &text[word_start..idx]; idx += 1; if process_word || idx >= text.len() { words += 1; if !has_non_upper { capitalizeds += 1; } // // DEBUG: // println!("'{}'", str::from_utf8(word).unwrap()); if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } { forbiddens += 1; } } if idx >= text.len() { break; } } */ stats.sentences = sentences; stats.words = words; stats.capitalizeds = capitalizeds; stats.numbers = numbers; stats.forbiddens = forbiddens; } /* fn analyze_old(file_path: &OsStr, stats: &mut Stats) { // Load file let Ok(text) = fs::read(file_path) else { eprintln!("invalid file!"); std::process::abort(); }; let mut sentences = 0; let mut words = 0; let mut capitalizeds = 0; let mut numbers = 0; let mut forbiddens = 0; for token in text.split(|&b| is_ascii_whitespace(b)) { if token.is_empty() { continue; } words += 1; // Sentence count, folded into this loop // instead of another loop (better cache usage) for &b in token { if b == b'.' { sentences += 1; } } // Check if upper if token.iter().all(|&b| is_ascii_upper(b)) { capitalizeds += 1; } // Check digits for &b in token { if is_ascii_digit(b) { numbers += 1; } } // Check if words // if FORBIDDEN_WORDS.contains(&token) { if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { forbiddens += 1; } } stats.sentences = sentences; stats.words = words; stats.capitalizeds = capitalizeds; stats.numbers = numbers; stats.forbiddens = forbiddens; } */ fn main() { // Read in files from args let mut files = Vec::with_capacity(env::args().len()); // let mut do_parallel = false; let start_time = Instant::now(); for arg in env::args_os().skip(1) { // skip program arg if arg == "-p" { // do_parallel = true; } else { files.push(arg); } } println!("[PROFILE] taking args took {:?}", start_time.elapsed()); // env::args_os(). // let files = FULL_BOOK_PATHS; // // Build table // let tab = FwTab::build(); // Do the work #[allow(unused_mut)] // SAFETY: We (unsafely) write to this via raw ptrs, it NEEDS to be mut! let mut stats = vec![Stats { sentences: 0, words: 0, capitalizeds: 0, numbers: 0, forbiddens: 0, }; files.len()]; let start_time = Instant::now(); let num_cores = available_parallelism().unwrap().get(); let num_threads = num_cores * 1; // // DEBUG: // dbg!(num_threads); // dbg!(num_cores); rayon::ThreadPoolBuilder::new() .num_threads(num_threads) .build_global() .unwrap(); files.par_iter() .enumerate() .for_each(|(idx, p)| { let s = unsafe { &mut *stats.as_ptr() .offset(idx as isize) .cast_mut() }; // let mut path = OsString::from("../../"); // path.push(p); let path = p; work(path, s); }); // thread::scope(|scope| { // let files_per_thread = files.len() / num_threads; // // for thread_idx in 0..num_threads { // let capture_files = &files; // let capture_stats = &stats; // thread::Builder::new().spawn_scoped(scope, move || { // let files = capture_files; // let stats = capture_stats; // // // Set thread affinity // assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores })); // // // Do work // let thread_start = thread_idx * files_per_thread; // for i in 0..files_per_thread { // let real_idx = thread_start + i; // let file_path = &files[real_idx]; // let st = unsafe { // &mut *stats.as_ptr() // .offset(real_idx as isize) // .cast_mut() // }; // // work(&file_path, st); // } // }).unwrap(); // } // }); println!("[PROFILE] processing text took {:?}", start_time.elapsed()); // Accumulate stats let start_time = Instant::now(); let mut total_words = 0; let mut total_capitalizeds = 0; let mut total_sentences = 0; let mut total_numbers = 0; let mut total_forbiddens = 0; for stat in &stats { total_words += stat.words; total_capitalizeds += stat.capitalizeds; total_sentences += stat.sentences; total_numbers += stat.numbers; total_forbiddens += stat.forbiddens; } let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0; let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0; let word_count_per_sentence = total_words as f32 / total_sentences as f32; println!(); println!("Total Words: {total_words}"); println!("Total Capitalized words: {total_capitalizeds}"); println!("Total Sentences: {total_sentences}"); println!("Total Numbers: {total_numbers}"); println!("Total Forbidden words: {total_forbiddens}"); println!("Capitalized percentage: {capitalized_percentage:.6}"); println!("Forbidden percentage: {forbidden_percentage:.6}"); println!("Word count per sentence: {word_count_per_sentence:.6}"); println!("Total files read: {}", files.len()); println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed()); println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap()); // Exit process to avoid running drops process::exit(0); } #[repr(C)] struct FwTab { // pub dir_and_len_bits: [u32; 256], pub dir_len_bits: [u16; 256], pub dir: [u8; 256], pub strs: [u8; 256], pub long_strs: [&'static [u128]; 256], } impl FwTab { pub fn build() -> Self { // Sort fws by first char let mut sorted_fws: Vec> = vec![vec![]; 256]; for word in FORBIDDEN_WORDS { sorted_fws[word[0] as usize].push(&word); } for i in 0..256 { sorted_fws[i].sort() } // // DEBUG: // println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::>()); // Build str tab let mut fw_dir = [0u8; 256]; // let mut fw_dir_len_bits = [0u32; 256]; let mut fw_dir_len_bits = [0u16; 256]; let mut fw_strs: Vec = vec![]; let mut fw_long_strs: [Vec; 256] = array::from_fn(|_| vec![]); fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries for c in 0..256 { for fw in FORBIDDEN_WORDS { if c == fw[0] as usize { // Add to len bits fw_dir_len_bits[c] |= 0x1 << fw.len(); } } if !sorted_fws[c].is_empty() { let sublist_start_offset = fw_strs.len().try_into().unwrap(); fw_dir[c] = sublist_start_offset; // DEBUG: println!("{c} start offset: {}", sublist_start_offset); println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::>()); // Push strings for fw in &sorted_fws[c] { fw_strs.push(fw.len().try_into().unwrap()); for &c in &fw[1..] { fw_strs.push(c); } } // Mark end of per-char word sublist fw_strs.push(b'\0'); } } // Calc long strs for c in 0..256 { for fw in &sorted_fws[c] { let mut buf = [0u8; 16]; buf[..fw.len()].copy_from_slice(fw); let val = u128::from_le_bytes(buf); fw_long_strs[c].push(val); } } // DEBUG: println!("strs len: {}", fw_strs.len()); assert_eq!(fw_dir.len(), 256); assert!(fw_strs.len() <= 256); fw_strs.resize(256, 0); let tab = FwTab { dir: fw_dir, dir_len_bits: fw_dir_len_bits, // dir_and_len_bits: array::from_fn(|idx| { // (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24) // }), strs: fw_strs.try_into().unwrap(), long_strs: fw_long_strs.map(|vec| &*vec.leak()), // Too lazy, not needed anyways }; // DEBUG: Test some strings unsafe { dbg!(tab.lookup(b"cpm")); dbg!(tab.lookup(b"com")); dbg!(tab.lookup(b"coma")); dbg!(tab.lookup(b"co")); dbg!(tab.lookup(b"cam")); dbg!(tab.lookup(b"crypto")); dbg!(tab.lookup(b"@")); // dbg!(tab.lookup(b"")); // we require that words must be non-empty! dbg!(tab.lookup(b" ")); dbg!(tab.lookup(b"test")); dbg!(tab.lookup(b"expers")); } println!("static FW_TAB: FwTab = FwTab {{"); // println!("\tdir_and_len_bits: ["); // for chunk in self.dir_and_len_bits.chunks(16) { // print!("\t\t"); // for &b in chunk { // print!("0x{b:08x}, "); // } // println!(); // } // println!("\t],"); println!("\tdir: ["); for chunk in tab.dir.chunks(16) { print!("\t\t"); for &b in chunk { print!("0x{b:02x}, "); } println!(); } println!("\t],"); println!("\tdir_len_bits: ["); for chunk in tab.dir_len_bits.chunks(16) { print!("\t\t"); for &b in chunk { print!("0x{b:04x}, "); } println!(); } println!("\t],"); println!("\tstrs: ["); for chunk in tab.strs.chunks(16) { print!("\t\t"); for &b in chunk { print!("0x{b:02x}, "); } println!(); } println!("\t],"); print!("\tlong_strs: ["); for c in 0..256 { // print!("\t\t"); print!("&["); for &fw_val in tab.long_strs[c] { print!("0x{:x}, ", fw_val); } print!("],"); // println!(); } println!("],"); println!("}};"); tab } #[inline] pub unsafe fn lookup_b(&self, word: &[u8]) -> bool { let first_char = unsafe { *word.get_unchecked(0) }; let strs = self.long_strs[first_char as usize]; if strs.len() == 0 || word.len() >= 16 { return false; } // Mask word let mask = !(u128::MAX << (word.len() * 8)); let word_int = unsafe { word.as_ptr() .cast::() .read_unaligned() & mask }; let mut i = 0; while i < strs.len() { if strs[i] == word_int { return true; } i += 1; } false } #[inline] pub unsafe fn lookup(&self, word: &[u8]) -> bool { // let &[first_char, ..] = word else { // return false; // }; let first_char = unsafe { *word.get_unchecked(0) }; // let dir_and_len_bits = unsafe { // *self.dir_and_len_bits.get_unchecked(first_char as usize) // }; // if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 { // return false; // } let len_bits = unsafe { *self.dir_len_bits.get_unchecked(first_char as usize) }; if hint::likely(word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0) { return false; } // let mut str_offset = (dir_and_len_bits >> 24) as usize; let mut str_offset = unsafe { *self.dir.get_unchecked(first_char as usize) as usize }; // Char doesn't have any strings in the table if str_offset == 0 { return false; } // Iterate over strs loop { // let fw_len = u16::from_le_bytes([ // self.strs[str_offset], // self.strs[str_offset+1] // ]); let fw_len: u8 = unsafe { *self.strs.get_unchecked(str_offset) }; if fw_len == 0 { // We've reached the end of the word sublist return false; } // Only compare words if they are the same length if hint::unlikely(word.len() == fw_len as usize) { // Compare strs let mut char_offset = 1usize; loop { // Found the word! if char_offset == word.len() { return true; } let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) }; let word_char = unsafe { *word.get_unchecked(char_offset) }; if fw_char > word_char { // Word can't possible be in the sorted list, return return false; } if fw_char < word_char { // Try next word break; } char_offset += 1; } } // Advance to next word // let str_len_bytes = 2; let str_len_bytes = 1; str_offset += (fw_len as usize - 1) + str_len_bytes; } } } const FORBIDDEN_WORDS: [&'static [u8]; 35] = [ b"recovery", b"techie", b"http", b"https", b"digital", b"hack", b"::", b"//", b"com", b"@", b"crypto", b"bitcoin", b"wallet", b"hacker", b"welcome", b"whatsapp", b"email", b"cryptocurrency", b"stolen", b"freeze", b"quick", b"crucial", b"tracing", b"scammers", b"expers", b"hire", b"century", b"transaction", b"essential", b"managing", b"contact", b"contacting", b"understanding", b"assets", b"funds", ]; static FW_TAB: FwTab = FwTab { dir: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ], dir_len_bits: [ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000, 0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ], strs: [ 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, 0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ], long_strs: [&[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x2f2f, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x3a3a, ], &[], &[], &[], &[], &[], &[0x40, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x737465737361, ], &[0x6e696f63746962, ], &[0x797275746e6563, 0x6d6f63, 0x746361746e6f63, 0x676e69746361746e6f63, 0x6c616963757263, 0x6f7470797263, 0x79636e65727275636f7470797263, ], &[0x6c617469676964, ], &[0x6c69616d65, 0x6c6169746e65737365, 0x737265707865, ], &[0x657a65657266, 0x73646e7566, ], &[], &[0x6b636168, 0x72656b636168, 0x65726968, 0x70747468, 0x7370747468, ], &[], &[], &[], &[], &[0x676e6967616e616d, ], &[], &[], &[], &[0x6b63697571, ], &[0x797265766f636572, ], &[0x7372656d6d616373, 0x6e656c6f7473, ], &[0x656968636574, 0x676e6963617274, 0x6e6f69746361736e617274, ], &[0x676e69646e6174737265646e75, ], &[], &[0x74656c6c6177, 0x656d6f636c6577, 0x7070617374616877, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], ], }; // NOTE: perfect hashing was kinda slow //static FW_PHF: phf::Set<&'static [u8]> = phf::phf_set! { // b"recovery", // b"techie", // b"http", // b"https", // b"digital", // b"hack", // b"::", // b"//", // b"com", // b"@", // b"crypto", // b"bitcoin", // b"wallet", // b"hacker", // b"welcome", // b"whatsapp", // b"email", // b"cryptocurrency", // b"stolen", // b"freeze", // b"quick", // b"crucial", // b"tracing", // b"scammers", // b"expers", // b"hire", // b"century", // b"transaction", // b"essential", // b"managing", // b"contact", // b"contacting", // b"understanding", // b"assets", // b"funds", //}; //static FW_TAB_DIR: [u8; 256] = [ // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, // 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //]; //static FW_TAB_STRS: [u8; 244] = [ // 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, // 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, // 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, // 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, // 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, // 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, // 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, // 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, // 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, // 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, // 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, // 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, // 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, // 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, // 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, // 0x61, 0x70, 0x70, 0x00, //];