2025-10-06 13:39:29 +02:00
|
|
|
|
#![feature(likely_unlikely)]
|
|
|
|
|
|
#![feature(rust_cold_cc)]
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:45:18 +02:00
|
|
|
|
#![allow(dead_code)]
|
|
|
|
|
|
|
2025-10-06 17:43:44 +02:00
|
|
|
|
//mod books;
|
2025-10-06 13:39:29 +02:00
|
|
|
|
|
2024-11-30 23:46:08 +01:00
|
|
|
|
use rayon::prelude::*;
|
2025-10-06 13:39:29 +02:00
|
|
|
|
use std::cell::RefCell;
|
2025-10-06 13:45:18 +02:00
|
|
|
|
use std::ffi::OsStr;
|
|
|
|
|
|
use std::fs::OpenOptions;
|
2025-10-06 13:39:29 +02:00
|
|
|
|
use std::io::Read;
|
|
|
|
|
|
use std::sync::Mutex;
|
|
|
|
|
|
use std::thread::available_parallelism;
|
|
|
|
|
|
use std::time::{Duration, Instant};
|
2025-10-06 13:45:18 +02:00
|
|
|
|
use std::{array, env, hint, process};
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
/// Reports whether `b` is ASCII whitespace: tab, line feed, form feed,
/// carriage return or space. Vertical tab (0x0B) is *not* included.
#[inline]
fn is_ascii_whitespace(b: u8) -> bool {
    // The standard-library predicate covers exactly the same five bytes.
    b.is_ascii_whitespace()
}
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
/// Reports whether `b` is an ASCII uppercase letter (`A`..=`Z`).
#[inline]
fn is_ascii_upper(b: u8) -> bool {
    b.is_ascii_uppercase()
}
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
/// Reports whether `b` is an ASCII decimal digit (`0`..=`9`).
#[inline]
fn is_ascii_digit(b: u8) -> bool {
    // Resolves to the inherent `u8` method, not to this free function.
    b.is_ascii_digit()
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Counters gathered for a single input file.
///
/// NOTE(review): the 128-byte alignment presumably keeps neighbouring entries of
/// the results vector apart in memory because different threads fill adjacent
/// entries — confirm the intent before changing it.
#[derive(Clone, Copy)]
#[repr(align(128))]
struct Stats {
    /// Number of `.` bytes counted.
    pub sentences: u32,
    /// Number of whitespace-separated words.
    pub words: u32,
    /// Number of words made up solely of ASCII uppercase letters.
    pub capitalizeds: u32,
    /// Number of ASCII digit bytes counted (digits, not whole numbers).
    pub numbers: u32,
    /// Number of words found in the forbidden-word table.
    pub forbiddens: u32,
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Accumulated time spent reading input files, shared between threads and reported by `main`.
/// NOTE(review): no live code visible in this file adds to it (the accumulation is commented
/// out), so the reported value appears to always be zero — confirm.
static TIME_SPENT_READING_FILES: Mutex<Duration> = Mutex::new(Duration::ZERO);
|
|
|
|
|
|
|
|
|
|
|
|
/// Size in bytes of each thread's file buffer (6 MiB).
const TEMP_MEM_SIZE: usize = 6 << 20;

thread_local! {
    /// Scratch state owned by the current thread, built on first access.
    static WORK_STATE: RefCell<WorkState> = RefCell::new(WorkState::new());
}

/// Reusable buffers for processing one file at a time.
pub struct WorkState {
    /// Buffer that file contents are read into; starts out as `TEMP_MEM_SIZE` zero bytes.
    pub work_mem: Box<[u8]>,
}

impl WorkState {
    /// Allocates a state whose buffer holds `TEMP_MEM_SIZE` zeroed bytes.
    pub fn new() -> Self {
        let zeroed: Vec<u8> = vec![0; TEMP_MEM_SIZE];
        Self {
            work_mem: zeroed.into(),
        }
    }
}
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
#[cold]
|
|
|
|
|
|
#[inline(never)]
|
|
|
|
|
|
extern "rust-cold" fn die() -> ! {
|
|
|
|
|
|
println!("Something went wrong! I'm going to die now");
|
|
|
|
|
|
process::abort()
|
2024-11-30 20:58:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
fn work(file_path: &OsStr, stats: &mut Stats) {
|
|
|
|
|
|
WORK_STATE.with_borrow_mut(|state: &mut WorkState| {
|
|
|
|
|
|
// // Load file
|
|
|
|
|
|
// let start_time = Instant::now();
|
|
|
|
|
|
|
|
|
|
|
|
// let Ok(text) = fs::read(file_path) else {
|
|
|
|
|
|
// eprintln!("invalid file!");
|
|
|
|
|
|
// process::abort();
|
|
|
|
|
|
// };
|
|
|
|
|
|
|
|
|
|
|
|
// NOTE: Reading the file like this is noticeably faster!
|
|
|
|
|
|
let mut file = OpenOptions::new()
|
|
|
|
|
|
.read(true)
|
|
|
|
|
|
// .custom_flags(libc::O_DIRECT) // O_DIRECT is A LOT slower!!
|
|
|
|
|
|
.open(file_path)
|
|
|
|
|
|
.unwrap_or_else(|_| die());
|
|
|
|
|
|
|
|
|
|
|
|
let mut read_offset = 0;
|
|
|
|
|
|
loop {
|
|
|
|
|
|
// let rb = file.read_at(&mut state.work_mem[read_offset..], read_offset as u64)
|
|
|
|
|
|
let rb = file.read(&mut state.work_mem[read_offset..])
|
|
|
|
|
|
.unwrap_or_else(|_| die());
|
|
|
|
|
|
|
|
|
|
|
|
if hint::unlikely(rb == 0) {
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
read_offset += rb;
|
|
|
|
|
|
}
|
|
|
|
|
|
let text = &state.work_mem[..read_offset];
|
|
|
|
|
|
|
|
|
|
|
|
// file.read_exact(&mut state.work_mem[..file_len]).unwrap();
|
|
|
|
|
|
|
|
|
|
|
|
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
// let time_reading = start_time.elapsed();
|
|
|
|
|
|
// {
|
|
|
|
|
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
|
|
|
|
|
// *guard += time_reading;
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
|
|
analyze(&text, stats);
|
|
|
|
|
|
});
|
2024-11-30 20:58:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
fn analyze(text: &[u8], stats: &mut Stats) {
|
|
|
|
|
|
// // NOTE: mmap is quite a bit slower
|
|
|
|
|
|
// // Load file
|
|
|
|
|
|
// let Ok(file) = File::open(file_path) else {
|
|
|
|
|
|
// eprintln!("invalid file!");
|
|
|
|
|
|
// std::process::abort();
|
|
|
|
|
|
// };
|
|
|
|
|
|
// let mmap = unsafe {
|
|
|
|
|
|
// Mmap::map(&file).unwrap()
|
|
|
|
|
|
// };
|
|
|
|
|
|
// mem::forget(file);
|
|
|
|
|
|
// let text = &*mmap;
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
// // Load file
|
|
|
|
|
|
// let start_time = Instant::now();
|
|
|
|
|
|
// let Ok(text) = fs::read(file_path) else {
|
|
|
|
|
|
// eprintln!("invalid file!");
|
|
|
|
|
|
// process::abort();
|
|
|
|
|
|
// };
|
|
|
|
|
|
// let time_reading = start_time.elapsed();
|
|
|
|
|
|
// {
|
|
|
|
|
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
|
|
|
|
|
// *guard += time_reading;
|
|
|
|
|
|
// }
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
let mut sentences = 0;
|
2024-12-01 23:23:32 +01:00
|
|
|
|
let mut words = 0;
|
2025-10-06 13:39:29 +02:00
|
|
|
|
let mut capitalizeds = 0;
|
|
|
|
|
|
let mut numbers = 0;
|
|
|
|
|
|
let mut forbiddens = 0;
|
2024-12-01 23:23:32 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
let mut idx = 0;
|
|
|
|
|
|
'full_loop: loop {
|
|
|
|
|
|
// Skip whitespace
|
|
|
|
|
|
while is_ascii_whitespace(text[idx]) {
|
|
|
|
|
|
idx += 1;
|
|
|
|
|
|
if hint::unlikely(idx >= text.len()) {
|
|
|
|
|
|
break 'full_loop;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Find end of word
|
|
|
|
|
|
let word_start = idx;
|
|
|
|
|
|
let mut has_non_upper = false;
|
|
|
|
|
|
|
|
|
|
|
|
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
|
|
|
|
|
|
idx += 1;
|
|
|
|
|
|
if hint::unlikely(idx >= text.len()) {
|
|
|
|
|
|
break 'find_word_end;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Per-char logic
|
|
|
|
|
|
if !is_ascii_upper(b) {
|
|
|
|
|
|
has_non_upper = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
if b == b'.' {
|
|
|
|
|
|
sentences += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
if is_ascii_digit(b) {
|
|
|
|
|
|
numbers += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
// sentences += (b == b'.') as u32;
|
|
|
|
|
|
// numbers += is_ascii_digit(b) as u32;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let word = &text[word_start..idx];
|
2025-10-06 17:43:44 +02:00
|
|
|
|
// let word = unsafe { &text.get_unchecked(word_start..idx) };
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
// dbg!(str::from_utf8(word).unwrap());
|
|
|
|
|
|
|
|
|
|
|
|
words += 1;
|
|
|
|
|
|
|
|
|
|
|
|
if !has_non_upper {
|
|
|
|
|
|
capitalizeds += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check forbidden
|
|
|
|
|
|
if unsafe { FW_TAB.lookup(word) } {
|
|
|
|
|
|
// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab
|
|
|
|
|
|
forbiddens += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
/*
|
|
|
|
|
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
|
|
|
|
|
if token.is_empty() {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
words += 1;
|
|
|
|
|
|
|
|
|
|
|
|
// Sentence count, folded into this loop
|
|
|
|
|
|
// instead of another loop (better cache usage)
|
|
|
|
|
|
for &b in token {
|
|
|
|
|
|
if b == b'.' {
|
|
|
|
|
|
sentences += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check if upper
|
|
|
|
|
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
|
|
|
|
|
capitalizeds += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check digits
|
|
|
|
|
|
for &b in token {
|
|
|
|
|
|
if is_ascii_digit(b) {
|
|
|
|
|
|
numbers += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check if words
|
|
|
|
|
|
// if FORBIDDEN_WORDS.contains(&token) {
|
|
|
|
|
|
// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
|
|
|
|
|
if unsafe { FW_TAB.lookup(token) } {
|
|
|
|
|
|
forbiddens += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
*/
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
/*
|
|
|
|
|
|
// NOTE: This is pretty slow:
|
|
|
|
|
|
let mut idx = 0;
|
|
|
|
|
|
let mut word_start = 0;
|
|
|
|
|
|
let mut is_in_word = false;
|
|
|
|
|
|
let mut has_non_upper = false;
|
|
|
|
|
|
loop {
|
|
|
|
|
|
let b = unsafe { *text.get_unchecked(idx) };
|
|
|
|
|
|
|
|
|
|
|
|
let mut process_word = false;
|
|
|
|
|
|
if is_ascii_whitespace(b) {
|
|
|
|
|
|
if is_in_word {
|
|
|
|
|
|
process_word = true;
|
|
|
|
|
|
|
|
|
|
|
|
// Reset state for next word
|
|
|
|
|
|
is_in_word = false;
|
|
|
|
|
|
has_non_upper = false;
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
if !is_in_word {
|
|
|
|
|
|
word_start = idx;
|
|
|
|
|
|
is_in_word = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
has_non_upper |= !is_ascii_upper(b);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check digits
|
|
|
|
|
|
if is_ascii_digit(b) {
|
|
|
|
|
|
numbers += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Check sentences
|
|
|
|
|
|
if b == b'.' {
|
|
|
|
|
|
sentences += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let word = &text[word_start..idx];
|
|
|
|
|
|
|
|
|
|
|
|
idx += 1;
|
|
|
|
|
|
|
|
|
|
|
|
if process_word || idx >= text.len() {
|
|
|
|
|
|
words += 1;
|
|
|
|
|
|
if !has_non_upper {
|
|
|
|
|
|
capitalizeds += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// // DEBUG:
|
|
|
|
|
|
// println!("'{}'", str::from_utf8(word).unwrap());
|
|
|
|
|
|
|
|
|
|
|
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } {
|
|
|
|
|
|
forbiddens += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if idx >= text.len() {
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
*/
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
stats.sentences = sentences;
|
|
|
|
|
|
stats.words = words;
|
|
|
|
|
|
stats.capitalizeds = capitalizeds;
|
|
|
|
|
|
stats.numbers = numbers;
|
|
|
|
|
|
stats.forbiddens = forbiddens;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
|
|
|
|
|
|
// Load file
|
|
|
|
|
|
let Ok(text) = fs::read(file_path) else {
|
|
|
|
|
|
eprintln!("invalid file!");
|
|
|
|
|
|
std::process::abort();
|
|
|
|
|
|
};
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
let mut sentences = 0;
|
|
|
|
|
|
let mut words = 0;
|
|
|
|
|
|
let mut capitalizeds = 0;
|
|
|
|
|
|
let mut numbers = 0;
|
|
|
|
|
|
let mut forbiddens = 0;
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
|
|
|
|
|
if token.is_empty() {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
words += 1;
|
|
|
|
|
|
|
|
|
|
|
|
// Sentence count, folded into this loop
|
|
|
|
|
|
// instead of another loop (better cache usage)
|
|
|
|
|
|
for &b in token {
|
|
|
|
|
|
if b == b'.' {
|
|
|
|
|
|
sentences += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check if upper
|
|
|
|
|
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
|
|
|
|
|
capitalizeds += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check digits
|
|
|
|
|
|
for &b in token {
|
|
|
|
|
|
if is_ascii_digit(b) {
|
|
|
|
|
|
numbers += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check if words
|
|
|
|
|
|
// if FORBIDDEN_WORDS.contains(&token) {
|
|
|
|
|
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
|
|
|
|
|
forbiddens += 1;
|
|
|
|
|
|
}
|
2024-11-30 23:46:08 +01:00
|
|
|
|
}
|
2025-10-06 13:39:29 +02:00
|
|
|
|
|
|
|
|
|
|
stats.sentences = sentences;
|
|
|
|
|
|
stats.words = words;
|
|
|
|
|
|
stats.capitalizeds = capitalizeds;
|
|
|
|
|
|
stats.numbers = numbers;
|
|
|
|
|
|
stats.forbiddens = forbiddens;
|
2024-11-30 20:58:19 +01:00
|
|
|
|
}
|
2025-10-06 13:39:29 +02:00
|
|
|
|
*/
|
2024-11-30 20:58:19 +01:00
|
|
|
|
|
|
|
|
|
|
fn main() {
|
2025-10-06 13:39:29 +02:00
|
|
|
|
// Read in files from args
|
2024-11-30 23:46:08 +01:00
|
|
|
|
let mut files = Vec::with_capacity(env::args().len());
|
2025-10-06 13:45:18 +02:00
|
|
|
|
// let mut do_parallel = false;
|
2024-11-30 23:46:08 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
let start_time = Instant::now();
|
|
|
|
|
|
for arg in env::args_os().skip(1) {
|
|
|
|
|
|
// skip program arg
|
2024-11-30 23:46:08 +01:00
|
|
|
|
if arg == "-p" {
|
2025-10-06 13:45:18 +02:00
|
|
|
|
// do_parallel = true;
|
2024-11-30 23:46:08 +01:00
|
|
|
|
} else {
|
|
|
|
|
|
files.push(arg);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-10-06 13:39:29 +02:00
|
|
|
|
println!("[PROFILE] taking args took {:?}", start_time.elapsed());
|
|
|
|
|
|
|
|
|
|
|
|
// env::args_os().
|
|
|
|
|
|
|
|
|
|
|
|
// let files = FULL_BOOK_PATHS;
|
|
|
|
|
|
|
|
|
|
|
|
// // Build table
|
|
|
|
|
|
// let tab = FwTab::build();
|
2024-11-30 23:46:08 +01:00
|
|
|
|
|
|
|
|
|
|
// Do the work
|
2025-10-06 13:45:18 +02:00
|
|
|
|
#[allow(unused_mut)] // SAFETY: We (unsafely) write to this via raw ptrs, it NEEDS to be mut!
|
2025-10-06 13:39:29 +02:00
|
|
|
|
let mut stats = vec![Stats {
|
|
|
|
|
|
sentences: 0,
|
|
|
|
|
|
words: 0,
|
|
|
|
|
|
capitalizeds: 0,
|
|
|
|
|
|
numbers: 0,
|
|
|
|
|
|
forbiddens: 0,
|
|
|
|
|
|
}; files.len()];
|
|
|
|
|
|
|
|
|
|
|
|
let start_time = Instant::now();
|
|
|
|
|
|
|
|
|
|
|
|
let num_cores = available_parallelism().unwrap().get();
|
|
|
|
|
|
let num_threads = num_cores * 1;
|
|
|
|
|
|
|
|
|
|
|
|
// // DEBUG:
|
|
|
|
|
|
// dbg!(num_threads);
|
|
|
|
|
|
// dbg!(num_cores);
|
|
|
|
|
|
|
|
|
|
|
|
rayon::ThreadPoolBuilder::new()
|
2025-10-06 13:45:18 +02:00
|
|
|
|
.num_threads(num_threads)
|
2025-10-06 13:39:29 +02:00
|
|
|
|
.build_global()
|
|
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
|
|
|
|
files.par_iter()
|
|
|
|
|
|
.enumerate()
|
|
|
|
|
|
.for_each(|(idx, p)| {
|
|
|
|
|
|
let s = unsafe {
|
|
|
|
|
|
&mut *stats.as_ptr()
|
|
|
|
|
|
.offset(idx as isize)
|
|
|
|
|
|
.cast_mut()
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// let mut path = OsString::from("../../");
|
|
|
|
|
|
// path.push(p);
|
|
|
|
|
|
let path = p;
|
|
|
|
|
|
work(path, s);
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
// thread::scope(|scope| {
|
|
|
|
|
|
// let files_per_thread = files.len() / num_threads;
|
|
|
|
|
|
//
|
|
|
|
|
|
// for thread_idx in 0..num_threads {
|
|
|
|
|
|
// let capture_files = &files;
|
|
|
|
|
|
// let capture_stats = &stats;
|
|
|
|
|
|
// thread::Builder::new().spawn_scoped(scope, move || {
|
|
|
|
|
|
// let files = capture_files;
|
|
|
|
|
|
// let stats = capture_stats;
|
|
|
|
|
|
//
|
|
|
|
|
|
// // Set thread affinity
|
|
|
|
|
|
// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
|
|
|
|
|
|
//
|
|
|
|
|
|
// // Do work
|
|
|
|
|
|
// let thread_start = thread_idx * files_per_thread;
|
|
|
|
|
|
// for i in 0..files_per_thread {
|
|
|
|
|
|
// let real_idx = thread_start + i;
|
|
|
|
|
|
// let file_path = &files[real_idx];
|
|
|
|
|
|
// let st = unsafe {
|
|
|
|
|
|
// &mut *stats.as_ptr()
|
|
|
|
|
|
// .offset(real_idx as isize)
|
|
|
|
|
|
// .cast_mut()
|
|
|
|
|
|
// };
|
|
|
|
|
|
//
|
|
|
|
|
|
// work(&file_path, st);
|
|
|
|
|
|
// }
|
|
|
|
|
|
// }).unwrap();
|
|
|
|
|
|
// }
|
|
|
|
|
|
// });
|
|
|
|
|
|
|
|
|
|
|
|
println!("[PROFILE] processing text took {:?}", start_time.elapsed());
|
|
|
|
|
|
|
|
|
|
|
|
// Accumulate stats
|
|
|
|
|
|
let start_time = Instant::now();
|
|
|
|
|
|
|
|
|
|
|
|
let mut total_words = 0;
|
|
|
|
|
|
let mut total_capitalizeds = 0;
|
|
|
|
|
|
let mut total_sentences = 0;
|
|
|
|
|
|
let mut total_numbers = 0;
|
|
|
|
|
|
let mut total_forbiddens = 0;
|
|
|
|
|
|
|
|
|
|
|
|
for stat in &stats {
|
|
|
|
|
|
total_words += stat.words;
|
|
|
|
|
|
total_capitalizeds += stat.capitalizeds;
|
|
|
|
|
|
total_sentences += stat.sentences;
|
|
|
|
|
|
total_numbers += stat.numbers;
|
|
|
|
|
|
total_forbiddens += stat.forbiddens;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0;
|
|
|
|
|
|
let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0;
|
|
|
|
|
|
let word_count_per_sentence = total_words as f32 / total_sentences as f32;
|
|
|
|
|
|
|
|
|
|
|
|
println!();
|
|
|
|
|
|
println!("Total Words: {total_words}");
|
|
|
|
|
|
println!("Total Capitalized words: {total_capitalizeds}");
|
|
|
|
|
|
println!("Total Sentences: {total_sentences}");
|
|
|
|
|
|
println!("Total Numbers: {total_numbers}");
|
|
|
|
|
|
println!("Total Forbidden words: {total_forbiddens}");
|
|
|
|
|
|
println!("Capitalized percentage: {capitalized_percentage:.6}");
|
|
|
|
|
|
println!("Forbidden percentage: {forbidden_percentage:.6}");
|
|
|
|
|
|
println!("Word count per sentence: {word_count_per_sentence:.6}");
|
|
|
|
|
|
println!("Total files read: {}", files.len());
|
|
|
|
|
|
|
|
|
|
|
|
println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed());
|
|
|
|
|
|
|
|
|
|
|
|
println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap());
|
|
|
|
|
|
|
|
|
|
|
|
// Exit process to avoid running drops
|
|
|
|
|
|
process::exit(0);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[repr(C)]
|
|
|
|
|
|
struct FwTab {
|
|
|
|
|
|
// pub dir_and_len_bits: [u32; 256],
|
|
|
|
|
|
pub dir_len_bits: [u16; 256],
|
|
|
|
|
|
pub dir: [u8; 256],
|
|
|
|
|
|
pub strs: [u8; 256],
|
|
|
|
|
|
pub long_strs: [&'static [u128]; 256],
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl FwTab {
|
|
|
|
|
|
pub fn build() -> Self {
|
|
|
|
|
|
// Sort fws by first char
|
|
|
|
|
|
let mut sorted_fws: Vec<Vec<&'static [u8]>> = vec![vec![]; 256];
|
|
|
|
|
|
|
|
|
|
|
|
for word in FORBIDDEN_WORDS {
|
|
|
|
|
|
sorted_fws[word[0] as usize].push(&word);
|
|
|
|
|
|
}
|
|
|
|
|
|
for i in 0..256 {
|
|
|
|
|
|
sorted_fws[i].sort()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// // DEBUG:
|
|
|
|
|
|
// println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
|
|
|
|
|
|
|
|
|
|
|
|
// Build str tab
|
|
|
|
|
|
let mut fw_dir = [0u8; 256];
|
|
|
|
|
|
// let mut fw_dir_len_bits = [0u32; 256];
|
|
|
|
|
|
let mut fw_dir_len_bits = [0u16; 256];
|
|
|
|
|
|
let mut fw_strs: Vec<u8> = vec![];
|
|
|
|
|
|
|
|
|
|
|
|
let mut fw_long_strs: [Vec<u128>; 256] = array::from_fn(|_| vec![]);
|
|
|
|
|
|
|
|
|
|
|
|
fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries
|
|
|
|
|
|
|
|
|
|
|
|
for c in 0..256 {
|
|
|
|
|
|
for fw in FORBIDDEN_WORDS {
|
|
|
|
|
|
if c == fw[0] as usize {
|
|
|
|
|
|
// Add to len bits
|
|
|
|
|
|
fw_dir_len_bits[c] |= 0x1 << fw.len();
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if !sorted_fws[c].is_empty() {
|
|
|
|
|
|
let sublist_start_offset = fw_strs.len().try_into().unwrap();
|
|
|
|
|
|
fw_dir[c] = sublist_start_offset;
|
|
|
|
|
|
|
|
|
|
|
|
// DEBUG:
|
|
|
|
|
|
println!("{c} start offset: {}", sublist_start_offset);
|
|
|
|
|
|
println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
|
|
|
|
|
|
|
|
|
|
|
|
// Push strings
|
|
|
|
|
|
for fw in &sorted_fws[c] {
|
|
|
|
|
|
fw_strs.push(fw.len().try_into().unwrap());
|
|
|
|
|
|
for &c in &fw[1..] {
|
|
|
|
|
|
fw_strs.push(c);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Mark end of per-char word sublist
|
|
|
|
|
|
fw_strs.push(b'\0');
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Calc long strs
|
|
|
|
|
|
for c in 0..256 {
|
|
|
|
|
|
for fw in &sorted_fws[c] {
|
|
|
|
|
|
let mut buf = [0u8; 16];
|
|
|
|
|
|
buf[..fw.len()].copy_from_slice(fw);
|
|
|
|
|
|
let val = u128::from_le_bytes(buf);
|
|
|
|
|
|
|
|
|
|
|
|
fw_long_strs[c].push(val);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// DEBUG:
|
|
|
|
|
|
println!("strs len: {}", fw_strs.len());
|
|
|
|
|
|
|
|
|
|
|
|
assert_eq!(fw_dir.len(), 256);
|
|
|
|
|
|
assert!(fw_strs.len() <= 256);
|
|
|
|
|
|
|
|
|
|
|
|
fw_strs.resize(256, 0);
|
|
|
|
|
|
|
|
|
|
|
|
let tab = FwTab {
|
|
|
|
|
|
dir: fw_dir,
|
|
|
|
|
|
dir_len_bits: fw_dir_len_bits,
|
|
|
|
|
|
// dir_and_len_bits: array::from_fn(|idx| {
|
|
|
|
|
|
// (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24)
|
|
|
|
|
|
// }),
|
|
|
|
|
|
strs: fw_strs.try_into().unwrap(),
|
|
|
|
|
|
long_strs: fw_long_strs.map(|vec| &*vec.leak()), // Too lazy, not needed anyways
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// DEBUG: Test some strings
|
|
|
|
|
|
unsafe {
|
|
|
|
|
|
dbg!(tab.lookup(b"cpm"));
|
|
|
|
|
|
dbg!(tab.lookup(b"com"));
|
|
|
|
|
|
dbg!(tab.lookup(b"coma"));
|
|
|
|
|
|
dbg!(tab.lookup(b"co"));
|
|
|
|
|
|
dbg!(tab.lookup(b"cam"));
|
|
|
|
|
|
dbg!(tab.lookup(b"crypto"));
|
|
|
|
|
|
dbg!(tab.lookup(b"@"));
|
|
|
|
|
|
// dbg!(tab.lookup(b"")); // we require that words must be non-empty!
|
|
|
|
|
|
dbg!(tab.lookup(b" "));
|
|
|
|
|
|
dbg!(tab.lookup(b"test"));
|
|
|
|
|
|
dbg!(tab.lookup(b"expers"));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
println!("static FW_TAB: FwTab = FwTab {{");
|
|
|
|
|
|
|
|
|
|
|
|
// println!("\tdir_and_len_bits: [");
|
|
|
|
|
|
// for chunk in self.dir_and_len_bits.chunks(16) {
|
|
|
|
|
|
// print!("\t\t");
|
|
|
|
|
|
// for &b in chunk {
|
|
|
|
|
|
// print!("0x{b:08x}, ");
|
|
|
|
|
|
// }
|
|
|
|
|
|
// println!();
|
|
|
|
|
|
// }
|
|
|
|
|
|
// println!("\t],");
|
|
|
|
|
|
|
|
|
|
|
|
println!("\tdir: [");
|
|
|
|
|
|
for chunk in tab.dir.chunks(16) {
|
|
|
|
|
|
print!("\t\t");
|
|
|
|
|
|
for &b in chunk {
|
|
|
|
|
|
print!("0x{b:02x}, ");
|
|
|
|
|
|
}
|
|
|
|
|
|
println!();
|
|
|
|
|
|
}
|
|
|
|
|
|
println!("\t],");
|
|
|
|
|
|
|
|
|
|
|
|
println!("\tdir_len_bits: [");
|
|
|
|
|
|
for chunk in tab.dir_len_bits.chunks(16) {
|
|
|
|
|
|
print!("\t\t");
|
|
|
|
|
|
for &b in chunk {
|
|
|
|
|
|
print!("0x{b:04x}, ");
|
|
|
|
|
|
}
|
|
|
|
|
|
println!();
|
|
|
|
|
|
}
|
|
|
|
|
|
println!("\t],");
|
|
|
|
|
|
|
|
|
|
|
|
println!("\tstrs: [");
|
|
|
|
|
|
for chunk in tab.strs.chunks(16) {
|
|
|
|
|
|
print!("\t\t");
|
|
|
|
|
|
for &b in chunk {
|
|
|
|
|
|
print!("0x{b:02x}, ");
|
|
|
|
|
|
}
|
|
|
|
|
|
println!();
|
|
|
|
|
|
}
|
|
|
|
|
|
println!("\t],");
|
|
|
|
|
|
|
|
|
|
|
|
print!("\tlong_strs: [");
|
|
|
|
|
|
for c in 0..256 {
|
|
|
|
|
|
// print!("\t\t");
|
|
|
|
|
|
print!("&[");
|
|
|
|
|
|
for &fw_val in tab.long_strs[c] {
|
|
|
|
|
|
print!("0x{:x}, ", fw_val);
|
|
|
|
|
|
}
|
|
|
|
|
|
print!("],");
|
|
|
|
|
|
// println!();
|
|
|
|
|
|
}
|
|
|
|
|
|
println!("],");
|
|
|
|
|
|
|
|
|
|
|
|
println!("}};");
|
|
|
|
|
|
|
|
|
|
|
|
tab
|
|
|
|
|
|
}
|
2024-11-30 23:46:08 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
#[inline]
|
|
|
|
|
|
pub unsafe fn lookup_b(&self, word: &[u8]) -> bool {
|
|
|
|
|
|
let first_char = unsafe { *word.get_unchecked(0) };
|
|
|
|
|
|
let strs = self.long_strs[first_char as usize];
|
|
|
|
|
|
if strs.len() == 0 || word.len() >= 16 {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Mask word
|
|
|
|
|
|
let mask = !(u128::MAX << (word.len() * 8));
|
|
|
|
|
|
let word_int = unsafe {
|
|
|
|
|
|
word.as_ptr()
|
|
|
|
|
|
.cast::<u128>()
|
|
|
|
|
|
.read_unaligned() & mask
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
let mut i = 0;
|
|
|
|
|
|
while i < strs.len() {
|
|
|
|
|
|
if strs[i] == word_int {
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
i += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
false
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
|
|
pub unsafe fn lookup(&self, word: &[u8]) -> bool {
|
|
|
|
|
|
// let &[first_char, ..] = word else {
|
|
|
|
|
|
// return false;
|
|
|
|
|
|
// };
|
|
|
|
|
|
let first_char = unsafe { *word.get_unchecked(0) };
|
|
|
|
|
|
|
|
|
|
|
|
// let dir_and_len_bits = unsafe {
|
|
|
|
|
|
// *self.dir_and_len_bits.get_unchecked(first_char as usize)
|
|
|
|
|
|
// };
|
|
|
|
|
|
// if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 {
|
|
|
|
|
|
// return false;
|
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
|
|
let len_bits = unsafe {
|
|
|
|
|
|
*self.dir_len_bits.get_unchecked(first_char as usize)
|
|
|
|
|
|
};
|
|
|
|
|
|
if hint::likely(word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0) {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// let mut str_offset = (dir_and_len_bits >> 24) as usize;
|
|
|
|
|
|
let mut str_offset = unsafe {
|
|
|
|
|
|
*self.dir.get_unchecked(first_char as usize) as usize
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Char doesn't have any strings in the table
|
|
|
|
|
|
if str_offset == 0 {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Iterate over strs
|
|
|
|
|
|
loop {
|
|
|
|
|
|
// let fw_len = u16::from_le_bytes([
|
|
|
|
|
|
// self.strs[str_offset],
|
|
|
|
|
|
// self.strs[str_offset+1]
|
|
|
|
|
|
// ]);
|
|
|
|
|
|
let fw_len: u8 = unsafe {
|
|
|
|
|
|
*self.strs.get_unchecked(str_offset)
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
if fw_len == 0 {
|
|
|
|
|
|
// We've reached the end of the word sublist
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Only compare words if they are the same length
|
|
|
|
|
|
if hint::unlikely(word.len() == fw_len as usize) {
|
|
|
|
|
|
// Compare strs
|
|
|
|
|
|
let mut char_offset = 1usize;
|
|
|
|
|
|
loop {
|
|
|
|
|
|
// Found the word!
|
|
|
|
|
|
if char_offset == word.len() {
|
|
|
|
|
|
return true;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) };
|
|
|
|
|
|
let word_char = unsafe { *word.get_unchecked(char_offset) };
|
|
|
|
|
|
|
|
|
|
|
|
if fw_char > word_char {
|
|
|
|
|
|
// Word can't possible be in the sorted list, return
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
if fw_char < word_char {
|
|
|
|
|
|
// Try next word
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
char_offset += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Advance to next word
|
|
|
|
|
|
// let str_len_bytes = 2;
|
|
|
|
|
|
let str_len_bytes = 1;
|
|
|
|
|
|
str_offset += (fw_len as usize - 1) + str_len_bytes;
|
|
|
|
|
|
}
|
2024-11-30 23:46:08 +01:00
|
|
|
|
}
|
2024-11-30 20:58:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
/// The forbidden words as byte strings, in their original listing order.
/// NOTE(review): `expers` looks like a typo, but the pre-generated `FW_TAB` data
/// encodes the same spelling — check both before changing it.
const FORBIDDEN_WORDS: [&[u8]; 35] = [
    b"recovery", b"techie", b"http", b"https", b"digital",
    b"hack", b"::", b"//", b"com", b"@",
    b"crypto", b"bitcoin", b"wallet", b"hacker", b"welcome",
    b"whatsapp", b"email", b"cryptocurrency", b"stolen", b"freeze",
    b"quick", b"crucial", b"tracing", b"scammers", b"expers",
    b"hire", b"century", b"transaction", b"essential", b"managing",
    b"contact", b"contacting", b"understanding", b"assets", b"funds",
];
|
|
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
// Pre-generated forbidden-word table used by `analyze`.
// The layout matches the `static FW_TAB` initializer that `FwTab::build` prints.
// NOTE(review): presumably this has to be regenerated whenever `FORBIDDEN_WORDS`
// changes — confirm.
static FW_TAB: FwTab = FwTab {
    // Offset into `strs` per first byte (0 = no words start with that byte).
    dir: [
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
        0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ],
    // Per first byte: bit `n` set = some word with that first byte has length `n`.
    dir_len_bits: [
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000,
        0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    ],
    // Packed sublists: length byte + word without its first byte; 0x00 ends a sublist.
    strs: [
        0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
        0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
        0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
        0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
        0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
        0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
        0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
        0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
        0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
        0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
        0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
        0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
        0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
        0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
        0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
        0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ],
    // Per first byte: the words packed little-endian into `u128`s (16 entries per row).
    long_strs: [
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x2f2f, ],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[0x3a3a, ], &[], &[], &[], &[], &[],
        &[0x40, ], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[0x737465737361, ], &[0x6e696f63746962, ], &[0x797275746e6563, 0x6d6f63, 0x746361746e6f63, 0x676e69746361746e6f63, 0x6c616963757263, 0x6f7470797263, 0x79636e65727275636f7470797263, ], &[0x6c617469676964, ], &[0x6c69616d65, 0x6c6169746e65737365, 0x737265707865, ], &[0x657a65657266, 0x73646e7566, ], &[], &[0x6b636168, 0x72656b636168, 0x65726968, 0x70747468, 0x7370747468, ], &[], &[], &[], &[], &[0x676e6967616e616d, ], &[], &[],
        &[], &[0x6b63697571, ], &[0x797265766f636572, ], &[0x7372656d6d616373, 0x6e656c6f7473, ], &[0x656968636574, 0x676e6963617274, 0x6e6f69746361736e617274, ], &[0x676e69646e6174737265646e75, ], &[], &[0x74656c6c6177, 0x656d6f636c6577, 0x7070617374616877, ], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
        &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[], &[],
    ],
};
|
|
|
|
|
|
|
|
|
|
|
|
// NOTE: perfect hashing (the `phf` set below) turned out to be rather slow, so it is left commented out.
|
|
|
|
|
|
//static FW_PHF: phf::Set<&'static [u8]> = phf::phf_set! {
|
|
|
|
|
|
// b"recovery",
|
|
|
|
|
|
// b"techie",
|
|
|
|
|
|
// b"http",
|
|
|
|
|
|
// b"https",
|
|
|
|
|
|
// b"digital",
|
|
|
|
|
|
// b"hack",
|
|
|
|
|
|
// b"::",
|
|
|
|
|
|
// b"//",
|
|
|
|
|
|
// b"com",
|
|
|
|
|
|
// b"@",
|
|
|
|
|
|
// b"crypto",
|
|
|
|
|
|
// b"bitcoin",
|
|
|
|
|
|
// b"wallet",
|
|
|
|
|
|
// b"hacker",
|
|
|
|
|
|
// b"welcome",
|
|
|
|
|
|
// b"whatsapp",
|
|
|
|
|
|
// b"email",
|
|
|
|
|
|
// b"cryptocurrency",
|
|
|
|
|
|
// b"stolen",
|
|
|
|
|
|
// b"freeze",
|
|
|
|
|
|
// b"quick",
|
|
|
|
|
|
// b"crucial",
|
|
|
|
|
|
// b"tracing",
|
|
|
|
|
|
// b"scammers",
|
|
|
|
|
|
// b"expers",
|
|
|
|
|
|
// b"hire",
|
|
|
|
|
|
// b"century",
|
|
|
|
|
|
// b"transaction",
|
|
|
|
|
|
// b"essential",
|
|
|
|
|
|
// b"managing",
|
|
|
|
|
|
// b"contact",
|
|
|
|
|
|
// b"contacting",
|
|
|
|
|
|
// b"understanding",
|
|
|
|
|
|
// b"assets",
|
|
|
|
|
|
// b"funds",
|
|
|
|
|
|
//};
|
2024-12-01 23:23:32 +01:00
|
|
|
|
|
2025-10-06 13:39:29 +02:00
|
|
|
|
//static FW_TAB_DIR: [u8; 256] = [
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
|
|
|
|
//];
|
|
|
|
|
|
//static FW_TAB_STRS: [u8; 244] = [
|
|
|
|
|
|
// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
|
|
|
|
|
|
// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
|
|
|
|
|
|
// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
|
|
|
|
|
|
// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
|
|
|
|
|
|
// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
|
|
|
|
|
|
// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
|
|
|
|
|
|
// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
|
|
|
|
|
|
// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
|
|
|
|
|
|
// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
|
|
|
|
|
|
// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
|
|
|
|
|
|
// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
|
|
|
|
|
|
// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
|
|
|
|
|
|
// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
|
|
|
|
|
|
// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
|
|
|
|
|
|
// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
|
|
|
|
|
|
// 0x61, 0x70, 0x70, 0x00,
|
|
|
|
|
|
//];
|