Use fxhash for the trie and replace the thread-local LazyCell with a static LazyLock

This commit is contained in:
JestDotty 2025-03-24 00:20:15 -04:00
parent 5b8dd08348
commit b711d5a908
4 changed files with 59 additions and 62 deletions

View File

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
fxhash = "0.2.1"
tokio = { version = "1.44.1", features = ["full"] } tokio = { version = "1.44.1", features = ["full"] }
[profile.release] [profile.release]

View File

@ -146,6 +146,7 @@ capitalized word percentage: 2%
benchmark: 1588ms benchmark: 1588ms
``` ```
with fxhash trie: 1200ms
muncher: muncher:
``` ```

View File

@ -2,55 +2,53 @@ mod stats;
mod trie; mod trie;
use stats::Stats; use stats::Stats;
use std::{cell::LazyCell, env, fs}; use std::{env, fs, sync::LazyLock};
use tokio::sync::mpsc; use tokio::sync::mpsc;
use trie::Trie; use trie::Trie;
thread_local! { static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
static FORBIDON: LazyCell<Trie> = LazyCell::new(|| { let mut trie = Trie::default();
let mut trie = Trie::default(); for word in [
for word in [ "recovery",
"recovery", "techie",
"techie", "http",
"http", "https",
"https", "digital",
"digital", "hack",
"hack", "::",
"::", "//",
"//", "@",
"@", "com",
"com", "crypto",
"crypto", "bitcoin",
"bitcoin", "wallet",
"wallet", "hacker",
"hacker", "welcome",
"welcome", "whatsapp",
"whatsapp", "email",
"email", "cryptocurrency",
"cryptocurrency", "stolen",
"stolen", "freeze",
"freeze", "quick",
"quick", "crucial",
"crucial", "tracing",
"tracing", "scammers",
"scammers", "expers",
"expers", "hire",
"hire", "century",
"century", "transaction",
"transaction", "essential",
"essential", "managing",
"managing", "contact",
"contact", "contacting",
"contacting", "understanding",
"understanding", "assets",
"assets", "funds",
"funds", ] {
] { trie.insert(word);
trie.insert(word); }
} trie
trie });
});
}
impl Stats { impl Stats {
pub fn process(&mut self, file: &str) { pub fn process(&mut self, file: &str) {
@ -88,11 +86,9 @@ impl Stats {
capitalized = true; capitalized = true;
} }
let lowercase_word = word.to_lowercase(); let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| { if FORBIDDEN_WORDS.contains(&lowercase_word) {
if trie.contains(&lowercase_word) { self.forbidden_count += 1;
self.forbidden_count += 1; }
}
});
word = String::new(); word = String::new();
continue; continue;
} }
@ -112,11 +108,9 @@ impl Stats {
capitalized = true; capitalized = true;
} }
let lowercase_word = word.to_lowercase(); let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| { if FORBIDDEN_WORDS.contains(&lowercase_word) {
if trie.contains(&lowercase_word) { self.forbidden_count += 1;
self.forbidden_count += 1; }
}
});
word = String::new(); word = String::new();
continue; continue;
} }
@ -161,11 +155,9 @@ impl Stats {
self.capitalized_count += 1; self.capitalized_count += 1;
} }
let lowercase_word = word.to_lowercase(); let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| { if FORBIDDEN_WORDS.contains(&lowercase_word) {
if trie.contains(&lowercase_word) { self.forbidden_count += 1;
self.forbidden_count += 1; }
}
});
} }
} }
} }

View File

@ -1,9 +1,12 @@
use fxhash::FxBuildHasher;
use std::collections::HashMap; use std::collections::HashMap;
type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
#[derive(Default, Debug, Clone)] #[derive(Default, Debug, Clone)]
struct Node { struct Node {
end: bool, end: bool,
children: HashMap<char, Node>, children: FxHashMap<char, Node>,
} }
#[derive(Default, Debug, Clone)] #[derive(Default, Debug, Clone)]
pub struct Trie { pub struct Trie {