Use fxhash for the trie, and replace the thread-local LazyCell with a proper lazy static (LazyLock)

This commit is contained in:
JestDotty 2025-03-24 00:20:15 -04:00
parent 5b8dd08348
commit b711d5a908
4 changed files with 59 additions and 62 deletions

View File

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2024"
[dependencies]
fxhash = "0.2.1"
tokio = { version = "1.44.1", features = ["full"] }
[profile.release]

View File

@ -146,6 +146,7 @@ capitalized word percentage: 2%
benchmark: 1588ms
```
with fxhash trie: 1200ms
muncher:
```

View File

@ -2,55 +2,53 @@ mod stats;
mod trie;
use stats::Stats;
use std::{cell::LazyCell, env, fs};
use std::{env, fs, sync::LazyLock};
use tokio::sync::mpsc;
use trie::Trie;
// Per-thread, lazily initialised trie holding the lowercase tokens that are
// counted as "forbidden". Code in `impl Stats` reaches it through
// `FORBIDON.with(..)` and bumps `forbidden_count` when a lowercased word is
// contained in it.
thread_local! {
    static FORBIDON: LazyCell<Trie> = LazyCell::new(|| {
        // Tokens are stored exactly as written here; callers lowercase the
        // candidate word before the lookup.
        // NOTE(review): "expers" looks like a typo for "experts" — confirm
        // before changing, since editing the literal changes what is counted.
        const WORDS: &[&str] = &[
            "recovery",
            "techie",
            "http",
            "https",
            "digital",
            "hack",
            "::",
            "//",
            "@",
            "com",
            "crypto",
            "bitcoin",
            "wallet",
            "hacker",
            "welcome",
            "whatsapp",
            "email",
            "cryptocurrency",
            "stolen",
            "freeze",
            "quick",
            "crucial",
            "tracing",
            "scammers",
            "expers",
            "hire",
            "century",
            "transaction",
            "essential",
            "managing",
            "contact",
            "contacting",
            "understanding",
            "assets",
            "funds",
        ];

        let mut forbidden = Trie::default();
        for &entry in WORDS {
            forbidden.insert(entry);
        }
        forbidden
    });
}
/// Process-wide trie of the lowercase tokens that are counted as "forbidden".
///
/// Built on first access. Code in `impl Stats` lowercases each word and
/// increments `forbidden_count` when `FORBIDDEN_WORDS.contains(..)` reports
/// a hit.
static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
    // NOTE(review): "expers" looks like a typo for "experts" — confirm before
    // changing, since editing the literal changes what is counted.
    const WORDS: &[&str] = &[
        "recovery",
        "techie",
        "http",
        "https",
        "digital",
        "hack",
        "::",
        "//",
        "@",
        "com",
        "crypto",
        "bitcoin",
        "wallet",
        "hacker",
        "welcome",
        "whatsapp",
        "email",
        "cryptocurrency",
        "stolen",
        "freeze",
        "quick",
        "crucial",
        "tracing",
        "scammers",
        "expers",
        "hire",
        "century",
        "transaction",
        "essential",
        "managing",
        "contact",
        "contacting",
        "understanding",
        "assets",
        "funds",
    ];

    let mut forbidden = Trie::default();
    // Insert every token in declaration order; any return value of `insert`
    // is deliberately discarded, as in a plain statement call.
    WORDS.iter().copied().for_each(|entry| {
        forbidden.insert(entry);
    });
    forbidden
});
impl Stats {
pub fn process(&mut self, file: &str) {
@ -88,11 +86,9 @@ impl Stats {
capitalized = true;
}
let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| {
if trie.contains(&lowercase_word) {
self.forbidden_count += 1;
}
});
if FORBIDDEN_WORDS.contains(&lowercase_word) {
self.forbidden_count += 1;
}
word = String::new();
continue;
}
@ -112,11 +108,9 @@ impl Stats {
capitalized = true;
}
let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| {
if trie.contains(&lowercase_word) {
self.forbidden_count += 1;
}
});
if FORBIDDEN_WORDS.contains(&lowercase_word) {
self.forbidden_count += 1;
}
word = String::new();
continue;
}
@ -161,11 +155,9 @@ impl Stats {
self.capitalized_count += 1;
}
let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| {
if trie.contains(&lowercase_word) {
self.forbidden_count += 1;
}
});
if FORBIDDEN_WORDS.contains(&lowercase_word) {
self.forbidden_count += 1;
}
}
}
}

View File

@ -1,9 +1,12 @@
use fxhash::FxBuildHasher;
use std::collections::HashMap;
type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
#[derive(Default, Debug, Clone)]
struct Node {
end: bool,
children: HashMap<char, Node>,
children: FxHashMap<char, Node>,
}
#[derive(Default, Debug, Clone)]
pub struct Trie {