Use fxhash for the trie; replace the thread_local LazyCell with a static LazyLock
Some checks failed
isspam build / build (push) Failing after 2m33s

This commit is contained in:
JestDotty 2025-03-24 00:20:15 -04:00
parent 5b8dd08348
commit b711d5a908
4 changed files with 59 additions and 62 deletions

View File

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
fxhash = "0.2.1"
tokio = { version = "1.44.1", features = ["full"] } tokio = { version = "1.44.1", features = ["full"] }
[profile.release] [profile.release]

View File

@ -146,6 +146,7 @@ capitalized word percentage: 2%
benchmark: 1588ms benchmark: 1588ms
``` ```
with fxhash trie: 1200ms
muncher: muncher:
``` ```

View File

@ -2,12 +2,11 @@ mod stats;
mod trie; mod trie;
use stats::Stats; use stats::Stats;
use std::{cell::LazyCell, env, fs}; use std::{env, fs, sync::LazyLock};
use tokio::sync::mpsc; use tokio::sync::mpsc;
use trie::Trie; use trie::Trie;
thread_local! { static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
static FORBIDON: LazyCell<Trie> = LazyCell::new(|| {
let mut trie = Trie::default(); let mut trie = Trie::default();
for word in [ for word in [
"recovery", "recovery",
@ -50,7 +49,6 @@ thread_local! {
} }
trie trie
}); });
}
impl Stats { impl Stats {
pub fn process(&mut self, file: &str) { pub fn process(&mut self, file: &str) {
@ -88,11 +86,9 @@ impl Stats {
capitalized = true; capitalized = true;
} }
let lowercase_word = word.to_lowercase(); let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| { if FORBIDDEN_WORDS.contains(&lowercase_word) {
if trie.contains(&lowercase_word) {
self.forbidden_count += 1; self.forbidden_count += 1;
} }
});
word = String::new(); word = String::new();
continue; continue;
} }
@ -112,11 +108,9 @@ impl Stats {
capitalized = true; capitalized = true;
} }
let lowercase_word = word.to_lowercase(); let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| { if FORBIDDEN_WORDS.contains(&lowercase_word) {
if trie.contains(&lowercase_word) {
self.forbidden_count += 1; self.forbidden_count += 1;
} }
});
word = String::new(); word = String::new();
continue; continue;
} }
@ -161,11 +155,9 @@ impl Stats {
self.capitalized_count += 1; self.capitalized_count += 1;
} }
let lowercase_word = word.to_lowercase(); let lowercase_word = word.to_lowercase();
FORBIDON.with(|trie| { if FORBIDDEN_WORDS.contains(&lowercase_word) {
if trie.contains(&lowercase_word) {
self.forbidden_count += 1; self.forbidden_count += 1;
} }
});
} }
} }
} }

View File

@ -1,9 +1,12 @@
use fxhash::FxBuildHasher;
use std::collections::HashMap; use std::collections::HashMap;
type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
#[derive(Default, Debug, Clone)] #[derive(Default, Debug, Clone)]
struct Node { struct Node {
end: bool, end: bool,
children: HashMap<char, Node>, children: FxHashMap<char, Node>,
} }
#[derive(Default, Debug, Clone)] #[derive(Default, Debug, Clone)]
pub struct Trie { pub struct Trie {