diff --git a/jest_rust/Cargo.toml b/jest_rust/Cargo.toml index 125db01..4143cb2 100644 --- a/jest_rust/Cargo.toml +++ b/jest_rust/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] +fxhash = "0.2.1" tokio = { version = "1.44.1", features = ["full"] } [profile.release] diff --git a/jest_rust/README.md b/jest_rust/README.md index 7eb3278..70734d4 100644 --- a/jest_rust/README.md +++ b/jest_rust/README.md @@ -146,6 +146,7 @@ capitalized word percentage: 2% benchmark: 1588ms ``` +with fxhash trie: 1200ms muncher: ``` diff --git a/jest_rust/src/main.rs b/jest_rust/src/main.rs index 1f5464d..9adf148 100644 --- a/jest_rust/src/main.rs +++ b/jest_rust/src/main.rs @@ -2,55 +2,53 @@ mod stats; mod trie; use stats::Stats; -use std::{cell::LazyCell, env, fs}; +use std::{env, fs, sync::LazyLock}; use tokio::sync::mpsc; use trie::Trie; -thread_local! { - static FORBIDON: LazyCell<Trie> = LazyCell::new(|| { - let mut trie = Trie::default(); - for word in [ - "recovery", - "techie", - "http", - "https", - "digital", - "hack", - "::", - "//", - "@", - "com", - "crypto", - "bitcoin", - "wallet", - "hacker", - "welcome", - "whatsapp", - "email", - "cryptocurrency", - "stolen", - "freeze", - "quick", - "crucial", - "tracing", - "scammers", - "expers", - "hire", - "century", - "transaction", - "essential", - "managing", - "contact", - "contacting", - "understanding", - "assets", - "funds", - ] { - trie.insert(word); - } - trie - }); -} +static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| { + let mut trie = Trie::default(); + for word in [ + "recovery", + "techie", + "http", + "https", + "digital", + "hack", + "::", + "//", + "@", + "com", + "crypto", + "bitcoin", + "wallet", + "hacker", + "welcome", + "whatsapp", + "email", + "cryptocurrency", + "stolen", + "freeze", + "quick", + "crucial", + "tracing", + "scammers", + "expers", + "hire", + "century", + "transaction", + "essential", + "managing", + "contact", + "contacting", + "understanding", + "assets", + "funds", + ] { + trie.insert(word); + } + trie +}); impl Stats { pub fn process(&mut self, file: &str) { @@ -88,11 +86,9 @@ impl Stats { capitalized = true; } let lowercase_word = word.to_lowercase(); - FORBIDON.with(|trie| { - if trie.contains(&lowercase_word) { - self.forbidden_count += 1; - } - }); + if FORBIDDEN_WORDS.contains(&lowercase_word) { + self.forbidden_count += 1; + } word = String::new(); continue; } @@ -112,11 +108,9 @@ impl Stats { capitalized = true; } let lowercase_word = word.to_lowercase(); - FORBIDON.with(|trie| { - if trie.contains(&lowercase_word) { - self.forbidden_count += 1; - } - }); + if FORBIDDEN_WORDS.contains(&lowercase_word) { + self.forbidden_count += 1; + } word = String::new(); continue; } @@ -161,11 +155,9 @@ impl Stats { self.capitalized_count += 1; } let lowercase_word = word.to_lowercase(); - FORBIDON.with(|trie| { - if trie.contains(&lowercase_word) { - self.forbidden_count += 1; - } - }); + if FORBIDDEN_WORDS.contains(&lowercase_word) { + self.forbidden_count += 1; + } } } } diff --git a/jest_rust/src/trie.rs b/jest_rust/src/trie.rs index f6be9f4..27ebaef 100644 --- a/jest_rust/src/trie.rs +++ b/jest_rust/src/trie.rs @@ -1,9 +1,12 @@ +use fxhash::FxBuildHasher; use std::collections::HashMap; +type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster + #[derive(Default, Debug, Clone)] struct Node { end: bool, - children: HashMap<char, Node>, + children: FxHashMap<char, Node>, } #[derive(Default, Debug, Clone)] pub struct Trie {