fxhash and I remembered how to static lazy right
Some checks failed
isspam build / build (push) Failing after 2m33s
Some checks failed
isspam build / build (push) Failing after 2m33s
This commit is contained in:
parent
5b8dd08348
commit
b711d5a908
@ -4,6 +4,7 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
fxhash = "0.2.1"
|
||||||
tokio = { version = "1.44.1", features = ["full"] }
|
tokio = { version = "1.44.1", features = ["full"] }
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
|
@ -146,6 +146,7 @@ capitalized word percentage: 2%
|
|||||||
|
|
||||||
benchmark: 1588ms
|
benchmark: 1588ms
|
||||||
```
|
```
|
||||||
|
with fxhash trie: 1200ms
|
||||||
|
|
||||||
muncher:
|
muncher:
|
||||||
```
|
```
|
||||||
|
@ -2,12 +2,11 @@ mod stats;
|
|||||||
mod trie;
|
mod trie;
|
||||||
|
|
||||||
use stats::Stats;
|
use stats::Stats;
|
||||||
use std::{cell::LazyCell, env, fs};
|
use std::{env, fs, sync::LazyLock};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use trie::Trie;
|
use trie::Trie;
|
||||||
|
|
||||||
thread_local! {
|
static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
|
||||||
static FORBIDON: LazyCell<Trie> = LazyCell::new(|| {
|
|
||||||
let mut trie = Trie::default();
|
let mut trie = Trie::default();
|
||||||
for word in [
|
for word in [
|
||||||
"recovery",
|
"recovery",
|
||||||
@ -49,8 +48,7 @@ thread_local! {
|
|||||||
trie.insert(word);
|
trie.insert(word);
|
||||||
}
|
}
|
||||||
trie
|
trie
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
impl Stats {
|
impl Stats {
|
||||||
pub fn process(&mut self, file: &str) {
|
pub fn process(&mut self, file: &str) {
|
||||||
@ -88,11 +86,9 @@ impl Stats {
|
|||||||
capitalized = true;
|
capitalized = true;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
let lowercase_word = word.to_lowercase();
|
||||||
FORBIDON.with(|trie| {
|
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
||||||
if trie.contains(&lowercase_word) {
|
|
||||||
self.forbidden_count += 1;
|
self.forbidden_count += 1;
|
||||||
}
|
}
|
||||||
});
|
|
||||||
word = String::new();
|
word = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -112,11 +108,9 @@ impl Stats {
|
|||||||
capitalized = true;
|
capitalized = true;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
let lowercase_word = word.to_lowercase();
|
||||||
FORBIDON.with(|trie| {
|
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
||||||
if trie.contains(&lowercase_word) {
|
|
||||||
self.forbidden_count += 1;
|
self.forbidden_count += 1;
|
||||||
}
|
}
|
||||||
});
|
|
||||||
word = String::new();
|
word = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -161,11 +155,9 @@ impl Stats {
|
|||||||
self.capitalized_count += 1;
|
self.capitalized_count += 1;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
let lowercase_word = word.to_lowercase();
|
||||||
FORBIDON.with(|trie| {
|
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
||||||
if trie.contains(&lowercase_word) {
|
|
||||||
self.forbidden_count += 1;
|
self.forbidden_count += 1;
|
||||||
}
|
}
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
|
use fxhash::FxBuildHasher;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
struct Node {
|
struct Node {
|
||||||
end: bool,
|
end: bool,
|
||||||
children: HashMap<char, Node>,
|
children: FxHashMap<char, Node>,
|
||||||
}
|
}
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
pub struct Trie {
|
pub struct Trie {
|
||||||
|
Loading…
Reference in New Issue
Block a user