parent
5b8dd08348
commit
b711d5a908
jest_rust
@ -4,6 +4,7 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
fxhash = "0.2.1"
|
||||||
tokio = { version = "1.44.1", features = ["full"] }
|
tokio = { version = "1.44.1", features = ["full"] }
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
|
@ -146,6 +146,7 @@ capitalized word percentage: 2%
|
|||||||
|
|
||||||
benchmark: 1588ms
|
benchmark: 1588ms
|
||||||
```
|
```
|
||||||
|
with fxhash trie: 1200ms
|
||||||
|
|
||||||
muncher:
|
muncher:
|
||||||
```
|
```
|
||||||
|
@ -2,55 +2,53 @@ mod stats;
|
|||||||
mod trie;
|
mod trie;
|
||||||
|
|
||||||
use stats::Stats;
|
use stats::Stats;
|
||||||
use std::{cell::LazyCell, env, fs};
|
use std::{env, fs, sync::LazyLock};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use trie::Trie;
|
use trie::Trie;
|
||||||
|
|
||||||
thread_local! {
|
static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
|
||||||
static FORBIDON: LazyCell<Trie> = LazyCell::new(|| {
|
let mut trie = Trie::default();
|
||||||
let mut trie = Trie::default();
|
for word in [
|
||||||
for word in [
|
"recovery",
|
||||||
"recovery",
|
"techie",
|
||||||
"techie",
|
"http",
|
||||||
"http",
|
"https",
|
||||||
"https",
|
"digital",
|
||||||
"digital",
|
"hack",
|
||||||
"hack",
|
"::",
|
||||||
"::",
|
"//",
|
||||||
"//",
|
"@",
|
||||||
"@",
|
"com",
|
||||||
"com",
|
"crypto",
|
||||||
"crypto",
|
"bitcoin",
|
||||||
"bitcoin",
|
"wallet",
|
||||||
"wallet",
|
"hacker",
|
||||||
"hacker",
|
"welcome",
|
||||||
"welcome",
|
"whatsapp",
|
||||||
"whatsapp",
|
"email",
|
||||||
"email",
|
"cryptocurrency",
|
||||||
"cryptocurrency",
|
"stolen",
|
||||||
"stolen",
|
"freeze",
|
||||||
"freeze",
|
"quick",
|
||||||
"quick",
|
"crucial",
|
||||||
"crucial",
|
"tracing",
|
||||||
"tracing",
|
"scammers",
|
||||||
"scammers",
|
"expers",
|
||||||
"expers",
|
"hire",
|
||||||
"hire",
|
"century",
|
||||||
"century",
|
"transaction",
|
||||||
"transaction",
|
"essential",
|
||||||
"essential",
|
"managing",
|
||||||
"managing",
|
"contact",
|
||||||
"contact",
|
"contacting",
|
||||||
"contacting",
|
"understanding",
|
||||||
"understanding",
|
"assets",
|
||||||
"assets",
|
"funds",
|
||||||
"funds",
|
] {
|
||||||
] {
|
trie.insert(word);
|
||||||
trie.insert(word);
|
}
|
||||||
}
|
trie
|
||||||
trie
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Stats {
|
impl Stats {
|
||||||
pub fn process(&mut self, file: &str) {
|
pub fn process(&mut self, file: &str) {
|
||||||
@ -88,11 +86,9 @@ impl Stats {
|
|||||||
capitalized = true;
|
capitalized = true;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
let lowercase_word = word.to_lowercase();
|
||||||
FORBIDON.with(|trie| {
|
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
||||||
if trie.contains(&lowercase_word) {
|
self.forbidden_count += 1;
|
||||||
self.forbidden_count += 1;
|
}
|
||||||
}
|
|
||||||
});
|
|
||||||
word = String::new();
|
word = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -112,11 +108,9 @@ impl Stats {
|
|||||||
capitalized = true;
|
capitalized = true;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
let lowercase_word = word.to_lowercase();
|
||||||
FORBIDON.with(|trie| {
|
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
||||||
if trie.contains(&lowercase_word) {
|
self.forbidden_count += 1;
|
||||||
self.forbidden_count += 1;
|
}
|
||||||
}
|
|
||||||
});
|
|
||||||
word = String::new();
|
word = String::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -161,11 +155,9 @@ impl Stats {
|
|||||||
self.capitalized_count += 1;
|
self.capitalized_count += 1;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
let lowercase_word = word.to_lowercase();
|
||||||
FORBIDON.with(|trie| {
|
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
||||||
if trie.contains(&lowercase_word) {
|
self.forbidden_count += 1;
|
||||||
self.forbidden_count += 1;
|
}
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
|
use fxhash::FxBuildHasher;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; //simpler, slightly faster
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
struct Node {
|
struct Node {
|
||||||
end: bool,
|
end: bool,
|
||||||
children: HashMap<char, Node>,
|
children: FxHashMap<char, Node>,
|
||||||
}
|
}
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
pub struct Trie {
|
pub struct Trie {
|
||||||
|
Loading…
Reference in New Issue
Block a user