more tests

parent d73d4ff7c1
commit c459fe6d79
@@ -9,6 +9,7 @@ tokio = { version = "1.44.1", features = ["full"] }
 
 [profile.release]
 codegen-units = 1 # less means more compile work but better optimized
-lto = "thin" # thin has best performance. fat the worst
+lto = "fat" # thin has best performance. fat the worst
 strip = true
 # opt-level = "z" # slows down
+panic = "abort"
@@ -4,116 +4,28 @@ extract `../books.tar.gz`

# local machine benchmarks
-single threaded: `Time Jest Rust: 33.63373279571533`
+single threaded: `33.63373279571533`

-rayon: `Time Jest Rust: 4.294418811798096`
+rayon: `4.294418811798096`

-tokio: `Time Jest Rust: 4.717588901519775`
+tokio: `4.717588901519775`

tokio:

muncher: `2486ms`

for_loops: `1227ms`

for_loops_forbidden_only: `987ms`

trie creation and stats accumulation take 0ms
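The rayon variant isn't shown in this commit; a rough sketch of what it presumably looks like (hypothetical code, assuming `Stats` implements `Default`, `AddAssign` and `Send`, and using the same `parser::for_loops::parse` as in the diff below):

```
use rayon::prelude::*;

use crate::{parser, stats::Stats};

// Hypothetical sketch of the rayon variant: build a Stats per file in
// parallel, then reduce everything into one total.
fn run_rayon(files: Vec<String>) -> Stats {
    files
        .par_iter()
        .map(|file| {
            let mut stats = Stats::default();
            match std::fs::read_to_string(file) {
                Ok(text) => {
                    stats.file_count += 1;
                    parser::for_loops::parse(&mut stats, &text);
                }
                Err(_) => stats.failed_file_count += 1,
            }
            stats
        })
        .reduce(Stats::default, |mut acc, s| {
            acc += s;
            acc
        })
}
```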
## compile options benchmarks

lto not thin: `Time Jest Rust: 5.306957483291626` slower
`lto` thin vs fat doesn't change much

lto fat: `Time Jest Rust: 5.413678407669067` slower
`codegen-units` 0 vs 1 doesn't change much

codegen-units 1: `Time Jest Rust: 4.451631546020508` faster

opt-level z: `Time Jest Rust: 7.045313119888306` slower

strip true: `Time Jest Rust: 4.337219476699829` faster

lto true: `Time Jest Rust: 4.703521728515625` slower

lto none: `Time Jest Rust: 4.817203998565674`

lto thin: `Time Jest Rust: 4.429729223251343` faster
# data integrity
(this isn't tested, just guessed, and I don't have data to compare it with)

for loops:
```
file count: 904
failed file count: 0
sentence count: 5602301
word count: 81701260
capitalized count: 1753639
numeric count: 14981248
forbidden count: 1237059
words per sentence average: 14.6
forbidden word percentage: 2%
capitalized word percentage: 2%

benchmark: 5033ms
```

muncher:
```
file count: 904
failed file count: 0
sentence count: 5338705
word count: 86765116
capitalized count: 13640820
numeric count: 10902254
forbidden count: 0
words per sentence average: 16.3
forbidden word percentage: 0%
capitalized word percentage: 16%

benchmark: 504ms
```

with forbidden words:
```
file count: 904
failed file count: 0
sentence count: 5338705
word count: 86765116
capitalized count: 13640820
numeric count: 10902254
forbidden count: 279717
words per sentence average: 16.3
forbidden word percentage: 0%
capitalized word percentage: 16%

benchmark: 6078ms
```
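The derived numbers in these blocks follow from the raw counts; a quick sanity check against the for_loops block above (14.6 words per sentence, 2% forbidden, 2% capitalized):

```
fn main() {
    let sentence_count = 5_602_301_f64;
    let word_count = 81_701_260_f64;
    let forbidden_count = 1_237_059_f64;
    let capitalized_count = 1_753_639_f64;

    println!("{:.1}", word_count / sentence_count); // 14.6
    println!("{:.0}%", forbidden_count / word_count * 100.0); // 2%
    println!("{:.0}%", capitalized_count / word_count * 100.0); // 2%
}
```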
# forbidden words benchmarks
it seems they take about 4000ms to churn through in the original version

for loops, counting each forbidden word once only:
```
file count: 904
failed file count: 0
sentence count: 5602301
word count: 81701260
capitalized count: 1753639
numeric count: 14981248
forbidden count: 1143234
words per sentence average: 14.6
forbidden word percentage: 1%
capitalized word percentage: 2%

benchmark: 4737ms
```

for loops with trie:
```
file count: 904
failed file count: 0
sentence count: 5602301
word count: 81701260
capitalized count: 1753639
numeric count: 14981248
forbidden count: 176528
words per sentence average: 14.6
forbidden word percentage: 0%
capitalized word percentage: 2%

benchmark: 1588ms
```

muncher with trie is 2600ms

for loops with fxhash trie: 1200ms
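The `Trie` behind `FORBIDDEN_WORDS` isn't part of this diff; a minimal sketch of what a word trie with `insert`/`contains` could look like (hypothetical code; the fxhash variant mentioned above would presumably just swap `HashMap` for `fxhash::FxHashMap`):

```
use std::collections::HashMap;

// Hypothetical character trie for forbidden-word lookup.
#[derive(Default)]
pub struct Trie {
    children: HashMap<char, Trie>,
    is_word: bool,
}

impl Trie {
    pub fn insert(&mut self, word: &str) {
        let mut node = self;
        for c in word.chars() {
            node = node.children.entry(c).or_default();
        }
        node.is_word = true;
    }

    pub fn contains(&self, word: &str) -> bool {
        let mut node = self;
        for c in word.chars() {
            match node.children.get(&c) {
                Some(next) => node = next,
                None => return false,
            }
        }
        node.is_word
    }
}
```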
`opt-level = "z"` slows things down

# ubuntu terminal running
instructions for the Ubuntu-running thing at https://snek.molodetz.nl/terminal.html:
@@ -54,32 +54,32 @@ static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
 #[tokio::main]
 async fn main() {
     let files = env::args().skip(1);
-    let mut stats = Stats::default();
     let mut rx = {
         let (tx, rx) = mpsc::unbounded_channel();
         for file in files {
-            //reading files not sequentially average shaves 30ms (of 1250ms), and that's on a NVMe SSD so why not
-            if let Ok(text) = fs::read_to_string(&file) {
-                stats.file_count += 1;
-                let tx = tx.clone();
-                tokio::spawn(async move {
-                    let mut stats = Stats::default();
+            let tx = tx.clone();
+            tokio::spawn(async move {
+                let mut stats = Stats::default();
+                //reading files in threads doesn't change speed of any sort but oh well
+                if let Ok(text) = fs::read_to_string(&file) {
+                    stats.file_count += 1;
                     parser::for_loops::parse(&mut stats, &text);
-                    let _ = tx.send(stats);
-                });
-            } else {
-                stats.failed_file_count += 1;
-            }
+                } else {
+                    stats.failed_file_count += 1;
+                }
+                let _ = tx.send(stats);
+            });
         }
         rx
     };
+    let mut stats = Stats::default();
     while let Some(file_stat) = rx.recv().await {
         stats += file_stat;
     }
     println!("{stats}");
 }

-/// needs ../books.tar.gz to be extracted
+/// needs ../books.tar.gz to be extracted into ../books
 #[test]
 fn test() {
     use std::{env, fs, process::Command, time::Instant};
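`stats += file_stat` implies an `AddAssign` impl for `Stats` that isn't shown in this diff; it presumably just sums the per-file counters, roughly like this (hypothetical sketch; field names follow the parser code and the stats printout in the README above, the real struct may differ):

```
use std::ops::AddAssign;

// Hypothetical sketch: fold one file's Stats into the running total.
impl AddAssign for Stats {
    fn add_assign(&mut self, other: Self) {
        self.file_count += other.file_count;
        self.failed_file_count += other.failed_file_count;
        self.sentence_count += other.sentence_count;
        self.word_count += other.word_count;
        self.capitalized_count += other.capitalized_count;
        self.numeric_count += other.numeric_count;
        self.forbidden_count += other.forbidden_count;
    }
}
```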
@@ -11,7 +11,7 @@ pub fn parse(stats: &mut Stats, text: &str) {
     {
         stats.sentence_count += 1;
         for word in sentence
-            .split_whitespace()
+            .split_ascii_whitespace()
             .map(|s| s.trim())
             .filter(|s| !s.is_empty())
         {
@@ -21,17 +21,15 @@ pub fn parse(stats: &mut Stats, text: &str) {
             for char in word.chars() {
                 if char.is_numeric() {
                     stats.numeric_count += 1;
                     //TODO are numbers capitalized or not? I don't know!
-                }
-                if !char.is_ascii_uppercase() {
-                    all_capitalized = false;
+                } else if !char.is_ascii_uppercase() {
+                    all_capitalized = false;
                 }
             }
             if all_capitalized {
                 stats.capitalized_count += 1;
             }
-            let lowercase_word = word.to_lowercase();
-            if FORBIDDEN_WORDS.contains(&lowercase_word) {
+            if FORBIDDEN_WORDS.contains(&word.to_lowercase()) {
                 stats.forbidden_count += 1;
             }
         }
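The practical effect of the `else if` above: a digit is not `is_ascii_uppercase`, so in the old version a word like `B2B` dropped out of the capitalized count; now numeric characters skip the capitalization check, which is what the TODO is getting at.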
jest_rust/src/parser/for_loops_forbidden_only.rs (new file, 14 lines)
@@ -0,0 +1,14 @@
+use crate::{FORBIDDEN_WORDS, stats::Stats};
+
+#[allow(dead_code)]
+pub fn parse(stats: &mut Stats, text: &str) {
+    for word in text
+        .split_ascii_whitespace()
+        .map(|s| s.trim())
+        .filter(|s| !s.is_empty())
+    {
+        if FORBIDDEN_WORDS.contains(&word.to_lowercase()) {
+            stats.forbidden_count += 1;
+        }
+    }
+}
@@ -1,2 +1,3 @@
-pub mod muncher;
-pub mod for_loops;
+pub mod for_loops;
+pub mod for_loops_forbidden_only;
+pub mod muncher;
@@ -46,12 +46,12 @@ impl Display for Stats {
         )?;
         writeln!(
             f,
-            "forbidden word percentage: {:.0}%",
+            "forbidden word percentage: {:.2}%",
             (self.forbidden_count as f32 / word_count) * 100.0,
         )?;
         write!(
             f,
-            "capitalized word percentage: {:.0}%",
+            "capitalized word percentage: {:.2}%",
             (self.capitalized_count as f32 / word_count) * 100.0,
         )
     }
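For context on the `{:.0}` to `{:.2}` change: the muncher run with forbidden words counts 279717 forbidden out of 86765116 words, which `{:.0}` rounds to `0%` while `{:.2}` prints roughly `0.32%`:

```
fn main() {
    let pct = (279_717_f64 / 86_765_116_f64) * 100.0;
    println!("{pct:.0}%"); // 0%
    println!("{pct:.2}%"); // 0.32%
}
```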