more tests
This commit is contained in:
parent
d73d4ff7c1
commit
c459fe6d79
@ -9,6 +9,7 @@ tokio = { version = "1.44.1", features = ["full"] }
|
|||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
codegen-units = 1 # less means more compile work but better optimized
|
codegen-units = 1 # less means more compile work but better optimized
|
||||||
lto = "thin" # thin has best performance. fat the worst
|
lto = "fat" # thin has best performance. fat the worst
|
||||||
strip = true
|
strip = true
|
||||||
|
# opt-level = "z" # slows down
|
||||||
panic = "abort"
|
panic = "abort"
|
||||||
|
|||||||
@ -4,116 +4,28 @@ extract `../books.tar.gz`
|
|||||||
|
|
||||||
# local machine benchmarks
|
# local machine benchmarks
|
||||||
|
|
||||||
single threaded: `Time Jest Rust: 33.63373279571533`
|
single threaded: `33.63373279571533`
|
||||||
|
|
||||||
rayon: `Time Jest Rust: 4.294418811798096`
|
rayon: `4.294418811798096`
|
||||||
|
|
||||||
tokio: `Time Jest Rust: 4.717588901519775`
|
tokio: `4.717588901519775`
|
||||||
|
|
||||||
|
tokio:
|
||||||
|
|
||||||
|
muncher: `2486ms`
|
||||||
|
|
||||||
|
for_loops: `1227ms`
|
||||||
|
|
||||||
|
for_loops_forbidden_only: `987ms`
|
||||||
|
|
||||||
|
trie creation and stats accumulation take 0ms
|
||||||
|
|
||||||
## compile options benchmarks
|
## compile options benchmarks
|
||||||
lto not thin: `Time Jest Rust: 5.306957483291626` slower
|
`lto`: choosing `thin` vs `fat` doesn't change much
|
||||||
|
|
||||||
lto fat: `Time Jest Rust: 5.413678407669067` slower
|
`codegen-units`: choosing 0 vs 1 doesn't change much
|
||||||
|
|
||||||
codegen-units 1: `Time Jest Rust: 4.451631546020508` faster
|
`opt-level = "z"` slows things down
|
||||||
|
|
||||||
opt-level z: `Time Jest Rust: 7.045313119888306` slower
|
|
||||||
|
|
||||||
strip true: `Time Jest Rust: 4.337219476699829` faster
|
|
||||||
|
|
||||||
lto true: `Time Jest Rust: 4.703521728515625` slower
|
|
||||||
|
|
||||||
lto none: `Time Jest Rust: 4.817203998565674`
|
|
||||||
|
|
||||||
lto thin: `Time Jest Rust: 4.429729223251343` faster
|
|
||||||
|
|
||||||
# data integrity
|
|
||||||
(this isn't tested, just guessed, and I don't have data to compare it with)
|
|
||||||
|
|
||||||
for loops:
|
|
||||||
```
|
|
||||||
file count: 904
|
|
||||||
failed file count: 0
|
|
||||||
sentence count: 5602301
|
|
||||||
word count: 81701260
|
|
||||||
capitalized count: 1753639
|
|
||||||
numeric count: 14981248
|
|
||||||
forbidden count: 1237059
|
|
||||||
words per sentence average: 14.6
|
|
||||||
forbidden word percentage: 2%
|
|
||||||
capitalized word percentage: 2%
|
|
||||||
|
|
||||||
benchmark: 5033ms
|
|
||||||
```
|
|
||||||
|
|
||||||
muncher:
|
|
||||||
```
|
|
||||||
file count: 904
|
|
||||||
failed file count: 0
|
|
||||||
sentence count: 5338705
|
|
||||||
word count: 86765116
|
|
||||||
capitalized count: 13640820
|
|
||||||
numeric count: 10902254
|
|
||||||
forbidden count: 0
|
|
||||||
words per sentence average: 16.3
|
|
||||||
forbidden word percentage: 0%
|
|
||||||
capitalized word percentage: 16%
|
|
||||||
|
|
||||||
benchmark: 504ms
|
|
||||||
```
|
|
||||||
with forbidden words:
|
|
||||||
```
|
|
||||||
file count: 904
|
|
||||||
failed file count: 0
|
|
||||||
sentence count: 5338705
|
|
||||||
word count: 86765116
|
|
||||||
capitalized count: 13640820
|
|
||||||
numeric count: 10902254
|
|
||||||
forbidden count: 279717
|
|
||||||
words per sentence average: 16.3
|
|
||||||
forbidden word percentage: 0%
|
|
||||||
capitalized word percentage: 16%
|
|
||||||
|
|
||||||
benchmark: 6078ms
|
|
||||||
```
|
|
||||||
|
|
||||||
# forbidden words benchmarks
|
|
||||||
it seems the forbidden words take about 4000ms to churn through in the original version
|
|
||||||
|
|
||||||
for loops count forbidden word once only:
|
|
||||||
```
|
|
||||||
file count: 904
|
|
||||||
failed file count: 0
|
|
||||||
sentence count: 5602301
|
|
||||||
word count: 81701260
|
|
||||||
capitalized count: 1753639
|
|
||||||
numeric count: 14981248
|
|
||||||
forbidden count: 1143234
|
|
||||||
words per sentence average: 14.6
|
|
||||||
forbidden word percentage: 1%
|
|
||||||
capitalized word percentage: 2%
|
|
||||||
|
|
||||||
benchmark: 4737ms
|
|
||||||
```
|
|
||||||
for loops with trie:
|
|
||||||
```
|
|
||||||
file count: 904
|
|
||||||
failed file count: 0
|
|
||||||
sentence count: 5602301
|
|
||||||
word count: 81701260
|
|
||||||
capitalized count: 1753639
|
|
||||||
numeric count: 14981248
|
|
||||||
forbidden count: 176528
|
|
||||||
words per sentence average: 14.6
|
|
||||||
forbidden word percentage: 0%
|
|
||||||
capitalized word percentage: 2%
|
|
||||||
|
|
||||||
benchmark: 1588ms
|
|
||||||
```
|
|
||||||
|
|
||||||
muncher with trie is 2600ms
|
|
||||||
|
|
||||||
for loops with fxhash trie: 1200ms
|
|
||||||
|
|
||||||
# ubuntu terminal running
|
# ubuntu terminal running
|
||||||
https://snek.molodetz.nl/terminal.html ubuntu running thing instructions:
|
https://snek.molodetz.nl/terminal.html ubuntu running thing instructions:
|
||||||
|
|||||||
@ -54,32 +54,32 @@ static FORBIDDEN_WORDS: LazyLock<Trie> = LazyLock::new(|| {
|
|||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
let files = env::args().skip(1);
|
let files = env::args().skip(1);
|
||||||
let mut stats = Stats::default();
|
|
||||||
let mut rx = {
|
let mut rx = {
|
||||||
let (tx, rx) = mpsc::unbounded_channel();
|
let (tx, rx) = mpsc::unbounded_channel();
|
||||||
for file in files {
|
for file in files {
|
||||||
//reading files non-sequentially shaves about 30ms on average (of 1250ms), and that's on an NVMe SSD, so why not
|
let tx = tx.clone();
|
||||||
if let Ok(text) = fs::read_to_string(&file) {
|
tokio::spawn(async move {
|
||||||
stats.file_count += 1;
|
let mut stats = Stats::default();
|
||||||
let tx = tx.clone();
|
//reading files in threads doesn't change the speed in any way, but oh well
|
||||||
tokio::spawn(async move {
|
if let Ok(text) = fs::read_to_string(&file) {
|
||||||
let mut stats = Stats::default();
|
stats.file_count += 1;
|
||||||
parser::for_loops::parse(&mut stats, &text);
|
parser::for_loops::parse(&mut stats, &text);
|
||||||
let _ = tx.send(stats);
|
} else {
|
||||||
});
|
stats.failed_file_count += 1;
|
||||||
} else {
|
}
|
||||||
stats.failed_file_count += 1;
|
let _ = tx.send(stats);
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
rx
|
rx
|
||||||
};
|
};
|
||||||
|
let mut stats = Stats::default();
|
||||||
while let Some(file_stat) = rx.recv().await {
|
while let Some(file_stat) = rx.recv().await {
|
||||||
stats += file_stat;
|
stats += file_stat;
|
||||||
}
|
}
|
||||||
println!("{stats}");
|
println!("{stats}");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// needs ../books.tar.gz to be extracted
|
/// needs ../books.tar.gz to be extracted into ../books
|
||||||
#[test]
|
#[test]
|
||||||
fn test() {
|
fn test() {
|
||||||
use std::{env, fs, process::Command, time::Instant};
|
use std::{env, fs, process::Command, time::Instant};
|
||||||
|
|||||||
@ -11,7 +11,7 @@ pub fn parse(stats: &mut Stats, text: &str) {
|
|||||||
{
|
{
|
||||||
stats.sentence_count += 1;
|
stats.sentence_count += 1;
|
||||||
for word in sentence
|
for word in sentence
|
||||||
.split_whitespace()
|
.split_ascii_whitespace()
|
||||||
.map(|s| s.trim())
|
.map(|s| s.trim())
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
{
|
{
|
||||||
@ -21,17 +21,15 @@ pub fn parse(stats: &mut Stats, text: &str) {
|
|||||||
for char in word.chars() {
|
for char in word.chars() {
|
||||||
if char.is_numeric() {
|
if char.is_numeric() {
|
||||||
stats.numeric_count += 1;
|
stats.numeric_count += 1;
|
||||||
//TODO are numbers capitalized or not? I don't know!
|
all_capitalized = false;
|
||||||
}
|
} else if !char.is_ascii_uppercase() {
|
||||||
if !char.is_ascii_uppercase() {
|
|
||||||
all_capitalized = false;
|
all_capitalized = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if all_capitalized {
|
if all_capitalized {
|
||||||
stats.capitalized_count += 1;
|
stats.capitalized_count += 1;
|
||||||
}
|
}
|
||||||
let lowercase_word = word.to_lowercase();
|
if FORBIDDEN_WORDS.contains(&word.to_lowercase()) {
|
||||||
if FORBIDDEN_WORDS.contains(&lowercase_word) {
|
|
||||||
stats.forbidden_count += 1;
|
stats.forbidden_count += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
14
jest_rust/src/parser/for_loops_forbidden_only.rs
Normal file
14
jest_rust/src/parser/for_loops_forbidden_only.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
use crate::{FORBIDDEN_WORDS, stats::Stats};
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn parse(stats: &mut Stats, text: &str) {
|
||||||
|
for word in text
|
||||||
|
.split_ascii_whitespace()
|
||||||
|
.map(|s| s.trim())
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
{
|
||||||
|
if FORBIDDEN_WORDS.contains(&word.to_lowercase()) {
|
||||||
|
stats.forbidden_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,2 +1,3 @@
|
|||||||
pub mod muncher;
|
pub mod for_loops;
|
||||||
pub mod for_loops;
|
pub mod for_loops_forbidden_only;
|
||||||
|
pub mod muncher;
|
||||||
@ -46,12 +46,12 @@ impl Display for Stats {
|
|||||||
)?;
|
)?;
|
||||||
writeln!(
|
writeln!(
|
||||||
f,
|
f,
|
||||||
"forbidden word percentage: {:.0}%",
|
"forbidden word percentage: {:.2}%",
|
||||||
(self.forbidden_count as f32 / word_count) * 100.0,
|
(self.forbidden_count as f32 / word_count) * 100.0,
|
||||||
)?;
|
)?;
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"capitalized word percentage: {:.0}%",
|
"capitalized word percentage: {:.2}%",
|
||||||
(self.capitalized_count as f32 / word_count) * 100.0,
|
(self.capitalized_count as f32 / word_count) * 100.0,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user