parent
ab32a81d9c
commit
854a1c3991
@ -97,4 +97,37 @@ opt-level z: `Time Jest Rust: 7.045313119888306` slower
|
||||
strip true: `Time Jest Rust: 4.337219476699829` faster
|
||||
lto true: `Time Jest Rust: 4.703521728515625` slower
|
||||
lto none: `Time Jest Rust: 4.817203998565674`
|
||||
lto thin: `Time Jest Rust: 4.429729223251343` faster
|
||||
lto thin: `Time Jest Rust: 4.429729223251343` faster
|
||||
|
||||
# data integrity
|
||||
(These counts are not verified: they are a best guess, and I have no reference data to compare them against.)
|
||||
for loops:
|
||||
```
|
||||
file count: 904
|
||||
failed file count: 0
|
||||
sentence count: 5602301
|
||||
word count: 81701260
|
||||
capitalized count: 1753639
|
||||
numeric count: 14981248
|
||||
forbidden count: 1237059
|
||||
words per sentence average: 14.6
|
||||
forbidden word percentage: 2%
|
||||
capitalized word percentage: 2%
|
||||
|
||||
benchmark: 5033ms
|
||||
```
|
||||
muncher:
|
||||
```
|
||||
file count: 904
|
||||
failed file count: 0
|
||||
sentence count: 5338705
|
||||
word count: 86765116
|
||||
capitalized count: 13640820
|
||||
numeric count: 10902254
|
||||
forbidden count: 0
|
||||
words per sentence average: 16.3
|
||||
forbidden word percentage: 0%
|
||||
capitalized word percentage: 16%
|
||||
|
||||
benchmark: 504ms
|
||||
```
|
@ -58,6 +58,79 @@ impl Stats {
|
||||
return;
|
||||
};
|
||||
self.file_count += 1;
|
||||
self.muncher(&text);
|
||||
// self.for_loops(&text);
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
/// probably buggy. for example, are new lines sentences? what if the text has no last period?
|
||||
/// 500ms is without forbidden words check, but...
|
||||
/// 6000ms if adding forbidden words.. so not faster
|
||||
fn muncher(&mut self, text: &str) {
|
||||
let mut capitalized = true;
|
||||
let mut whitespaced = false;
|
||||
let mut dotted = false;
|
||||
let mut word = String::new();
|
||||
for char in text.chars() {
|
||||
if whitespaced {
|
||||
if !char.is_whitespace() {
|
||||
whitespaced = false; //end whiteness
|
||||
}
|
||||
continue;
|
||||
} else if char.is_whitespace() {
|
||||
whitespaced = true;
|
||||
self.word_count += 1; //end of word
|
||||
if capitalized {
|
||||
self.capitalized_count += 1;
|
||||
} else {
|
||||
//reset capitalized word
|
||||
capitalized = true;
|
||||
}
|
||||
let lowercase_word = word.to_lowercase();
|
||||
for forbidden_word in FORBIDDEN_WORDS {
|
||||
if lowercase_word.contains(forbidden_word) {
|
||||
self.forbidden_count += 1;
|
||||
}
|
||||
}
|
||||
word = String::new();
|
||||
continue;
|
||||
}
|
||||
if dotted {
|
||||
if char != '.' {
|
||||
dotted = false; //end sentencing
|
||||
}
|
||||
continue;
|
||||
} else if char == '.' {
|
||||
dotted = true;
|
||||
self.sentence_count += 1;
|
||||
self.word_count += 1; //end of word
|
||||
if capitalized {
|
||||
self.capitalized_count += 1;
|
||||
} else {
|
||||
//reset capitalized word
|
||||
capitalized = true;
|
||||
}
|
||||
let lowercase_word = word.to_lowercase();
|
||||
for forbidden_word in FORBIDDEN_WORDS {
|
||||
if lowercase_word.contains(forbidden_word) {
|
||||
self.forbidden_count += 1;
|
||||
}
|
||||
}
|
||||
word = String::new();
|
||||
continue;
|
||||
}
|
||||
word += &char.to_string();
|
||||
if char.is_numeric() {
|
||||
self.numeric_count += 1;
|
||||
capitalized = false;
|
||||
}
|
||||
if !char.is_ascii_uppercase() {
|
||||
capitalized = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
#[allow(dead_code)]
|
||||
/// typically 5000ms
|
||||
fn for_loops(&mut self, text: &str) {
|
||||
for sentence in text
|
||||
.split('.')
|
||||
.map(|s| s.trim())
|
||||
@ -75,6 +148,7 @@ impl Stats {
|
||||
for char in word.chars() {
|
||||
if char.is_numeric() {
|
||||
self.numeric_count += 1;
|
||||
//TODO are numbers capitalized or not? I don't know!
|
||||
}
|
||||
if !char.is_ascii_uppercase() {
|
||||
all_capitalized = false;
|
||||
@ -197,3 +271,41 @@ fn test() {
|
||||
}
|
||||
println!("benchmark: {}ms", benchmark.elapsed().as_millis());
|
||||
}
|
||||
/// End-to-end benchmark over the book corpus.
///
/// Builds the crate in release mode, collects every entry of `../books` (relative to the
/// current working directory), runs `target/release/jisspam` with those paths as arguments,
/// and prints the program output together with the elapsed wall-clock time.
///
/// # Panics
///
/// Fails the test when the build or the benchmarked binary cannot be spawned or exits with
/// a non-zero status, and when `../books` or one of its entries cannot be read.
#[test]
fn books_test() {
    use std::{env, fs, process::Command, time::Instant};

    println!("cwd: {}", env::current_dir().unwrap().display());

    //compile
    let compile = Command::new("cargo")
        .args(["build", "--release"])
        .output()
        .expect("compile failed: could not spawn cargo");
    println!("compiled {}", String::from_utf8_lossy(&compile.stdout));
    // A failed build must stop the test, otherwise a stale binary would be benchmarked.
    assert!(
        compile.status.success(),
        "compile failed: {}",
        String::from_utf8_lossy(&compile.stderr)
    );

    //get test files
    // Paths stay as `PathBuf` so that non-UTF-8 file names do not cause a panic.
    let files = fs::read_dir("../books")
        .expect("cannot read ../books")
        .map(|entry| {
            entry
                .expect("cannot read directory entry")
                .path()
                .canonicalize()
                .expect("cannot canonicalize path")
        })
        .collect::<Vec<_>>();
    println!("test files found: {:#?}", files);

    //benchmark run
    let benchmark = Instant::now();
    let run = Command::new("target/release/jisspam")
        .args(&files)
        .output()
        .expect("run failed: could not spawn target/release/jisspam");
    // Stop the clock before printing so terminal output is not part of the measurement.
    let elapsed = benchmark.elapsed();

    println!("{}", String::from_utf8_lossy(&run.stdout));
    println!("benchmark: {}ms", elapsed.as_millis());
    assert!(
        run.status.success(),
        "run failed: {}",
        String::from_utf8_lossy(&run.stderr)
    );
}
|
||||
|
Loading…
Reference in New Issue
Block a user