use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::{env, fmt::Display, fs};
/// Substrings that mark a word as "forbidden".
///
/// `Stats::process` lowercases each word before checking it against this list
/// with `str::contains`, so every entry is written in lowercase and matches
/// anywhere inside a word (e.g. "com" also matches "welcome"). Overlapping
/// entries such as "hack"/"hacker" each produce their own match.
static FORBIDDEN_WORDS: &[&str] = &[
    "recovery",
    "techie",
    "http",
    "https",
    "digital",
    "hack",
    "::",
    "//",
    "@",
    "com",
    "crypto",
    "bitcoin",
    "wallet",
    "hacker",
    "welcome",
    "whatsapp",
    "email",
    "cryptocurrency",
    "stolen",
    "freeze",
    "quick",
    "crucial",
    "tracing",
    "scammers",
    // NOTE(review): "expers" looks like a typo for "experts" — confirm before
    // changing it, since that would alter the match counts.
    "expers",
    "hire",
    "century",
    "transaction",
    "essential",
    "managing",
    "contact",
    "contacting",
    "understanding",
    "assets",
    "funds",
];
/// Text statistics accumulated by [`Stats::process`].
///
/// Every counter starts at zero (via `Default`) and each `process` call adds
/// to the existing values, so one `Stats` can cover one file or several.
#[derive(Debug, Default)]
pub struct Stats {
/// Files whose contents were read successfully.
file_count: u32,
/// Files that could not be read; they contribute nothing to the other counters.
failed_file_count: u32,
/// Non-empty (after trimming) segments obtained by splitting the text on `.`.
sentence_count: u32,
/// Whitespace-separated words found inside the counted sentences.
word_count: u32,
/// Words in which every character is an ASCII uppercase letter.
capitalized_count: u32,
/// Individual characters for which `char::is_numeric` is true — counted per
/// character, not per word.
numeric_count: u32,
/// Matches of `FORBIDDEN_WORDS` entries as substrings of lowercased words;
/// a single word adds one per entry it contains.
forbidden_count: u32,
}
impl Stats {
    /// Reads the file at path `file` and adds its text statistics to `self`.
    ///
    /// If the file cannot be read into a `String` (`fs::read_to_string`
    /// returns an error, e.g. missing file or invalid UTF-8), only
    /// `failed_file_count` is incremented and nothing else changes.
    pub fn process(&mut self, file: &str) {
        match fs::read_to_string(file) {
            Ok(text) => {
                self.file_count += 1;
                self.process_text(&text);
            }
            Err(_) => self.failed_file_count += 1,
        }
    }

    /// Splits `text` into sentences on `.` and tallies every word in them.
    ///
    /// Segments that are empty after trimming are not counted as sentences.
    fn process_text(&mut self, text: &str) {
        let sentences = text
            .split('.')
            .map(str::trim)
            .filter(|segment| !segment.is_empty());
        for sentence in sentences {
            self.sentence_count += 1;
            // `split_whitespace` never yields empty or padded items, so the
            // words need no further trimming or filtering.
            for word in sentence.split_whitespace() {
                self.process_word(word);
            }
        }
    }

    /// Updates the word, numeric, capitalized and forbidden counters for one word.
    fn process_word(&mut self, word: &str) {
        self.word_count += 1;

        // One pass over the characters: count numeric ones and track whether
        // the whole word is ASCII uppercase.
        let mut only_ascii_uppercase = true;
        for ch in word.chars() {
            if ch.is_numeric() {
                self.numeric_count += 1;
            }
            only_ascii_uppercase &= ch.is_ascii_uppercase();
        }
        if only_ascii_uppercase {
            self.capitalized_count += 1;
        }

        // Case-insensitive substring match; each matching entry counts once,
        // so a word may add more than one to `forbidden_count`.
        let lowered = word.to_lowercase();
        for forbidden in FORBIDDEN_WORDS {
            if lowered.contains(*forbidden) {
                self.forbidden_count += 1;
            }
        }
    }
}
/// Human-readable report: one `name: value` line per counter, followed by the
/// words-per-sentence average and the forbidden / capitalized percentages
/// relative to the word count. The final line has no trailing newline.
impl Display for Stats {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Zero-guarded division: a `Stats` with no words or sentences (failed
        // or empty file) reports 0 instead of the `NaN` that `0.0 / 0.0`
        // would print. `f64::from` converts the `u32` counters losslessly.
        fn ratio(numerator: u32, denominator: u32) -> f64 {
            if denominator == 0 {
                0.0
            } else {
                f64::from(numerator) / f64::from(denominator)
            }
        }

        writeln!(f, "file count: {}", self.file_count)?;
        writeln!(f, "failed file count: {}", self.failed_file_count)?;
        writeln!(f, "sentence count: {}", self.sentence_count)?;
        writeln!(f, "word count: {}", self.word_count)?;
        writeln!(f, "capitalized count: {}", self.capitalized_count)?;
        writeln!(f, "numeric count: {}", self.numeric_count)?;
        writeln!(f, "forbidden count: {}", self.forbidden_count)?;
        writeln!(
            f,
            "words per sentence average: {:.1}",
            ratio(self.word_count, self.sentence_count)
        )?;
        writeln!(
            f,
            "forbidden word percentage: {:.0}%",
            ratio(self.forbidden_count, self.word_count) * 100.0,
        )?;
        write!(
            f,
            "capitalized word percentage: {:.0}%",
            ratio(self.capitalized_count, self.word_count) * 100.0,
        )
    }
}
/// Treats every command-line argument as a file path and prints one `Stats`
/// report per file.
///
/// The paths are handed to rayon's `par_iter`, and each one gets its own fresh
/// `Stats`; nothing is aggregated across files. The printed report does not
/// include the file name, and the order of reports is whatever order the
/// parallel workers finish in.
fn main() {
    let paths: Vec<String> = env::args().skip(1).collect();
    paths.par_iter().for_each(|path| {
        let mut stats = Stats::default();
        stats.process(path);
        println!("{stats}");
    });
}
/// Build-and-run smoke benchmark; it makes no assertions.
///
/// Builds the release binary with cargo, runs `target/release/jisspam` over
/// every entry of the `test_files` directory, and prints the program output
/// plus the elapsed wall-clock time. A failure to spawn either command is only
/// logged to stderr. The test panics if the current directory or `test_files`
/// cannot be read, or if an entry's path cannot be canonicalized or is not
/// valid UTF-8.
#[test]
fn test() {
    use std::{env, fs, process::Command, time::Instant};

    let cwd = env::current_dir().unwrap();
    println!("cwd: {}", cwd.display());

    // Compile the release binary that the benchmark below executes.
    match Command::new("cargo").arg("build").arg("--release").output() {
        Ok(built) => println!("compiled {}", String::from_utf8_lossy(&built.stdout)),
        Err(err) => eprintln!("compile failed: {err}"),
    }

    // Collect the absolute path of every entry in `test_files`.
    let mut files = Vec::new();
    for entry in fs::read_dir("test_files").unwrap() {
        let absolute = entry.unwrap().path().canonicalize().unwrap();
        files.push(absolute.to_str().unwrap().to_string());
    }
    println!("test files found: {:#?}", files);

    // Time one full run of the binary over all collected files.
    let benchmark = Instant::now();
    match Command::new("target/release/jisspam").args(&files).output() {
        Ok(ran) => println!("{}", String::from_utf8_lossy(&ran.stdout)),
        Err(err) => eprintln!("run failed: {err}"),
    }
    println!("benchmark: {}ms", benchmark.elapsed().as_millis());
}