New version rust.

This commit is contained in:
retoor 2024-12-01 23:23:32 +01:00
parent c26927a265
commit 68d2ef1823
2 changed files with 72 additions and 49 deletions

View File

@ -5,3 +5,7 @@ edition = "2021"
[dependencies] [dependencies]
rayon = "1.10.0" rayon = "1.10.0"
[profile.release]
lto = "thin"
panic = "abort"

View File

@ -14,21 +14,36 @@ fn clean_content(content: &str) -> String {
clean_content clean_content
} }
fn get_sentences(content: &str) -> Vec<&str> { fn get_sentences(content: &str) -> usize {
let mut sentences = content.split('.') let sentences = content.split('.')
.map(|s| s.trim_start()) // Remove leading whitespace .map(|s| s.trim_start()) // Remove leading whitespace
.collect::<Vec<_>>(); .count();
// Remove last "sentence" if didn't end with a dot // // Remove last "sentence" if didn't end with a dot
if let Some(last) = sentences.last() && !last.ends_with('.') { // if let Some(last) = sentences.last() && !last.ends_with('.') {
sentences.pop(); // sentences.pop();
} // }
sentences sentences
} }
fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone { fn get_words(content: &str, words: &mut usize, caps: &mut usize, fw: &mut usize) {
sentences.split_whitespace() fn check_forbidden(w: &str) -> bool {
FORBIDDEN_WORDS.iter()
.find(|fw| str::eq_ignore_ascii_case(w, fw))
.is_some()
}
for word in content.split_whitespace() {
*words += 1;
if is_fully_capitalized_word(word) {
*caps += 1;
}
if check_forbidden(word) {
*fw += 1;
}
}
} }
fn is_fully_capitalized_word(word: &str) -> bool { fn is_fully_capitalized_word(word: &str) -> bool {
@ -36,65 +51,68 @@ fn is_fully_capitalized_word(word: &str) -> bool {
.all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase()) .all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
} }
fn get_capitalized_words(content: &str) -> Vec<&str> { //fn get_capitalized_words(content: &str) -> usize {
let sentences = get_sentences(content); // let sentences = get_sentences(content);
let mut cap_words = vec![]; //// let mut cap_words = vec![];
// let mut count = 0;
for sentence in sentences { //
// Always skip the first word since sentences start with // for sentence in sentences {
for word in get_words(sentence).skip(1) { // // Always skip the first word since sentences start with
if is_fully_capitalized_word(word) { // for word in get_words(sentence).skip(1) {
cap_words.push(word); // if is_fully_capitalized_word(word) {
} // count += 1;
} // }
} // }
// }
cap_words //
// count
//}
fn get_numbers(clean_content: &str) -> usize {
clean_content.split(|c: char| !c.is_ascii_digit())
.count()
} }
fn get_numbers(clean_content: &str) -> Vec<&str> { //fn get_forbidden_words(content: &str) -> usize {
clean_content.split(|c: char| c.is_ascii_digit()) // fn check_forbidden(w: &str) -> bool {
.collect() // FORBIDDEN_WORDS.iter()
} // .find(|fw| str::eq_ignore_ascii_case(w, fw))
// .is_some()
fn get_forbidden_words(content: &str) -> Vec<&str> { // }
fn check_forbidden(w: &str) -> bool { //
FORBIDDEN_WORDS.iter() // get_words(content)
.find(|fw| str::eq_ignore_ascii_case(w, fw)) // .filter(|w| check_forbidden(w))
.is_some() // .collect()
} //}
get_words(content)
.filter(|w| check_forbidden(w))
.collect()
}
fn analyze(data: &str) { fn analyze(data: &str) {
let clean_data = clean_content(data); let clean_data = clean_content(data);
// drop(clean_data); // You aren't actually using clean_data :O // drop(clean_data); // You aren't actually using clean_data :O
// All capitalized words // All capitalized words
let cap_words = get_capitalized_words(data); let mut words = 0;
println!("All capitalized words: {}", cap_words.len()); let mut fw = 0;
let mut cap_words = 0;
get_words(&clean_data, &mut words, &mut fw, &mut cap_words);
println!("All capitalized words: {}", cap_words);
// All sentences // All sentences
let sentences = get_sentences(data); let sentences = get_sentences(data);
println!("Sentences: {}", sentences.len()); println!("Sentences: {}", sentences);
// All words // All words
let words = get_words(data); println!("Words: {}", words);
println!("Words: {}", words.clone().count());
// Numbers // Numbers
let numbers = get_numbers(&clean_data); let numbers = get_numbers(&clean_data);
println!("Numbers: {}", numbers.len()); println!("Numbers: {}", numbers);
// Forbidden words // Forbidden words
let fw = get_forbidden_words(data); println!("Forbidden words: {}", fw);
println!("Forbidden words: {}", fw.len());
if sentences.len() > 0 { if sentences > 0 {
let word_count_per_sentence = words.count() / sentences.len(); let word_count_per_sentence = words / sentences;
println!("Word count per sentence: {}", word_count_per_sentence); println!("Word count per sentence: {}", word_count_per_sentence);
} }
} }
@ -136,3 +154,4 @@ static FORBIDDEN_WORDS: &'static [&'static str] = &[
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds" "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds"
]; ];