//! Simple text analyzer: counts capitalized words, sentences, words,
//! numbers, and "forbidden" (spam-indicator) words in each file given
//! as a command-line argument.

use std::{env, fs};

/// Strips `content` down to an allowed alphabet: ASCII digits,
/// lowercase ASCII letters, space, newline, and basic punctuation.
///
/// NOTE(review): the allowed set contains a stray second '1' and no
/// uppercase letters — harmless for `contains`, but confirm whether
/// uppercase was meant to survive cleaning. Literal kept byte-identical.
fn clean_content(content: &str) -> String {
	let allowed_chars = "01234567891abcdefghijklmnopqrstuvwxyz \n.,!?";

	content.chars()
		.filter(|&c| allowed_chars.contains(c))
		.collect()
}

/// Splits `content` into complete sentences (the text before each '.'),
/// with leading whitespace trimmed from each.
///
/// `split('.')` always yields a trailing element: the empty string when
/// `content` ends with '.', otherwise an unterminated fragment. Either
/// way it is not a complete sentence, so it is dropped unconditionally.
/// (The original guarded the pop with `last.ends_with('.')`, which is
/// always false after splitting on '.', so the pop ran every time
/// anyway; removing the dead check also drops the nightly-only
/// `let_chains` feature requirement.)
fn get_sentences(content: &str) -> Vec<&str> {
	let mut sentences: Vec<&str> = content.split('.')
		.map(str::trim_start) // remove leading whitespace
		.collect();

	// `split` never yields an empty iterator, so a last element
	// always exists to remove.
	sentences.pop();

	sentences
}

/// Iterates over the whitespace-separated words of `sentences`.
fn get_words(sentences: &str) -> impl Iterator<Item = &str> + Clone {
	sentences.split_whitespace()
}

/// Returns true when every ASCII-alphanumeric character in `word` is an
/// uppercase letter.
///
/// NOTE(review): a word with no alphanumerics at all (e.g. "?!") counts
/// as fully capitalized, while any digit disqualifies a word — confirm
/// both edge cases are intended.
fn is_fully_capitalized_word(word: &str) -> bool {
	word.chars()
		.all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
}

/// Collects every fully-capitalized word across all complete sentences.
fn get_capitalized_words(content: &str) -> Vec<&str> {
	let mut cap_words = vec![];

	for sentence in get_sentences(content) {
		// The first word of a sentence starts with a capital by
		// convention, so it is never counted.
		for word in get_words(sentence).skip(1) {
			if is_fully_capitalized_word(word) {
				cap_words.push(word);
			}
		}
	}

	cap_words
}

/// Extracts every maximal run of ASCII digits from `content`.
///
/// Fixes the original, which split *on* digits and therefore returned
/// the text *between* the numbers (including many empty fragments)
/// instead of the numbers themselves.
fn get_numbers(content: &str) -> Vec<String> {
	let clean = clean_content(content);

	clean.split(|c: char| !c.is_ascii_digit())
		.filter(|s| !s.is_empty()) // drop gaps between digit runs
		.map(str::to_string)
		.collect()
}

/// Returns every word of `content` that case-insensitively matches an
/// entry of `FORBIDDEN_WORDS`.
fn get_forbidden_words(content: &str) -> Vec<&str> {
	get_words(content)
		// `any` replaces the original `find(..).is_some()`
		// (clippy: `search_is_some`).
		.filter(|w| FORBIDDEN_WORDS.iter().any(|fw| w.eq_ignore_ascii_case(fw)))
		.collect()
}

/// Prints all statistics for one document.
fn analyze(data: &str) {
	// (The original computed `clean_content(data)` here and immediately
	// dropped it — dead work, removed.)

	// All capitalized words
	let cap_words = get_capitalized_words(data);
	println!("All capitalized words: {}", cap_words.len());

	// All sentences
	let sentences = get_sentences(data);
	println!("Sentences: {}", sentences.len());

	// All words — count once and reuse instead of walking the
	// iterator a second time at the end.
	let word_count = get_words(data).count();
	println!("Words: {}", word_count);

	// Numbers
	let numbers = get_numbers(data);
	println!("Numbers: {}", numbers.len());

	// Forbidden words
	let fw = get_forbidden_words(data);
	println!("Forbidden words: {}", fw.len());

	// Guard the average: input without a single complete sentence
	// previously caused a divide-by-zero panic.
	if !sentences.is_empty() {
		println!("Word count per sentence: {}", word_count / sentences.len());
	}
}

/// Reads each command-line argument as a file path and analyzes it.
/// Unreadable paths are reported to stderr and skipped.
fn main() {
	for arg in env::args().skip(1) { // skip the program name
		let Ok(text) = fs::read_to_string(&arg) else {
			eprintln!("{arg} isn't a valid file or couldn't be read");
			continue;
		};

		analyze(&text);
	}
}

/// Words that commonly indicate spam; matched ASCII case-insensitively.
/// NOTE(review): "expers" looks like a typo for "experts" — confirm
/// before changing, since it is matched against live input.
static FORBIDDEN_WORDS: &[&str] = &[
	"recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
	"@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
	"stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
	"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds"
];