parent
e4354a7645
commit
5eded442b7
jest_rust
2
jest_rust/.gitignore
vendored
Normal file
2
jest_rust/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/target
|
||||||
|
/Cargo.lock
|
54
jest_rust/Cargo.lock
generated
54
jest_rust/Cargo.lock
generated
@ -2,6 +2,60 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 4
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jisspam"
|
name = "jisspam"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"rayon",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
@ -4,3 +4,9 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
rayon = "1.10.0"
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
lto = "thin" # Thin Link-Time Optimization: most of the runtime benefit of full ("fat") LTO with much shorter link times
|
||||||
|
debug = false # Disables debug symbols to streamline the binary
|
||||||
|
panic = "abort"
|
||||||
|
@ -1,39 +1,5 @@
|
|||||||
`cd jestdotty_rust/jisspam && cargo run --release && cp target/release/jisspam ../../`
|
|
||||||
|
|
||||||
for https://retoor.molodetz.nl/retoor/isspam
|
for https://retoor.molodetz.nl/retoor/isspam
|
||||||
|
|
||||||
some older benchmarks to know what to beat, or something:
|
|
||||||
```
|
|
||||||
Time C: 11.447573184967041
|
|
||||||
Time Rust: 1.2248871326446533
|
|
||||||
Time CPP: 2.1745784282684326
|
|
||||||
Time Borded CPP: 1.4606633186340332
|
|
||||||
Time Retoor Python: 38.008224964141846
|
|
||||||
|
|
||||||
Time C: 10.476306915283203
|
|
||||||
Time Rust: 1.1816489696502686
|
|
||||||
Time CPP: 2.030345916748047
|
|
||||||
Time Borded CPP: 0.6507000923156738
|
|
||||||
|
|
||||||
Time Rust: 1.1833229064941406
|
|
||||||
Time C: 10.77005124092102
|
|
||||||
Time CPP: 2.075010061264038
|
|
||||||
Time Borded CPP: 0.8050553798675537
|
|
||||||
Time Retoor Python: 39.06818628311157
|
|
||||||
|
|
||||||
Time Rust: 1.187262773513794
|
|
||||||
Time C: 10.271284818649292
|
|
||||||
Time CPP: 2.0337636470794678
|
|
||||||
Time Borded CPP: 0.7784948348999023
|
|
||||||
Time Retoor Python: 37.15883994102478
|
|
||||||
|
|
||||||
Time Rust: 1.239715337753296
|
|
||||||
Time C: 11.51186990737915
|
|
||||||
Time CPP: 2.085871934890747
|
|
||||||
Time Borded CPP: 0.6888203620910645
|
|
||||||
Time Retoor Python: 35.5970196723938
|
|
||||||
```
|
|
||||||
|
|
||||||
https://snek.molodetz.nl/terminal.html ubuntu running thing instructions:
|
https://snek.molodetz.nl/terminal.html ubuntu running thing instructions:
|
||||||
```
|
```
|
||||||
mkdir /project
|
mkdir /project
|
||||||
@ -50,4 +16,39 @@ rustup default nightly
|
|||||||
make
|
make
|
||||||
make benchmark
|
make benchmark
|
||||||
python3 bench.py
|
python3 bench.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
clone: `git clone https://gitlab.com/jestdotty-group/draft/jisspam.git jest_rust`
|
||||||
|
|
||||||
|
edit the makefile (`vi makefile`) and add a build target:
|
||||||
|
```
|
||||||
|
build_jest:
|
||||||
|
@echo "compiling jest_rust project"
|
||||||
|
cd jest_rust && cargo build --release && cp target/release/jisspam ..
|
||||||
|
```
|
||||||
|
append `build_jest` to the `all` target:
|
||||||
|
```
|
||||||
|
all: build run valgrind build_risspam run_risspam build_cpp build_borded_cpp build_py build_jest
|
||||||
|
```
|
||||||
|
|
||||||
|
add the following to the benchmark script (`vi bench.py`):
|
||||||
|
```py
|
||||||
|
time_start = time.time()
|
||||||
|
subprocess.check_output('./jisspam books/*.txt', shell=True)
|
||||||
|
print("Time Jest Rust:", time.time() - time_start)
|
||||||
|
```
|
||||||
|
|
||||||
|
run: `python3 bench.py`
|
||||||
|
output looks something like this:
|
||||||
|
```
|
||||||
|
***benchmarking***
|
||||||
|
Time C: 31.315868377685547
|
||||||
|
Time Rust: 41.232205867767334
|
||||||
|
Time CPP: 20.1683189868927
|
||||||
|
Time Borded CPP: 15.468477964401245
|
||||||
|
Time Jest Rust: 54.74523115158081
|
||||||
|
Time Retoor Python: 287.63036131858826
|
||||||
|
***end benchmark***
|
||||||
|
```
|
||||||
|
|
||||||
|
add `/jisspam` to `.gitignore` so the executable is not committed accidentally
|
@ -1,3 +1,4 @@
|
|||||||
|
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
||||||
use std::{env, fmt::Display, fs};
|
use std::{env, fmt::Display, fs};
|
||||||
|
|
||||||
static FORBIDDEN_WORDS: &'static [&'static str] = &[
|
static FORBIDDEN_WORDS: &'static [&'static str] = &[
|
||||||
@ -39,7 +40,7 @@ static FORBIDDEN_WORDS: &'static [&'static str] = &[
|
|||||||
];
|
];
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
struct Stats {
|
pub struct Stats {
|
||||||
file_count: u32,
|
file_count: u32,
|
||||||
failed_file_count: u32,
|
failed_file_count: u32,
|
||||||
|
|
||||||
@ -50,6 +51,48 @@ struct Stats {
|
|||||||
numeric_count: u32,
|
numeric_count: u32,
|
||||||
forbidden_count: u32,
|
forbidden_count: u32,
|
||||||
}
|
}
|
||||||
|
impl Stats {
|
||||||
|
pub fn process(&mut self, file: &str) {
|
||||||
|
let Ok(text) = fs::read_to_string(&file) else {
|
||||||
|
self.failed_file_count += 1;
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
self.file_count += 1;
|
||||||
|
for sentence in text
|
||||||
|
.split('.')
|
||||||
|
.map(|s| s.trim())
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
{
|
||||||
|
self.sentence_count += 1;
|
||||||
|
for word in sentence
|
||||||
|
.split_whitespace()
|
||||||
|
.map(|s| s.trim())
|
||||||
|
.filter(|s| !s.is_empty())
|
||||||
|
{
|
||||||
|
self.word_count += 1;
|
||||||
|
//get all numbers counted
|
||||||
|
let mut all_capitalized = true;
|
||||||
|
for char in word.chars() {
|
||||||
|
if char.is_numeric() {
|
||||||
|
self.numeric_count += 1;
|
||||||
|
}
|
||||||
|
if !char.is_ascii_uppercase() {
|
||||||
|
all_capitalized = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if all_capitalized {
|
||||||
|
self.capitalized_count += 1;
|
||||||
|
}
|
||||||
|
let lowercase_word = word.to_lowercase();
|
||||||
|
for forbidden_word in FORBIDDEN_WORDS {
|
||||||
|
if lowercase_word.contains(forbidden_word) {
|
||||||
|
self.forbidden_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
impl Display for Stats {
|
impl Display for Stats {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
writeln!(f, "file count: {}", self.file_count)?;
|
writeln!(f, "file count: {}", self.file_count)?;
|
||||||
@ -83,48 +126,16 @@ impl Display for Stats {
|
|||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let files = env::args().skip(1);
|
let files = env::args().skip(1);
|
||||||
let mut stats = Stats::default();
|
// let mut stats = Stats::default();
|
||||||
for file in files {
|
// for file in files {
|
||||||
let Ok(text) = fs::read_to_string(&file) else {
|
// stats.process(&file);
|
||||||
stats.failed_file_count += 1;
|
// }
|
||||||
continue;
|
let files = files.collect::<Vec<_>>();
|
||||||
};
|
files.par_iter().for_each(|file| {
|
||||||
stats.file_count += 1;
|
let mut stats = Stats::default();
|
||||||
for sentence in text
|
stats.process(&file);
|
||||||
.split('.')
|
println!("{stats}");
|
||||||
.map(|s| s.trim())
|
});
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
{
|
|
||||||
stats.sentence_count += 1;
|
|
||||||
for word in sentence
|
|
||||||
.split_whitespace()
|
|
||||||
.map(|s| s.trim())
|
|
||||||
.filter(|s| !s.is_empty())
|
|
||||||
{
|
|
||||||
stats.word_count += 1;
|
|
||||||
//get all numbers counted
|
|
||||||
let mut all_capitalized = true;
|
|
||||||
for char in word.chars() {
|
|
||||||
if char.is_numeric() {
|
|
||||||
stats.numeric_count += 1;
|
|
||||||
}
|
|
||||||
if !char.is_ascii_uppercase() {
|
|
||||||
all_capitalized = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if all_capitalized {
|
|
||||||
stats.capitalized_count += 1;
|
|
||||||
}
|
|
||||||
let lowercase_word = word.to_lowercase();
|
|
||||||
for forbidden_word in FORBIDDEN_WORDS {
|
|
||||||
if lowercase_word.contains(forbidden_word) {
|
|
||||||
stats.forbidden_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
println!("{stats}");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
Reference in New Issue
Block a user