Improve Rust performance with some initial SIMD work
This commit is contained in:
parent
8224a609c5
commit
dc3e47f365
@ -3,3 +3,7 @@ rustflags = [
|
||||
"-Ztls-model=initial-exec",
|
||||
"-Ctarget-cpu=native"
|
||||
]
|
||||
|
||||
#[unstable]
|
||||
#build-std = ["compiler_builtins", "alloc", "std", "panic_abort"] # choose only what you need
|
||||
#build-std-features = ["compiler-builtins-mem"]
|
||||
|
||||
66
12bitfloat_rust/risspam/Cargo.lock
generated
66
12bitfloat_rust/risspam/Cargo.lock
generated
@ -2,17 +2,6 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "core_affinity"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a034b3a7b624016c6e13f5df875747cc25f884156aad2abd12b6c46797971342"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"num_cpus",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.5"
|
||||
@ -44,37 +33,6 @@ version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.176"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.10.0"
|
||||
@ -99,29 +57,5 @@ dependencies = [
|
||||
name = "risspam"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"core_affinity",
|
||||
"memmap2",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
@ -11,8 +11,10 @@ debug = "line-tables-only"
|
||||
|
||||
[dependencies]
|
||||
rayon = "1.10.0"
|
||||
memmap2 = "0.9.8"
|
||||
core_affinity = "0.8.3"
|
||||
#mimalloc = "0.1.48"
|
||||
#io-uring = "0.7"
|
||||
#memmap2 = "0.9.8"
|
||||
#core_affinity = "0.8.3"
|
||||
#libc = "0.2.176"
|
||||
#glommio = "0.9.0"
|
||||
#monoio = "0.2.4"
|
||||
|
||||
@ -5,6 +5,8 @@
|
||||
|
||||
//mod books;
|
||||
|
||||
mod uring;
|
||||
|
||||
use rayon::prelude::*;
|
||||
use std::cell::RefCell;
|
||||
use std::ffi::OsStr;
|
||||
@ -15,6 +17,10 @@ use std::thread::available_parallelism;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{array, env, hint, process};
|
||||
|
||||
// NOTE: mimalloc is actually slower
|
||||
//#[global_allocator]
|
||||
//static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
#[inline]
|
||||
fn is_ascii_whitespace(b: u8) -> bool {
|
||||
matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ')
|
||||
@ -74,9 +80,9 @@ extern "rust-cold" fn die() -> ! {
|
||||
|
||||
fn work(file_path: &OsStr, stats: &mut Stats) {
|
||||
WORK_STATE.with_borrow_mut(|state: &mut WorkState| {
|
||||
// // Load file
|
||||
// let start_time = Instant::now();
|
||||
|
||||
// Load file
|
||||
let start_time = Instant::now();
|
||||
|
||||
// let Ok(text) = fs::read(file_path) else {
|
||||
// eprintln!("invalid file!");
|
||||
// process::abort();
|
||||
@ -102,19 +108,260 @@ fn work(file_path: &OsStr, stats: &mut Stats) {
|
||||
read_offset += rb;
|
||||
}
|
||||
let text = &state.work_mem[..read_offset];
|
||||
|
||||
|
||||
// file.read_exact(&mut state.work_mem[..file_len]).unwrap();
|
||||
|
||||
|
||||
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
|
||||
|
||||
let time_reading = start_time.elapsed();
|
||||
{
|
||||
let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||
*guard += time_reading;
|
||||
}
|
||||
|
||||
// analyze(&text, stats);
|
||||
// analyze2(&text, stats);
|
||||
analyze_simd(&text, stats);
|
||||
});
|
||||
}
|
||||
|
||||
// let time_reading = start_time.elapsed();
|
||||
// {
|
||||
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||
// *guard += time_reading;
|
||||
fn analyze_simd(text: &[u8], stats: &mut Stats) {
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
let mut sentences = 0;
|
||||
let mut words = 0;
|
||||
let mut capitalizeds = 0;
|
||||
let mut numbers = 0;
|
||||
let mut forbiddens = 0;
|
||||
|
||||
const BLOCK_ITEMS: usize = 32; // MUST be 32! 256 bit ymm regs
|
||||
const REGION_NUM_BLOCKS: usize = 16;
|
||||
|
||||
if text.len() < (BLOCK_ITEMS * REGION_NUM_BLOCKS) {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut offset = 0;
|
||||
while offset < text.len() {
|
||||
for block_idx in 0..REGION_NUM_BLOCKS {
|
||||
// Get block ptr from mem
|
||||
let block_slice: &[u8; 32] = unsafe {
|
||||
&*text.as_ptr()
|
||||
.byte_offset((offset + block_idx * BLOCK_ITEMS) as isize)
|
||||
.cast()
|
||||
};
|
||||
|
||||
// NOTE: This has *truly* terrible codegen
|
||||
// for i in 0..32 {
|
||||
// dots += (chunk[i] == b'.') as u32;
|
||||
// numbers += (chunk[i] >= b'0' && chunk[i] <= b'9') as u32;
|
||||
// }
|
||||
|
||||
unsafe {
|
||||
let m_chars = _mm256_loadu_si256(block_slice.as_ptr().cast());
|
||||
|
||||
// Count dots
|
||||
let m_dots_masks = _mm256_cmpeq_epi8(m_chars, _mm256_set1_epi8(b'.' as i8));
|
||||
sentences += i32::count_ones(_mm256_movemask_epi8(m_dots_masks));
|
||||
|
||||
// Count digits
|
||||
let m_ge_0_masks = _mm256_cmpgt_epi8(m_chars, _mm256_set1_epi8(b'0' as i8 - 1));
|
||||
let m_le_9_masks = _mm256_cmpgt_epi8(_mm256_set1_epi8(b'9' as i8 + 1), m_chars);
|
||||
numbers += i32::count_ones(_mm256_movemask_epi8(_mm256_and_si256(m_ge_0_masks, m_le_9_masks)));
|
||||
}
|
||||
}
|
||||
|
||||
offset += BLOCK_ITEMS * REGION_NUM_BLOCKS;
|
||||
}
|
||||
|
||||
if text.len() == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut idx = 0;
|
||||
'full_loop: loop {
|
||||
unsafe {
|
||||
hint::assert_unchecked(idx < text.len());
|
||||
}
|
||||
|
||||
// Skip whitespace
|
||||
while is_ascii_whitespace(text[idx]) { // Pretty sure this is UB. There have been out of bounds panics
|
||||
idx += 1;
|
||||
if hint::unlikely(idx >= text.len()) {
|
||||
break 'full_loop;
|
||||
}
|
||||
}
|
||||
|
||||
// Find end of word
|
||||
let word_start = idx;
|
||||
let mut has_non_upper = false;
|
||||
|
||||
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
|
||||
idx += 1;
|
||||
if hint::unlikely(idx >= text.len()) {
|
||||
break 'find_word_end;
|
||||
}
|
||||
|
||||
if !is_ascii_upper(b) {
|
||||
has_non_upper = true;
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
hint::assert_unchecked(word_start < idx && idx <= text.len());
|
||||
}
|
||||
|
||||
let word = &text[word_start..idx];
|
||||
|
||||
words += 1;
|
||||
|
||||
if !has_non_upper {
|
||||
capitalizeds += 1;
|
||||
}
|
||||
|
||||
// Check forbidden
|
||||
if unsafe { FW_TAB.lookup(word) } {
|
||||
// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab
|
||||
forbiddens += 1;
|
||||
}
|
||||
}
|
||||
|
||||
stats.sentences = sentences;
|
||||
stats.words = words;
|
||||
stats.capitalizeds = capitalizeds;
|
||||
stats.numbers = numbers;
|
||||
stats.forbiddens = forbiddens;
|
||||
}
|
||||
|
||||
fn analyze2(text: &[u8], stats: &mut Stats) {
|
||||
// // NOTE: mmap is quite a bit slower
|
||||
// // Load file
|
||||
// let Ok(file) = File::open(file_path) else {
|
||||
// eprintln!("invalid file!");
|
||||
// std::process::abort();
|
||||
// };
|
||||
// let mmap = unsafe {
|
||||
// Mmap::map(&file).unwrap()
|
||||
// };
|
||||
// mem::forget(file);
|
||||
// let text = &*mmap;
|
||||
|
||||
// // Load file
|
||||
// let start_time = Instant::now();
|
||||
// let Ok(text) = fs::read(file_path) else {
|
||||
// eprintln!("invalid file!");
|
||||
// process::abort();
|
||||
// };
|
||||
// let time_reading = start_time.elapsed();
|
||||
// {
|
||||
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||
// *guard += time_reading;
|
||||
// }
|
||||
|
||||
let mut sentences = 0;
|
||||
let mut words = 0;
|
||||
let mut capitalizeds = 0;
|
||||
let mut numbers = 0;
|
||||
let mut forbiddens = 0;
|
||||
|
||||
// __A_
|
||||
|
||||
// B_BB
|
||||
|
||||
if text.len() == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut idx = 0;
|
||||
'full_loop: loop {
|
||||
unsafe {
|
||||
hint::assert_unchecked(idx < text.len());
|
||||
}
|
||||
|
||||
// Skip whitespace
|
||||
while is_ascii_whitespace(text[idx]) { // Pretty sure this is UB. There have been out of bounds panics
|
||||
idx += 1;
|
||||
if hint::unlikely(idx >= text.len()) {
|
||||
break 'full_loop;
|
||||
}
|
||||
}
|
||||
|
||||
// Find end of word
|
||||
let word_start = idx;
|
||||
let mut has_non_upper = false;
|
||||
|
||||
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
|
||||
idx += 1;
|
||||
if hint::unlikely(idx >= text.len()) {
|
||||
break 'find_word_end;
|
||||
}
|
||||
|
||||
if !is_ascii_upper(b) {
|
||||
has_non_upper = true;
|
||||
}
|
||||
if b == b'.' {
|
||||
sentences += 1;
|
||||
}
|
||||
if is_ascii_digit(b) {
|
||||
numbers += 1;
|
||||
}
|
||||
|
||||
// sentences += (b == b'.') as u32;
|
||||
// numbers += is_ascii_digit(b) as u32;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
hint::assert_unchecked(word_start < idx && idx <= text.len());
|
||||
}
|
||||
|
||||
let word = &text[word_start..idx];
|
||||
|
||||
// Per-char logic
|
||||
// for &b in word {
|
||||
// if !is_ascii_upper(b) {
|
||||
// has_non_upper = true;
|
||||
// }
|
||||
// if b == b'.' {
|
||||
// sentences += 1;
|
||||
// }
|
||||
// if is_ascii_digit(b) {
|
||||
// numbers += 1;
|
||||
// }
|
||||
// }
|
||||
// for &b in word {
|
||||
// if !is_ascii_upper(b) {
|
||||
// has_non_upper = true;
|
||||
// }
|
||||
// }
|
||||
// for &b in word {
|
||||
// if b == b'.' {
|
||||
// sentences += 1;
|
||||
// }
|
||||
// }
|
||||
// for &b in word {
|
||||
// if is_ascii_digit(b) {
|
||||
// numbers += 1;
|
||||
// }
|
||||
// }
|
||||
|
||||
analyze(&text, stats);
|
||||
});
|
||||
words += 1;
|
||||
|
||||
if !has_non_upper {
|
||||
capitalizeds += 1;
|
||||
}
|
||||
|
||||
// Check forbidden
|
||||
if unsafe { FW_TAB.lookup(word) } {
|
||||
// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab
|
||||
forbiddens += 1;
|
||||
}
|
||||
}
|
||||
|
||||
stats.sentences = sentences;
|
||||
stats.words = words;
|
||||
stats.capitalizeds = capitalizeds;
|
||||
stats.numbers = numbers;
|
||||
stats.forbiddens = forbiddens;
|
||||
}
|
||||
|
||||
fn analyze(text: &[u8], stats: &mut Stats) {
|
||||
@ -148,6 +395,14 @@ fn analyze(text: &[u8], stats: &mut Stats) {
|
||||
let mut numbers = 0;
|
||||
let mut forbiddens = 0;
|
||||
|
||||
// __A_
|
||||
|
||||
// B_BB
|
||||
|
||||
if text.len() == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut idx = 0;
|
||||
'full_loop: loop {
|
||||
// Skip whitespace
|
||||
@ -184,7 +439,7 @@ fn analyze(text: &[u8], stats: &mut Stats) {
|
||||
|
||||
let word = &text[word_start..idx];
|
||||
// let word = unsafe { &text.get_unchecked(word_start..idx) };
|
||||
|
||||
|
||||
// dbg!(str::from_utf8(word).unwrap());
|
||||
|
||||
words += 1;
|
||||
@ -360,6 +615,12 @@ fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
|
||||
*/
|
||||
|
||||
fn main() {
|
||||
// // DEBUG:
|
||||
// uring::test();
|
||||
// if 1 == 1 {
|
||||
// return;
|
||||
// }
|
||||
|
||||
// Read in files from args
|
||||
let mut files = Vec::with_capacity(env::args().len());
|
||||
// let mut do_parallel = false;
|
||||
|
||||
8
12bitfloat_rust/risspam/src/uring.rs
Normal file
8
12bitfloat_rust/risspam/src/uring.rs
Normal file
@ -0,0 +1,8 @@
|
||||
|
||||
/// Placeholder for io_uring experiments.
///
/// The body holds only a commented-out sketch of building an `io_uring` ring, so calling
/// this function currently does nothing.
pub fn test() {
|
||||
// let ring = io_uring::Builder::<io_uring::squeue::Entry, io_uring::cqueue::Entry>::default()
|
||||
// .build(128)
|
||||
// .unwrap();
|
||||
//
|
||||
// ring.
|
||||
}
|
||||
2
Makefile
2
Makefile
@ -33,6 +33,8 @@ run: run_spam wl run_not_spam
|
||||
run_risspam: run_spam_risspam run_not_spam_risspam
|
||||
|
||||
bench_rust: build_risspam benchmark_only
|
||||
bench_rust_only: build_risspam
|
||||
cd 12bitfloat_rust/risspam && time ./target/release/risspam ../../books/*.txt
|
||||
|
||||
format:
|
||||
clang-format *.c *.h -i
|
||||
|
||||
Loading…
Reference in New Issue
Block a user