整点mut ptr

This commit is contained in:
shenjack 2024-05-15 00:16:15 +08:00
parent 3e8770c441
commit 0c9bc1a6be
Signed by: shenjack
GPG Key ID: 7B1134A979775551
3 changed files with 42 additions and 37 deletions

2
Cargo.lock generated
View File

@ -1831,7 +1831,7 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]] [[package]]
name = "tswn" name = "tswn"
version = "0.1.14" version = "0.1.15"
dependencies = [ dependencies = [
"base16384", "base16384",
"chrono", "chrono",

View File

@ -1,7 +1,7 @@
[package] [package]
name = "tswn" name = "tswn"
description = "tool shenjack work shop namerena" description = "tool shenjack work shop namerena"
version = "0.1.14" version = "0.1.15"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,8 @@
use std::cmp::min; use std::cmp::min;
#[cfg(feature = "simd")] #[cfg(feature = "simd")]
use std::simd::u8x64; // 考虑到 avx512 的普及性
// 咱还是用 u8x32 吧
use std::simd::u8x32;
use tracing::warn; use tracing::warn;
@ -165,13 +167,13 @@ impl Namer {
{ {
let mut simd_val = val; let mut simd_val = val;
let mut simd_val_b = [0_u8; 256]; let mut simd_val_b = [0_u8; 256];
let simd_181 = u8x64::splat(181); let simd_181 = u8x32::splat(181);
let simd_160 = u8x64::splat(160); let simd_160 = u8x32::splat(160);
let simd_63 = u8x64::splat(63); let simd_63 = u8x32::splat(63);
for i in (0..256).step_by(64) { for i in (0..256).step_by(32) {
// 一次性加载64个数字 // 一次性加载32个数字
let mut x = u8x64::from_slice(unsafe { simd_val.get_unchecked(i..) }); let mut x = u8x32::from_slice(unsafe { simd_val.get_unchecked(i..) });
x = x * simd_181 + simd_160; x = x * simd_181 + simd_160;
// 写入到 simd_val // 写入到 simd_val
unsafe { unsafe {
@ -313,43 +315,46 @@ impl Namer {
let name_bytes = name.as_bytes(); let name_bytes = name.as_bytes();
let name_len = name_bytes.len(); let name_len = name_bytes.len();
let b_name_len = name_len + 1;
for _ in 0..2 { unsafe {
let mut s = 0_u8; let val_ptr = self.val.as_mut_ptr();
unsafe { for _ in 0..2 {
self.val.swap_unchecked(s as usize, 0); let mut s = 0_u8;
let mut k = 0; // self.val.swap_unchecked(s as usize, 0);
for i in 0..256 { std::ptr::swap(val_ptr.add(s as usize), val_ptr);
s = s.wrapping_add(if k == 0 { 0 } else { *name_bytes.get_unchecked(k - 1) }); let mut k = 0;
s = s.wrapping_add(*self.val.get_unchecked(i)); for i in 0..256 {
self.val.swap_unchecked(i, s as usize); s = s.wrapping_add(if k == 0 { 0 } else { *name_bytes.get_unchecked(k - 1) });
k = if k == b_name_len - 1 { 0 } else { k + 1 }; s = s.wrapping_add(*self.val.get_unchecked(i));
} // self.val.swap_unchecked(i, s as usize);
std::ptr::swap(val_ptr.add(i), val_ptr.add(s as usize));
k = if k == name_len { 0 } else { k + 1 };
}
} }
} }
// simd! // simd!
#[cfg(feature = "simd")] #[cfg(feature = "simd")]
{ {
let mut simd_val = [0_u8; 256]; let mut simd_val = [0_u8; 256];
let mut simd_val_b = [0_u8; 256]; let mut simd_val_b = [0_u8; 256];
let simd_181 = u8x64::splat(181); let simd_181 = u8x32::splat(181);
let simd_160 = u8x64::splat(160); let simd_160 = u8x32::splat(160);
let simd_63 = u8x64::splat(63); let simd_63 = u8x32::splat(63);
for i in (0..256).step_by(64) { for i in (0..256).step_by(32) {
unsafe { unsafe {
let mut x = u8x64::from_slice(self.val.get_unchecked(i..i + 64)); let mut x = u8x32::from_slice(self.val.get_unchecked(i..));
x = x * simd_181 + simd_160; x = x * simd_181 + simd_160;
x.copy_to_slice(simd_val.get_unchecked_mut(i..i + 64)); x.copy_to_slice(simd_val.get_unchecked_mut(i..));
let y = x & simd_63; let y = x & simd_63;
y.copy_to_slice(simd_val_b.get_unchecked_mut(i..i + 64)); y.copy_to_slice(simd_val_b.get_unchecked_mut(i..));
} }
} }
let mut mod_count = 0; let mut mod_count = 0;
for i in 0..96 { for i in 0..96 {
unsafe { unsafe {
if simd_val.get_unchecked(i) > &88 && simd_val.get_unchecked(i) < &217 { if *simd_val.get_unchecked(i) > 88 && *simd_val.get_unchecked(i) < 217 {
*self.name_base.get_unchecked_mut(mod_count as usize) = *simd_val_b.get_unchecked(i); *self.name_base.get_unchecked_mut(mod_count as usize) = *simd_val_b.get_unchecked(i);
mod_count += 1; mod_count += 1;
} }
@ -452,17 +457,17 @@ impl Namer {
{ {
let mut simd_val = self.val; let mut simd_val = self.val;
let mut simd_val_b = self.val; let mut simd_val_b = self.val;
let simd_181 = u8x64::splat(181); let simd_181 = u8x32::splat(181);
let simd_199 = u8x64::splat(199); let simd_199 = u8x32::splat(199);
let simd_128 = u8x64::splat(128); let simd_128 = u8x32::splat(128);
let simd_53 = u8x64::splat(53); let simd_53 = u8x32::splat(53);
let simd_63 = u8x64::splat(63); let simd_63 = u8x32::splat(63);
let simd_32 = u8x64::splat(32); let simd_32 = u8x32::splat(32);
for i in (0..256).step_by(64) { for i in (0..256).step_by(32) {
unsafe { unsafe {
let mut x = u8x64::from_slice(simd_val.get_unchecked(i..)); let mut x = u8x32::from_slice(simd_val.get_unchecked(i..));
let mut y = u8x64::from_slice(simd_val_b.get_unchecked(i..)); let mut y = u8x32::from_slice(simd_val_b.get_unchecked(i..));
x = (x * simd_181 + simd_199) & simd_128; x = (x * simd_181 + simd_199) & simd_128;
y = (y * simd_53) & simd_63 ^ simd_32; y = (y * simd_53) & simd_63 ^ simd_32;
x.copy_to_slice(simd_val.get_unchecked_mut(i..)); x.copy_to_slice(simd_val.get_unchecked_mut(i..));