整点mut ptr

This commit is contained in:
shenjack 2024-05-15 00:16:15 +08:00
parent 3e8770c441
commit 0c9bc1a6be
Signed by: shenjack
GPG Key ID: 7B1134A979775551
3 changed files with 42 additions and 37 deletions

2
Cargo.lock generated
View File

@ -1831,7 +1831,7 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "tswn"
version = "0.1.14"
version = "0.1.15"
dependencies = [
"base16384",
"chrono",

View File

@ -1,7 +1,7 @@
[package]
name = "tswn"
description = "tool shenjack work shop namerena"
version = "0.1.14"
version = "0.1.15"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,8 @@
use std::cmp::min;
#[cfg(feature = "simd")]
use std::simd::u8x64;
// 考虑到 avx512 的普及性
// 咱还是用 u8x32 吧
use std::simd::u8x32;
use tracing::warn;
@ -165,13 +167,13 @@ impl Namer {
{
let mut simd_val = val;
let mut simd_val_b = [0_u8; 256];
let simd_181 = u8x64::splat(181);
let simd_160 = u8x64::splat(160);
let simd_63 = u8x64::splat(63);
let simd_181 = u8x32::splat(181);
let simd_160 = u8x32::splat(160);
let simd_63 = u8x32::splat(63);
for i in (0..256).step_by(64) {
for i in (0..256).step_by(32) {
// 一次性加载64个数字
let mut x = u8x64::from_slice(unsafe { simd_val.get_unchecked(i..) });
let mut x = u8x32::from_slice(unsafe { simd_val.get_unchecked(i..) });
x = x * simd_181 + simd_160;
// 写入到 simd_val
unsafe {
@ -313,43 +315,46 @@ impl Namer {
let name_bytes = name.as_bytes();
let name_len = name_bytes.len();
let b_name_len = name_len + 1;
unsafe {
let val_ptr = self.val.as_mut_ptr();
for _ in 0..2 {
let mut s = 0_u8;
unsafe {
self.val.swap_unchecked(s as usize, 0);
// self.val.swap_unchecked(s as usize, 0);
std::ptr::swap(val_ptr.add(s as usize), val_ptr);
let mut k = 0;
for i in 0..256 {
s = s.wrapping_add(if k == 0 { 0 } else { *name_bytes.get_unchecked(k - 1) });
s = s.wrapping_add(*self.val.get_unchecked(i));
self.val.swap_unchecked(i, s as usize);
k = if k == b_name_len - 1 { 0 } else { k + 1 };
// self.val.swap_unchecked(i, s as usize);
std::ptr::swap(val_ptr.add(i), val_ptr.add(s as usize));
k = if k == name_len { 0 } else { k + 1 };
}
}
}
// simd!
#[cfg(feature = "simd")]
{
let mut simd_val = [0_u8; 256];
let mut simd_val_b = [0_u8; 256];
let simd_181 = u8x64::splat(181);
let simd_160 = u8x64::splat(160);
let simd_63 = u8x64::splat(63);
let simd_181 = u8x32::splat(181);
let simd_160 = u8x32::splat(160);
let simd_63 = u8x32::splat(63);
for i in (0..256).step_by(64) {
for i in (0..256).step_by(32) {
unsafe {
let mut x = u8x64::from_slice(self.val.get_unchecked(i..i + 64));
let mut x = u8x32::from_slice(self.val.get_unchecked(i..));
x = x * simd_181 + simd_160;
x.copy_to_slice(simd_val.get_unchecked_mut(i..i + 64));
x.copy_to_slice(simd_val.get_unchecked_mut(i..));
let y = x & simd_63;
y.copy_to_slice(simd_val_b.get_unchecked_mut(i..i + 64));
y.copy_to_slice(simd_val_b.get_unchecked_mut(i..));
}
}
let mut mod_count = 0;
for i in 0..96 {
unsafe {
if simd_val.get_unchecked(i) > &88 && simd_val.get_unchecked(i) < &217 {
if *simd_val.get_unchecked(i) > 88 && *simd_val.get_unchecked(i) < 217 {
*self.name_base.get_unchecked_mut(mod_count as usize) = *simd_val_b.get_unchecked(i);
mod_count += 1;
}
@ -452,17 +457,17 @@ impl Namer {
{
let mut simd_val = self.val;
let mut simd_val_b = self.val;
let simd_181 = u8x64::splat(181);
let simd_199 = u8x64::splat(199);
let simd_128 = u8x64::splat(128);
let simd_53 = u8x64::splat(53);
let simd_63 = u8x64::splat(63);
let simd_32 = u8x64::splat(32);
let simd_181 = u8x32::splat(181);
let simd_199 = u8x32::splat(199);
let simd_128 = u8x32::splat(128);
let simd_53 = u8x32::splat(53);
let simd_63 = u8x32::splat(63);
let simd_32 = u8x32::splat(32);
for i in (0..256).step_by(64) {
for i in (0..256).step_by(32) {
unsafe {
let mut x = u8x64::from_slice(simd_val.get_unchecked(i..));
let mut y = u8x64::from_slice(simd_val_b.get_unchecked(i..));
let mut x = u8x32::from_slice(simd_val.get_unchecked(i..));
let mut y = u8x32::from_slice(simd_val_b.get_unchecked(i..));
x = (x * simd_181 + simd_199) & simd_128;
y = (y * simd_53) & simd_63 ^ simd_32;
x.copy_to_slice(simd_val.get_unchecked_mut(i..));