This commit is contained in:
shenjack 2024-05-25 12:16:03 +08:00
parent 76c3928313
commit 344d6abd45
Signed by: shenjack
GPG Key ID: 7B1134A979775551
8 changed files with 183 additions and 85 deletions

2
Cargo.lock generated
View File

@ -1831,7 +1831,7 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "tswn"
version = "0.2.16"
version = "0.3.0"
dependencies = [
"base16384",
"chrono",

View File

@ -1,7 +1,7 @@
[package]
name = "tswn"
description = "tool shenjack work shop namerena"
version = "0.2.16"
version = "0.3.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -6,6 +6,35 @@
>
> 也是 target-cpu=native
## 0.2.17
```text
.\runs\tswn-0217.exe --team shenjacka -q 5500 --end 1000000000 --bench --bench-core 10
2024-05-24T17:45:13.668057Z INFO tswn: 输出文件: "./namerena/namerena-shenjacka-2024-05-25_01-45-13.csv"
2024-05-24T17:45:13.668234Z INFO tswn: 开始: 0 结尾: 1000000000
2024-05-24T17:45:13.668263Z INFO tswn: 线程数: 10
2024-05-24T17:45:13.668288Z INFO tswn: 八围预期: 640
2024-05-24T17:45:13.668312Z INFO tswn: 队伍名: shenjacka
2024-05-24T17:45:13.668336Z INFO tswn: 输出文件名: "./namerena/namerena-shenjacka-2024-05-25_01-45-13.csv"
2024-05-24T17:45:13.668360Z INFO tswn: 预期状态输出时间间隔: 10 秒
2024-05-24T17:45:13.668384Z INFO tswn: 是否启动 benchmark 模式: true
2024-05-24T17:45:13.668409Z INFO tswn: 开始 benchmark
2024-05-24T17:45:13.668443Z INFO tswn: 设置进程亲和性成功 1
2024-05-24T17:45:13.668489Z INFO tswn: 设置线程亲和性成功 1024
2024-05-24T17:45:13.786660Z INFO tswn::cacluate: | 1|Id: 999991|846240.62/s 731.152E/d 0.12⬆️|0 |预计:0:1:58|
2024-05-24T17:45:23.851392Z INFO tswn::cacluate: | 1|Id: 85623991|840805.41/s 726.456E/d 10.06⬇️|0 |预计:0:1:48|
2024-05-24T17:45:33.986282Z INFO tswn::cacluate: | 1|Id: 169704491|829621.17/s 716.793E/d 10.13⬇️|0 |预计:0:1:40|
2024-05-24T17:45:43.811321Z INFO tswn::cacluate: | 1|Id: 252666591|844401.73/s 729.563E/d 9.82⬆️|0 |预计:0:1:28|
2024-05-24T17:45:54.027554Z INFO tswn::cacluate: | 1|Id: 337106691|826536.42/s 714.127E/d 10.22⬇️|0 |预计:0:1:20|
2024-05-24T17:46:03.814511Z INFO tswn::cacluate: | 1|Id: 419760291|844535.37/s 729.679E/d 9.79⬆️|0 |预计:0:1:8|
2024-05-24T17:46:13.819607Z INFO tswn::cacluate: | 1|Id: 504213791|844115.45/s 729.316E/d 10.00⬇️|0 |预计:0:0:58|
2024-05-24T17:46:23.977954Z INFO tswn::cacluate: | 1|Id: 588625291|830964.22/s 717.953E/d 10.16⬇️|0 |预计:0:0:49|
2024-05-24T17:46:33.817530Z INFO tswn::cacluate: | 1|Id: 671721691|844519.05/s 729.664E/d 9.84⬆️|0 |预计:0:0:38|
2024-05-24T17:46:43.816524Z INFO tswn::cacluate: | 1|Id: 756173591|844612.89/s 729.746E/d 10.00⬆️|0 |预计:0:0:28|
2024-05-24T17:46:53.819867Z INFO tswn::cacluate: | 1|Id: 840634791|844342.60/s 729.512E/d 10.00⬇️|0 |预计:0:0:18|
2024-05-24T17:47:03.835116Z INFO tswn::cacluate: | 1|Id: 925068991|843065.55/s 728.409E/d 10.02⬇️|0 |预计:0:0:8|
```
## 0.2.16
```text

View File

@ -1,5 +1,23 @@
# tswn 的 更新
## 0.3.x
### 0.3.0
`--bench-core` 改成了 `--core-pick`
同时也支持在单线程运行的时候跑在指定的核心上了
然后加入了强制的过滤机制
> 就是卡常 (常数级别的性能优化)
反正现在在我的 5800x 上能跑到 700E/d (单线程) 了
> 我预计后面会在多线程运行的时候加上总计效率的统计
## 0.2.15~16
一些提前过滤的东西
## 0.1.x
### 0.1.6~9

View File

@ -59,10 +59,6 @@ pub fn cacl(config: CacluateConfig, id: u64, outfile: &PathBuf) {
let mut main_namer = Namer::new_from_team_namer_unchecked(&team_namer, "dummy");
for i in (config.start + id..config.end).step_by(config.thread_count as usize) {
let name = gen_name(i);
main_namer.replace_name(&team_namer, &name);
let prop = main_namer.get_property();
k += 1;
if k >= report_interval {
let now = std::time::Instant::now();
@ -100,6 +96,14 @@ pub fn cacl(config: CacluateConfig, id: u64, outfile: &PathBuf) {
start_time = std::time::Instant::now();
k = 0;
}
// 这堆操作放在这边了, 保证统计没问题
let name = gen_name(i);
// 新加的提前检测
if !main_namer.replace_name(&team_namer, &name) {
continue;
}
// println!("{} {}", i, name);
let prop = main_namer.get_property();
if prop > config.prop_expect as f32 {
let name = gen_name(i);
@ -110,7 +114,7 @@ pub fn cacl(config: CacluateConfig, id: u64, outfile: &PathBuf) {
let xu = crate::evaluate::xuping::XuPing2_0_1015::evaluate(&main_namer);
let xu_qd = crate::evaluate::xuping::XuPing2_0_1015_QD::evaluate(&main_namer);
if xu < config.qp_expect as f64 && xu_qd < config.qp_expect as f64 {
if xu < config.qp_expect as f64 || xu_qd < config.qp_expect as f64 {
continue;
}

View File

@ -1,7 +1,5 @@
/// 虚评 1.3.1
#[cfg(feature = "simd")]
use std::simd::f64x4;
#[cfg(feature = "simd")]
use std::simd::f64x64;
#[cfg(feature = "simd")]
@ -213,17 +211,17 @@ pub fn predict_20(name: &Namer) -> f64 {
unsafe {
#[cfg(feature = "simd")]
{
let mut simd_sum = f64x4::splat(0.0);
// 加载四个, 计算, 然后加
for i in (0..xp.len() - 2).step_by(4) {
let simd_xp = f64x4::from_slice(&xp.get_unchecked(i..));
let simd_model = f64x4::from_slice(&xuping20::MODEL.get_unchecked(i..));
let mut simd_sum = f64x64::splat(0.0);
for i in (0..1024).step_by(64) {
let simd_xp = f64x64::from_slice(xp.get_unchecked(i..));
let simd_model = f64x64::from_slice(xuping20::MODEL.get_unchecked(i..));
simd_sum += simd_xp * simd_model;
}
// 最后加两个
sum += simd_sum.reduce_sum();
sum += xp.get_unchecked(xp.len() - 2) * xuping20::MODEL.get_unchecked(xp.len() - 2);
sum += xp.get_unchecked(xp.len() - 1) * xuping20::MODEL.get_unchecked(xp.len() - 1);
// 剩10个
for i in 0..10 {
sum += xp[i + 1024] * xuping20::MODEL[i + 1024];
}
}
#[cfg(not(feature = "simd"))]
{
@ -271,24 +269,20 @@ pub fn predict_20_qd(name: &Namer) -> f64 {
let mut sum = xuping20::BASE_QD;
// for (i, xp) in xp.iter().enumerate() {
// sum_qd += xp * xuping20::MODEL_QD[i];
// }
unsafe {
#[cfg(feature = "simd")]
{
let mut simd_sum = f64x4::splat(0.0);
// 加载四个, 计算, 然后加
for i in (0..xp.len() - 2).step_by(4) {
let simd_xp = f64x4::from_slice(&xp.get_unchecked(i..));
let simd_model = f64x4::from_slice(&xuping20::MODEL_QD.get_unchecked(i..));
let mut simd_sum = f64x64::splat(0.0);
for i in (0..1024).step_by(64) {
let simd_xp = f64x64::from_slice(xp.get_unchecked(i..));
let simd_model = f64x64::from_slice(xuping20::MODEL_QD.get_unchecked(i..));
simd_sum += simd_xp * simd_model;
}
// 最后加两个
sum += simd_sum.reduce_sum();
sum += xp.get_unchecked(xp.len() - 2) * xuping20::MODEL_QD.get_unchecked(xp.len() - 2);
sum += xp.get_unchecked(xp.len() - 1) * xuping20::MODEL_QD.get_unchecked(xp.len() - 1);
// 剩10个
for i in 0..10 {
sum += xp[i + 1024] * xuping20::MODEL_QD[i + 1024];
}
}
#[cfg(not(feature = "simd"))]
{
@ -331,7 +325,7 @@ mod test {
#[cfg(not(feature = "simd"))]
assert_eq!(predict_20(&namer), 3603.4389333619297);
#[cfg(feature = "simd")]
assert_eq!(predict_20(&namer), 3603.438933361928);
assert_eq!(predict_20(&namer), 3603.4389333619315);
}
#[test]
@ -343,8 +337,8 @@ mod test {
println!("{:?}", namer.get_info());
#[cfg(not(feature = "simd"))]
assert_eq!(predict_20_qd(&namer), 3603.4389333619297);
#[cfg(feature = "simd")]
assert_eq!(predict_20_qd(&namer), 3639.8920896688987);
#[cfg(feature = "simd")]
assert_eq!(predict_20_qd(&namer), 3639.8920896689424);
}
}

View File

@ -39,9 +39,9 @@ pub struct Command {
/// Windows 下会强制单线程, 且设置线程亲和性为核心 0
#[arg(long = "bench", default_value_t = false)]
pub bench: bool,
/// benchmark 模式下的核心亲和性核心号 (从 0 开始)
#[arg(long = "bench-core", default_value_t = 0)]
pub bench_core: usize,
/// 单线程模式 / benchmark 模式下的核心亲和性核心号 (从 0 开始)
#[arg(long = "core-pick", default_value_t = 0)]
pub pick_core: usize,
}
impl Command {
@ -54,7 +54,7 @@ impl Command {
qp_expect: self.qp_expect,
team: self.team.clone(),
report_interval: self.report_interval,
core_affinity: if self.bench { Some(1 << self.bench_core) } else { None },
core_affinity: if self.bench { Some(1 << self.pick_core) } else { None },
}
}
}
@ -124,28 +124,37 @@ fn main() {
if cli_arg.bench {
info!("开始 benchmark");
cli_arg.thread_count = 1;
let mut config = cli_arg.as_cacl_config();
config.core_affinity = Some(1 << cli_arg.bench_core);
config.core_affinity = Some(1 << cli_arg.pick_core);
set_process_cores(config.core_affinity.unwrap());
cacluate::cacl(config, 1, &out_path);
} else {
let mut n = 0;
let mut cores = 0;
for i in 0..cli_arg.thread_count {
n += 1;
if cli_arg.thread_count == 1 {
// 单线程运行的时候也是让他放在主线程跑
let mut config = cli_arg.as_cacl_config();
// 核心亲和性: n, n+1
config.core_affinity = Some(1 << i);
cores |= 1 << i;
let out_path = out_path.clone();
let thread_name = format!("thread_{}", n);
threads.push(std::thread::spawn(move || {
info!("线程 {} 开始计算", thread_name);
cacluate::cacl(config, n, &out_path);
info!("线程 {} 结束计算", thread_name);
}));
config.core_affinity = Some(1 << cli_arg.pick_core);
set_process_cores(config.core_affinity.unwrap());
cacluate::cacl(config, 1, &out_path);
} else {
for i in 0..cli_arg.thread_count {
n += 1;
let mut config = cli_arg.as_cacl_config();
// 核心亲和性: n, n+1
config.core_affinity = Some(1 << i);
cores |= 1 << i;
let out_path = out_path.clone();
let thread_name = format!("thread_{}", n);
threads.push(std::thread::spawn(move || {
info!("线程 {} 开始计算", thread_name);
cacluate::cacl(config, n, &out_path);
info!("线程 {} 结束计算", thread_name);
}));
}
set_process_cores(cores);
}
set_process_cores(cores);
}
for t in threads {

View File

@ -310,7 +310,7 @@ impl Namer {
/// 更新当前的名字
#[inline(always)]
pub fn replace_name(&mut self, team_namer: &TeamNamer, name: &str) {
pub fn replace_name(&mut self, team_namer: &TeamNamer, name: &str) -> bool {
self.val = team_namer.clone_vals();
self.name = name.to_string();
@ -387,21 +387,20 @@ impl Namer {
}
// 计算 name_prop
unsafe {
let mut full = 0;
let mut prop_name = [0_u8; 32];
prop_name.copy_from_slice(self.name_base.get_unchecked(0..32));
prop_name.get_unchecked_mut(0..10).sort_unstable();
*self.name_prop.get_unchecked_mut(0) = 154
+ *prop_name.get_unchecked(3) as u32
+ *prop_name.get_unchecked(4) as u32
+ *prop_name.get_unchecked(5) as u32
+ *prop_name.get_unchecked(6) as u32;
*self.name_prop.get_unchecked_mut(1) = median(
*prop_name.get_unchecked(10),
*prop_name.get_unchecked(11),
*prop_name.get_unchecked(12),
// 加一些特殊检测
*self.name_prop.get_unchecked_mut(7) = median(
*prop_name.get_unchecked(28),
*prop_name.get_unchecked(29),
*prop_name.get_unchecked(30),
) as u32
+ 36;
full += self.name_prop.get_unchecked(7) - 36;
if full < 24 {
return false;
}
*self.name_prop.get_unchecked_mut(2) = median(
*prop_name.get_unchecked(13),
*prop_name.get_unchecked(14),
@ -414,6 +413,22 @@ impl Namer {
*prop_name.get_unchecked(18),
) as u32
+ 36;
*self.name_prop.get_unchecked_mut(6) = median(
*prop_name.get_unchecked(25),
*prop_name.get_unchecked(26),
*prop_name.get_unchecked(27),
) as u32
+ 36;
full += self.name_prop.get_unchecked(2) + self.name_prop.get_unchecked(3) + self.name_prop.get_unchecked(6) - 108;
if full < 165 {
return false;
}
*self.name_prop.get_unchecked_mut(1) = median(
*prop_name.get_unchecked(10),
*prop_name.get_unchecked(11),
*prop_name.get_unchecked(12),
) as u32
+ 36;
*self.name_prop.get_unchecked_mut(4) = median(
*prop_name.get_unchecked(19),
*prop_name.get_unchecked(20),
@ -426,19 +441,23 @@ impl Namer {
*prop_name.get_unchecked(24),
) as u32
+ 36;
*self.name_prop.get_unchecked_mut(6) = median(
*prop_name.get_unchecked(25),
*prop_name.get_unchecked(26),
*prop_name.get_unchecked(27),
) as u32
+ 36;
*self.name_prop.get_unchecked_mut(7) = median(
*prop_name.get_unchecked(28),
*prop_name.get_unchecked(29),
*prop_name.get_unchecked(30),
) as u32
+ 36;
full += self.name_prop.get_unchecked(1) + self.name_prop.get_unchecked(4) + self.name_prop.get_unchecked(5) - 108;
if full < 250 {
return false;
}
prop_name.get_unchecked_mut(0..10).sort_unstable();
*self.name_prop.get_unchecked_mut(0) = 154
+ *prop_name.get_unchecked(3) as u32
+ *prop_name.get_unchecked(4) as u32
+ *prop_name.get_unchecked(5) as u32
+ *prop_name.get_unchecked(6) as u32;
full += self.name_prop.get_unchecked(0) / 3 + 154;
if full < 380 {
println!("name_prop[0] < 380 {}", self.name);
return false;
}
}
true
}
#[inline(always)]
@ -450,22 +469,18 @@ impl Namer {
#[cfg(feature = "simd")]
{
let mut simd_val = self.val;
let mut simd_val_b = self.val;
let mut simd_val = [0_u8; 256];
let mut simd_val_b = [0_u8; 256];
let simd_181 = u8x32::splat(181);
let simd_199 = u8x32::splat(199);
let simd_128 = u8x32::splat(128);
let simd_53 = u8x32::splat(53);
let simd_160 = u8x32::splat(160);
let simd_63 = u8x32::splat(63);
let simd_32 = u8x32::splat(32);
for i in (0..256).step_by(32) {
unsafe {
let mut x = u8x32::from_slice(simd_val.get_unchecked(i..));
let mut y = u8x32::from_slice(simd_val_b.get_unchecked(i..));
x = (x * simd_181 + simd_199) & simd_128;
y = (y * simd_53) & simd_63 ^ simd_32;
let mut x = u8x32::from_slice(self.val.get_unchecked(i..));
x = x * simd_181 + simd_160;
x.copy_to_slice(simd_val.get_unchecked_mut(i..));
let y = x & simd_63;
y.copy_to_slice(simd_val_b.get_unchecked_mut(i..));
}
}
@ -473,12 +488,41 @@ impl Namer {
let mut mod_count = 0;
for i in 0..256 {
unsafe {
if simd_val.get_unchecked(i) != &0 {
if *simd_val.get_unchecked(i) > 88 && *simd_val.get_unchecked(i) < 217 {
*self.name_base.get_unchecked_mut(mod_count as usize) = *simd_val_b.get_unchecked(i);
mod_count += 1;
}
}
}
// let mut simd_val = self.val;
// let mut simd_val_b = self.val;
// let simd_181 = u8x32::splat(181);
// let simd_199 = u8x32::splat(199);
// let simd_128 = u8x32::splat(128);
// let simd_53 = u8x32::splat(53);
// let simd_63 = u8x32::splat(63);
// let simd_32 = u8x32::splat(32);
// for i in (0..256).step_by(32) {
// unsafe {
// let mut x = u8x32::from_slice(simd_val.get_unchecked(i..));
// let mut y = u8x32::from_slice(simd_val_b.get_unchecked(i..));
// x = (x * simd_181 + simd_199) & simd_128;
// y = (y * simd_53) & simd_63 ^ simd_32;
// x.copy_to_slice(simd_val.get_unchecked_mut(i..));
// y.copy_to_slice(simd_val_b.get_unchecked_mut(i..));
// }
// }
// let mut mod_count = 0;
// for i in 0..256 {
// unsafe {
// if simd_val.get_unchecked(i) != &0 {
// *self.name_base.get_unchecked_mut(mod_count as usize) = *simd_val_b.get_unchecked(i);
// mod_count += 1;
// }
// }
// }
// const int N = 256, M = 128, K = 64, skill_cnt = 40, max_len = 25;
let mut a: u8 = 0;
let mut b: u8 = 0;