要长脑子了.png

This commit is contained in:
shenjack-5600u 2024-04-30 06:09:08 +08:00
parent 5d4a32ef01
commit 751f551141
Signed by: shenjack
GPG Key ID: FDF9864E11C7E79F
4 changed files with 78 additions and 26 deletions

View File

@ -109,11 +109,10 @@ pub fn cacl(config: CacluateConfig, id: u64, outfile: &PathBuf) {
let xu = crate::evaluate::xuping::XuPing2_0_1015::evaluate(&namer);
let xu_qd = crate::evaluate::xuping::XuPing2_0_1015_QD::evaluate(&namer);
if xu < config.qp_expect as f64 && xu_qd < config.qp_expect as f64{
if xu < config.qp_expect as f64 && xu_qd < config.qp_expect as f64 {
continue;
}
get_count += 1;
info!("Id:{:>15}|{}|{:.4}|{:.4}|{}", i, full_name, xu, xu_qd, namer.get_info());

View File

@ -54,11 +54,7 @@ impl Command {
qp_expect: self.qp_expect,
team: self.team.clone(),
report_interval: self.report_interval,
core_affinity: if self.bench {
Some(1 << self.bench_core)
} else {
None
},
core_affinity: if self.bench { Some(1 << self.bench_core) } else { None },
}
}
}
@ -84,7 +80,7 @@ pub fn set_thread2core(core: usize) {
pub fn set_process_cores(cores: usize) {
#[cfg(windows)]
unsafe {
use windows_sys::Win32::System::Threading::{SetProcessAffinityMask, GetCurrentProcess};
use windows_sys::Win32::System::Threading::{GetCurrentProcess, SetProcessAffinityMask};
let process = GetCurrentProcess();
let core_mask = cores;
match SetProcessAffinityMask(process, core_mask) {

View File

@ -1,10 +1,20 @@
@group(0) @binding(0) var<uniform> first_step: array<vec4<u32>, 64>;
// 处理第二步的两次 rc4 名称, 输入名字 bytes, 从第一步得到的 val
@group(1) @binding(0) var<storage, read> name_len: array<u32, 256>;
@group(1) @binding(1) var<storage, read> name_bytes: array<array<u32, 256>, 256>;
@compute
@workgroup_size(16, 16)
fn rc4_name(name_bytes: array<u32, 256>, name_len: u32) -> array<u32, 256> {
// var val: array<u32, 256> = first_step;
// 处理第二步的两次 rc4 名称, 输入名字 bytes, 从第一步得到的 val
// fn rc4_name(@location(0) workname_bytes: array<u32, 256>, @location(1) name_len: u32) -> @location(0) array<u32, 256> {
// 输入计算着色器的 index, 从全局数据中取对应位置的数据
fn rc4_name(@builtin(local_invocation_id) compute_pos: vec3<u32>) {
// 计算线程的 index
var index: u32 = compute_pos.x + compute_pos.y * 16u;
// 从全局数据中取对应位置的数据
var name_bytes: array<u32, 256> = name_bytes[index];
var name_len: u32 = name_len[index];
var val: array<u32, 256> = array<u32, 256>();
// first_step 的值复制到 val
// first_step 内当成连续的内存即可
@ -34,5 +44,5 @@ fn rc4_name(name_bytes: array<u32, 256>, name_len: u32) -> array<u32, 256> {
}
}
}
return val;
return;
}

View File

@ -6,15 +6,41 @@ pub struct Works {
/// 队伍名(统一)
pub team: String,
/// 队伍名(每个任务)
pub names: Vec<String>,
pub names: Vec<Vec<String>>,
}
impl Works {
    /// Creates a new work description from a shared team name and a flat list of names.
    ///
    /// The names are grouped into batches of exactly 256 entries each:
    ///
    /// * Fewer than 256 names (including none at all) produce a single batch,
    ///   padded at the end with empty strings.
    /// * 256 or more names are split into consecutive batches of 256; when the
    ///   total is not a multiple of 256, the final batch is padded with empty
    ///   strings so that no name is dropped.
    ///
    /// NOTE(review): the batch size presumably mirrors the 256-slot arrays read by
    /// the compute shader — confirm against the shader before changing it.
    pub fn new(team: String, names: Vec<String>) -> Self {
        /// Number of names carried by every batch.
        const BATCH_LEN: usize = 256;

        let names = if names.len() < BATCH_LEN {
            // Short (or empty) input: grow it in place and wrap it as the only batch.
            let mut names = names;
            names.resize(BATCH_LEN, String::new());
            vec![names]
        } else {
            // `chunks` (unlike `chunks_exact`) also yields the trailing partial chunk,
            // so the names past the last full multiple of 256 are kept and padded
            // instead of being silently discarded.
            names
                .chunks(BATCH_LEN)
                .map(|chunk| {
                    let mut batch = chunk.to_vec();
                    batch.resize(BATCH_LEN, String::new());
                    batch
                })
                .collect()
        };
        Self { team, names }
    }
}
#[cfg_attr(test, allow(dead_code))]
async fn run() {
let works = Works {
team: "team".to_string(),
names: vec!["name1".to_string(), "name2".to_string()],
};
let works = Works::new("team".to_string(), vec!["name1".to_string(), "name2".to_string()]);
let steps = execute_gpu(works).await.unwrap();
@ -97,10 +123,10 @@ async fn execute_gpu_inner(device: &wgpu::Device, queue: &wgpu::Queue, works: Wo
// A storage buffer (can be bound within a bind group and thus available to a shader).
// The destination of a copy.
// The source of a copy.
let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
let uniform_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("team bytes input buffer"),
contents: bytemuck::cast_slice(&uniform_val),
usage: wgpu::BufferUsages::STORAGE,
usage: wgpu::BufferUsages::UNIFORM,
});
// A bind group defines how buffers are accessed by shaders.
@ -114,21 +140,41 @@ async fn execute_gpu_inner(device: &wgpu::Device, queue: &wgpu::Queue, works: Wo
label: None,
layout: None,
module: &cs_module,
entry_point: "team_bytes",
entry_point: "rc4_name",
// constants: &Default::default(),
});
// Instantiates the bind group, once again specifying the binding of buffers.
let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
let bind_group_layout_0 = compute_pipeline.get_bind_group_layout(0);
let bind_group_0 = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bind_group_layout,
layout: &bind_group_layout_0,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: storage_buffer.as_entire_binding(),
resource: uniform_buffer.as_entire_binding(),
}],
});
let bind_group_layout_1 = compute_pipeline.get_bind_group_layout(1);
let bind_group_1 = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bind_group_layout_1,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: staging_buffer.as_entire_binding(),
},
// wgpu::BindGroupEntry {
// binding: 1,
// resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding {
// buffer: uniform_buffer,
// offset: 0,
// size,
// }),
// },
],
});
// A command encoder executes one or many pipelines.
// It is to WebGPU what a command buffer is to Vulkan.
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
@ -138,9 +184,10 @@ async fn execute_gpu_inner(device: &wgpu::Device, queue: &wgpu::Queue, works: Wo
timestamp_writes: None,
});
cpass.set_pipeline(&compute_pipeline);
cpass.set_bind_group(0, &bind_group, &[]);
cpass.set_bind_group(0, &bind_group_0, &[]);
cpass.set_bind_group(1, &bind_group_1, &[]);
cpass.insert_debug_marker("compute collatz iterations");
cpass.dispatch_workgroups(numbers.len() as u32, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed
cpass.dispatch_workgroups(uniform_val.len() as u32, 1, 1); // Number of cells to run, the (x,y,z) size of item being processed
}
// Sets adds copy operation to command encoder.
// Will copy data from storage buffer on GPU to staging buffer on CPU.