commit fc013bae9618db07256d324607ff1309f283cf59 Author: shenjack <3695888@qq.com> Date: Wed Oct 16 21:26:54 2024 +0800 reeeee diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..e675686 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,210 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +dependencies = [ + "backtrace", +] + +[[package]] +name = "backtrace" +version = "0.3.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "cc" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cl3" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823f24e72fa0c68aa14a250ae1c0848e68d4ae188b71c3972343e45b46f8644" +dependencies = [ + "libc", + "opencl-sys", + "thiserror", +] + +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "object" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +dependencies = [ + "memchr", +] + +[[package]] +name = "opencl-sys" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de15dd01496ae90c5799f5266184ab020082b4065800ff0b732f489371d0e5cf" +dependencies = [ + "libc", +] + +[[package]] +name = "opencl-test" +version = "0.1.0" +dependencies = [ + "anyhow", + "opencl3", +] + +[[package]] +name = "opencl3" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26ab4a90cb496f787d3934deb0c54fa9d65e7bed710c10071234aab0196fba04" +dependencies = [ + "cl3", + "libc", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8b64565 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "opencl-test" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { version = "1.0", features = ["backtrace"] } +opencl3 = { version = "0.9.5", features = ["serde"] } diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..3da9b11 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,157 @@ +use opencl3::command_queue::{CommandQueue, CL_QUEUE_PROFILING_ENABLE}; +use opencl3::context::Context; +use opencl3::device::{get_all_devices, get_device_info, Device, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, CL_DEVICE_TYPE_GPU}; +use opencl3::kernel::{ExecuteKernel, Kernel}; +use opencl3::memory::{Buffer, CL_MAP_WRITE, CL_MEM_READ_ONLY}; +use opencl3::program::Program; +use opencl3::svm::SvmVec; +use opencl3::types::{cl_int, cl_uchar, CL_BLOCKING}; +use std::ptr; + +const PROGRAM_SOURCE: &str = include_str!("program.cl"); + +const KERNEL_NAME: &str = "load_team"; + +const BLOCK_SIZE: usize = 256; + +fn main() -> anyhow::Result<()> { + // Find a usable device for this application + let device_id = *get_all_devices(CL_DEVICE_TYPE_GPU)? + .first() + .expect("no device found in platform"); + let size = match get_device_info(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD) { + Ok(size) => { + size.to_size() + }, + Err(err) => { + println!("get_device_info failed: {}", err); + panic!(); + } + }; + let device = Device::new(device_id); + + let worker_count: cl_int = size as cl_int; + println!("device max work group size: {} real count: {}", size, worker_count); + + // Create a Context on an OpenCL device + let context = Context::from_device(&device).expect("Context::from_device failed"); + + let queue = CommandQueue::create_default_with_properties( + &context, + CL_QUEUE_PROFILING_ENABLE, + worker_count as u32, + ) + .expect("create_command_queue_with_properties failed"); + + // Build the OpenCL program source and create the kernel. + let program = match Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") { + Ok(p) => p, + Err(err) => { + println!( + "OpenCL Program::create_and_build_from_source failed: {}", + err + ); + panic!(); + } + }; + let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed"); + + let team_raw_vec = vec!["x"; worker_count as usize]; + let name_raw_vec = vec!["x"; worker_count as usize]; + let team_bytes_vec = team_raw_vec.iter().map(|s| s.as_bytes()).collect::>(); + let name_bytes_vec = name_raw_vec.iter().map(|s| s.as_bytes()).collect::>(); + let t_len_vec = team_bytes_vec.iter().map(|s| s.len() as cl_int + 1).collect::>(); + let n_len_vec = name_bytes_vec.iter().map(|s| s.len() as cl_int).collect::>(); + + let work_count = team_bytes_vec.len(); + + // Create OpenCL device buffers + let mut team = unsafe { + Buffer::::create( + &context, + CL_MEM_READ_ONLY, + BLOCK_SIZE * work_count, + ptr::null_mut(), + )? + }; + let mut name = unsafe { + Buffer::::create( + &context, + CL_MEM_READ_ONLY, + BLOCK_SIZE * work_count, + ptr::null_mut(), + )? + }; + let mut t_len = unsafe { + Buffer::::create(&context, CL_MEM_READ_ONLY, work_count, ptr::null_mut())? + }; + let mut n_len = unsafe { + Buffer::::create(&context, CL_MEM_READ_ONLY, work_count, ptr::null_mut())? + }; + let mut output = SvmVec::::allocate(&context, BLOCK_SIZE * work_count)?; + // 准备一下数据, 都给拼成一维数组 + // 填充成 256 * len + let team_data_vec = { + let mut vec = Vec::new(); + for data in team_bytes_vec { + let left_over = BLOCK_SIZE - data.len(); + vec.extend_from_slice(data); + vec.extend_from_slice(&vec![0; left_over]); + } + vec + }; + let name_data_vec = { + let mut vec = Vec::new(); + for data in name_bytes_vec { + let left_over = BLOCK_SIZE - data.len(); + vec.extend_from_slice(data); + vec.extend_from_slice(&vec![0; left_over]); + } + vec + }; + + // 阻塞写 + let _team_write_event = + unsafe { queue.enqueue_write_buffer(&mut team, CL_BLOCKING, 0, &team_data_vec, &[]) }?; + let _name_write_event = + unsafe { queue.enqueue_write_buffer(&mut name, CL_BLOCKING, 0, &name_data_vec, &[]) }?; + let _t_len_write_event = + unsafe { queue.enqueue_write_buffer(&mut t_len, CL_BLOCKING, 0, &t_len_vec, &[]) }?; + let _n_len_write_event = + unsafe { queue.enqueue_write_buffer(&mut n_len, CL_BLOCKING, 0, &n_len_vec, &[]) }?; + + // println!("output: {:?} {}", output, output.len()); + let kernel_event = unsafe { + ExecuteKernel::new(&kernel) + .set_arg(&team) + .set_arg(&t_len) + .set_arg(&name) + .set_arg(&n_len) + .set_arg_svm(output.as_mut_ptr()) + .set_arg(&worker_count) + .set_global_work_size(worker_count as usize) + .enqueue_nd_range(&queue)? + }; + kernel_event.wait()?; + queue.finish()?; + + if !output.is_fine_grained() { + unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut output, &[]) }?; + } + + let start_time = kernel_event.profiling_command_start()?; + let end_time = kernel_event.profiling_command_end()?; + let duration = end_time - start_time; + let time = std::time::Duration::from_nanos(duration as u64); + println!("kernel execution duration: {:?}", time); + let pre_sec = 1_000_000_000 as f32 / duration as f32; + println!("kernel execution speed (pre/sec): {:?}", pre_sec * worker_count as f32); + // println!("output: {:?} {}", output, output.len()); + + if !output.is_fine_grained() { + let unmap_event = unsafe { queue.enqueue_svm_unmap(&output, &[]) }?; + unmap_event.wait()?; + } + + Ok(()) +} diff --git a/src/program.cl b/src/program.cl new file mode 100644 index 0000000..48fe614 --- /dev/null +++ b/src/program.cl @@ -0,0 +1,93 @@ + +// 取中值 +uchar median(uchar a, uchar b, uchar c) { + return max(min(a, b), min(max(a, b), c)); +} + +// 输入: 1~255 长度的 u8 数组 +// 输出: 255 长度的 u8 数组 +kernel void load_team( + global const uchar* all_team_bytes, + global const int* all_t_len, + global const uchar* all_name_bytes, + global const int* all_n_len, + // 一个 svm 的 [u8; 256] * worker_count + global uchar* all_val, + int worker_count +) { + int gid = get_global_id(0); + if (gid >= worker_count) { + return; + } + + local uchar val[256]; + local uchar team_bytes[256]; + local uchar name_bytes[256]; + for (int i = 0; i < 256; i++) { + val[i] = i; + } + for (int i = 0; i < 256; i += 4) { + vstore4(vload4(0, &all_team_bytes[256 * gid + i]), i, team_bytes); + vstore4(vload4(0, &all_name_bytes[256 * gid + i]), i, name_bytes); + } + int t_len = all_t_len[gid]; + int n_len = all_n_len[gid]; + + // 外面初始化好了 + uchar s = 0; + for (int i = s = 0; i < 256; ++i) { + if (i % t_len) { + s += team_bytes[i % t_len - 1]; + } + s += val[i]; + uchar tmp = val[i]; + val[i] = val[s]; + val[s] = tmp; + } + + for (int _ = 0; _ < 2; _++) { + uchar s = 0; + uchar k = 0; + for (int i = 0; i < 256; i++) { + if (k != 0) { + s += name_bytes[k - 1]; + } + s += val[i]; + uchar tmp = val[i]; + val[i] = val[s]; + val[s] = tmp; + if (k == n_len) { + k = 0; + } else { + k++; + } + } + } + + local uchar val_2[256]; + + for (int i = 0; i < 256; i++) { + val_2[i] = val[i] * 181 + 160; + } + + local uchar name_nase[40]; + local int b_counter; + b_counter = 0; + for (int i = 0; i < 256; i += 1) { + if (val_2[i] >= 89 && val_2[i] < 217) { + name_nase[b_counter] = val_2[i] & 63; + b_counter++; + if (b_counter >= 40) { + break; + } + } + } + + // 将结果从局部内存拷贝回全局内存 + + // for (int i = 0; i < 256; i += 4) { + // vstore4(vload4(0, &val_2[i]), i, &all_val[256 * gid + i]); + // } + // 这里这么整一下, 防止他优化掉最后的这点东西 + // all_val[256 * gid] = name_nase[0]; +} \ No newline at end of file