This commit is contained in:
shenjack 2024-10-16 22:10:54 +08:00
parent 10a5623a54
commit 2f27ea2b69
Signed by: shenjack
GPG Key ID: 7B1134A979775551

View File

@ -1,6 +1,12 @@
use opencl3::command_queue::{CommandQueue, CL_QUEUE_ON_DEVICE, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, CL_QUEUE_PROFILING_ENABLE}; use opencl3::command_queue::{
CommandQueue, CL_QUEUE_ON_DEVICE, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
CL_QUEUE_PROFILING_ENABLE,
};
use opencl3::context::Context; use opencl3::context::Context;
use opencl3::device::{get_all_devices, get_device_info, Device, CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, CL_DEVICE_TYPE_GPU}; use opencl3::device::{
get_all_devices, get_device_info, Device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, CL_DEVICE_TYPE_GPU,
};
use opencl3::kernel::{ExecuteKernel, Kernel}; use opencl3::kernel::{ExecuteKernel, Kernel};
use opencl3::memory::{Buffer, CL_MAP_WRITE, CL_MEM_READ_ONLY}; use opencl3::memory::{Buffer, CL_MAP_WRITE, CL_MEM_READ_ONLY};
use opencl3::program::Program; use opencl3::program::Program;
@ -21,16 +27,17 @@ fn main() -> anyhow::Result<()> {
.expect("no device found in platform"); .expect("no device found in platform");
let size = { let size = {
match get_device_info(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD) { match get_device_info(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD) {
Ok(size) => { Ok(size) => size.to_size(),
size.to_size()
},
Err(err) => { Err(err) => {
println!("警告: get_device_info failed: {}\n也许是你没有一张AMD显卡,让我们试试非AMD", err); println!(
"警告: get_device_info failed: {}\n也许是你没有一张AMD显卡,让我们试试非AMD",
err
);
match get_device_info(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE) { match get_device_info(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE) {
Ok(size) => { Ok(size) => {
println!("非 amd size 获取成功"); println!("非 amd size 获取成功");
size.to_size() size.to_size()
}, }
Err(err) => { Err(err) => {
println!("错误: get_device_info failed: {}\n", err); println!("错误: get_device_info failed: {}\n", err);
panic!(); panic!();
@ -47,11 +54,10 @@ fn main() -> anyhow::Result<()> {
// Create a Context on an OpenCL device // Create a Context on an OpenCL device
let context = Context::from_device(&device).expect("Context::from_device failed"); let context = Context::from_device(&device).expect("Context::from_device failed");
let property = CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE; let property =
CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE;
let queue = match CommandQueue::create_default_with_properties( let queue = match CommandQueue::create_default_with_properties(
&context, &context, property, 10, // 写死试试, 看起来没问题
property,
10, // 写死试试, 看起来没问题
) { ) {
Ok(q) => q, Ok(q) => q,
Err(err) => { Err(err) => {
@ -60,9 +66,14 @@ fn main() -> anyhow::Result<()> {
} }
}; };
println!("队列创建完成");
// Build the OpenCL program source and create the kernel. // Build the OpenCL program source and create the kernel.
let program = match Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") { let program = match Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") {
Ok(p) => p, Ok(p) => {
println!("程序构建成功");
p
}
Err(err) => { Err(err) => {
println!( println!(
"OpenCL Program::create_and_build_from_source failed: {}", "OpenCL Program::create_and_build_from_source failed: {}",
@ -75,10 +86,22 @@ fn main() -> anyhow::Result<()> {
let team_raw_vec = vec!["x"; worker_count as usize]; let team_raw_vec = vec!["x"; worker_count as usize];
let name_raw_vec = vec!["x"; worker_count as usize]; let name_raw_vec = vec!["x"; worker_count as usize];
let team_bytes_vec = team_raw_vec.iter().map(|s| s.as_bytes()).collect::<Vec<&[u8]>>(); let team_bytes_vec = team_raw_vec
let name_bytes_vec = name_raw_vec.iter().map(|s| s.as_bytes()).collect::<Vec<&[u8]>>(); .iter()
let t_len_vec = team_bytes_vec.iter().map(|s| s.len() as cl_int + 1).collect::<Vec<i32>>(); .map(|s| s.as_bytes())
let n_len_vec = name_bytes_vec.iter().map(|s| s.len() as cl_int).collect::<Vec<i32>>(); .collect::<Vec<&[u8]>>();
let name_bytes_vec = name_raw_vec
.iter()
.map(|s| s.as_bytes())
.collect::<Vec<&[u8]>>();
let t_len_vec = team_bytes_vec
.iter()
.map(|s| s.len() as cl_int + 1)
.collect::<Vec<i32>>();
let n_len_vec = name_bytes_vec
.iter()
.map(|s| s.len() as cl_int)
.collect::<Vec<i32>>();
let work_count = team_bytes_vec.len(); let work_count = team_bytes_vec.len();
@ -166,7 +189,10 @@ fn main() -> anyhow::Result<()> {
let time = std::time::Duration::from_nanos(duration as u64); let time = std::time::Duration::from_nanos(duration as u64);
println!("kernel execution duration: {:?}", time); println!("kernel execution duration: {:?}", time);
let pre_sec = 1_000_000_000 as f32 / duration as f32; let pre_sec = 1_000_000_000 as f32 / duration as f32;
println!("kernel execution speed (pre/sec): {:?}", pre_sec * worker_count as f32); println!(
"kernel execution speed (pre/sec): {:?}",
pre_sec * worker_count as f32
);
// println!("output: {:?} {}", output, output.len()); // println!("output: {:?} {}", output, output.len());
if !output.is_fine_grained() { if !output.is_fine_grained() {