reeeee
This commit is contained in:
commit
fc013bae96
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
210
Cargo.lock
generated
Normal file
210
Cargo.lock
generated
Normal file
@ -0,0 +1,210 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.104"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "cl3"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b823f24e72fa0c68aa14a250ae1c0848e68d4ae188b71c3972343e45b46f8644"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"opencl-sys",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.155"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opencl-sys"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de15dd01496ae90c5799f5266184ab020082b4065800ff0b732f489371d0e5cf"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opencl-test"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"opencl3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opencl3"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26ab4a90cb496f787d3934deb0c54fa9d65e7bed710c10071234aab0196fba04"
|
||||
dependencies = [
|
||||
"cl3",
|
||||
"libc",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.203"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.203"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.61"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.61"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
8
Cargo.toml
Normal file
8
Cargo.toml
Normal file
@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "opencl-test"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
opencl3 = { version = "0.9.5", features = ["serde"] }
|
157
src/main.rs
Normal file
157
src/main.rs
Normal file
@ -0,0 +1,157 @@
|
||||
use opencl3::command_queue::{CommandQueue, CL_QUEUE_PROFILING_ENABLE};
|
||||
use opencl3::context::Context;
|
||||
use opencl3::device::{get_all_devices, get_device_info, Device, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, CL_DEVICE_TYPE_GPU};
|
||||
use opencl3::kernel::{ExecuteKernel, Kernel};
|
||||
use opencl3::memory::{Buffer, CL_MAP_WRITE, CL_MEM_READ_ONLY};
|
||||
use opencl3::program::Program;
|
||||
use opencl3::svm::SvmVec;
|
||||
use opencl3::types::{cl_int, cl_uchar, CL_BLOCKING};
|
||||
use std::ptr;
|
||||
|
||||
const PROGRAM_SOURCE: &str = include_str!("program.cl");
|
||||
|
||||
const KERNEL_NAME: &str = "load_team";
|
||||
|
||||
const BLOCK_SIZE: usize = 256;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
// Find a usable device for this application
|
||||
let device_id = *get_all_devices(CL_DEVICE_TYPE_GPU)?
|
||||
.first()
|
||||
.expect("no device found in platform");
|
||||
let size = match get_device_info(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD) {
|
||||
Ok(size) => {
|
||||
size.to_size()
|
||||
},
|
||||
Err(err) => {
|
||||
println!("get_device_info failed: {}", err);
|
||||
panic!();
|
||||
}
|
||||
};
|
||||
let device = Device::new(device_id);
|
||||
|
||||
let worker_count: cl_int = size as cl_int;
|
||||
println!("device max work group size: {} real count: {}", size, worker_count);
|
||||
|
||||
// Create a Context on an OpenCL device
|
||||
let context = Context::from_device(&device).expect("Context::from_device failed");
|
||||
|
||||
let queue = CommandQueue::create_default_with_properties(
|
||||
&context,
|
||||
CL_QUEUE_PROFILING_ENABLE,
|
||||
worker_count as u32,
|
||||
)
|
||||
.expect("create_command_queue_with_properties failed");
|
||||
|
||||
// Build the OpenCL program source and create the kernel.
|
||||
let program = match Program::create_and_build_from_source(&context, PROGRAM_SOURCE, "") {
|
||||
Ok(p) => p,
|
||||
Err(err) => {
|
||||
println!(
|
||||
"OpenCL Program::create_and_build_from_source failed: {}",
|
||||
err
|
||||
);
|
||||
panic!();
|
||||
}
|
||||
};
|
||||
let kernel = Kernel::create(&program, KERNEL_NAME).expect("Kernel::create failed");
|
||||
|
||||
let team_raw_vec = vec!["x"; worker_count as usize];
|
||||
let name_raw_vec = vec!["x"; worker_count as usize];
|
||||
let team_bytes_vec = team_raw_vec.iter().map(|s| s.as_bytes()).collect::<Vec<&[u8]>>();
|
||||
let name_bytes_vec = name_raw_vec.iter().map(|s| s.as_bytes()).collect::<Vec<&[u8]>>();
|
||||
let t_len_vec = team_bytes_vec.iter().map(|s| s.len() as cl_int + 1).collect::<Vec<i32>>();
|
||||
let n_len_vec = name_bytes_vec.iter().map(|s| s.len() as cl_int).collect::<Vec<i32>>();
|
||||
|
||||
let work_count = team_bytes_vec.len();
|
||||
|
||||
// Create OpenCL device buffers
|
||||
let mut team = unsafe {
|
||||
Buffer::<cl_uchar>::create(
|
||||
&context,
|
||||
CL_MEM_READ_ONLY,
|
||||
BLOCK_SIZE * work_count,
|
||||
ptr::null_mut(),
|
||||
)?
|
||||
};
|
||||
let mut name = unsafe {
|
||||
Buffer::<cl_uchar>::create(
|
||||
&context,
|
||||
CL_MEM_READ_ONLY,
|
||||
BLOCK_SIZE * work_count,
|
||||
ptr::null_mut(),
|
||||
)?
|
||||
};
|
||||
let mut t_len = unsafe {
|
||||
Buffer::<cl_int>::create(&context, CL_MEM_READ_ONLY, work_count, ptr::null_mut())?
|
||||
};
|
||||
let mut n_len = unsafe {
|
||||
Buffer::<cl_int>::create(&context, CL_MEM_READ_ONLY, work_count, ptr::null_mut())?
|
||||
};
|
||||
let mut output = SvmVec::<cl_uchar>::allocate(&context, BLOCK_SIZE * work_count)?;
|
||||
// 准备一下数据, 都给拼成一维数组
|
||||
// 填充成 256 * len
|
||||
let team_data_vec = {
|
||||
let mut vec = Vec::new();
|
||||
for data in team_bytes_vec {
|
||||
let left_over = BLOCK_SIZE - data.len();
|
||||
vec.extend_from_slice(data);
|
||||
vec.extend_from_slice(&vec![0; left_over]);
|
||||
}
|
||||
vec
|
||||
};
|
||||
let name_data_vec = {
|
||||
let mut vec = Vec::new();
|
||||
for data in name_bytes_vec {
|
||||
let left_over = BLOCK_SIZE - data.len();
|
||||
vec.extend_from_slice(data);
|
||||
vec.extend_from_slice(&vec![0; left_over]);
|
||||
}
|
||||
vec
|
||||
};
|
||||
|
||||
// 阻塞写
|
||||
let _team_write_event =
|
||||
unsafe { queue.enqueue_write_buffer(&mut team, CL_BLOCKING, 0, &team_data_vec, &[]) }?;
|
||||
let _name_write_event =
|
||||
unsafe { queue.enqueue_write_buffer(&mut name, CL_BLOCKING, 0, &name_data_vec, &[]) }?;
|
||||
let _t_len_write_event =
|
||||
unsafe { queue.enqueue_write_buffer(&mut t_len, CL_BLOCKING, 0, &t_len_vec, &[]) }?;
|
||||
let _n_len_write_event =
|
||||
unsafe { queue.enqueue_write_buffer(&mut n_len, CL_BLOCKING, 0, &n_len_vec, &[]) }?;
|
||||
|
||||
// println!("output: {:?} {}", output, output.len());
|
||||
let kernel_event = unsafe {
|
||||
ExecuteKernel::new(&kernel)
|
||||
.set_arg(&team)
|
||||
.set_arg(&t_len)
|
||||
.set_arg(&name)
|
||||
.set_arg(&n_len)
|
||||
.set_arg_svm(output.as_mut_ptr())
|
||||
.set_arg(&worker_count)
|
||||
.set_global_work_size(worker_count as usize)
|
||||
.enqueue_nd_range(&queue)?
|
||||
};
|
||||
kernel_event.wait()?;
|
||||
queue.finish()?;
|
||||
|
||||
if !output.is_fine_grained() {
|
||||
unsafe { queue.enqueue_svm_map(CL_BLOCKING, CL_MAP_WRITE, &mut output, &[]) }?;
|
||||
}
|
||||
|
||||
let start_time = kernel_event.profiling_command_start()?;
|
||||
let end_time = kernel_event.profiling_command_end()?;
|
||||
let duration = end_time - start_time;
|
||||
let time = std::time::Duration::from_nanos(duration as u64);
|
||||
println!("kernel execution duration: {:?}", time);
|
||||
let pre_sec = 1_000_000_000 as f32 / duration as f32;
|
||||
println!("kernel execution speed (pre/sec): {:?}", pre_sec * worker_count as f32);
|
||||
// println!("output: {:?} {}", output, output.len());
|
||||
|
||||
if !output.is_fine_grained() {
|
||||
let unmap_event = unsafe { queue.enqueue_svm_unmap(&output, &[]) }?;
|
||||
unmap_event.wait()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
93
src/program.cl
Normal file
93
src/program.cl
Normal file
@ -0,0 +1,93 @@
|
||||
|
||||
// 取中值
|
||||
uchar median(uchar a, uchar b, uchar c) {
|
||||
return max(min(a, b), min(max(a, b), c));
|
||||
}
|
||||
|
||||
// 输入: 1~255 长度的 u8 数组
|
||||
// 输出: 255 长度的 u8 数组
|
||||
kernel void load_team(
|
||||
global const uchar* all_team_bytes,
|
||||
global const int* all_t_len,
|
||||
global const uchar* all_name_bytes,
|
||||
global const int* all_n_len,
|
||||
// 一个 svm 的 [u8; 256] * worker_count
|
||||
global uchar* all_val,
|
||||
int worker_count
|
||||
) {
|
||||
int gid = get_global_id(0);
|
||||
if (gid >= worker_count) {
|
||||
return;
|
||||
}
|
||||
|
||||
local uchar val[256];
|
||||
local uchar team_bytes[256];
|
||||
local uchar name_bytes[256];
|
||||
for (int i = 0; i < 256; i++) {
|
||||
val[i] = i;
|
||||
}
|
||||
for (int i = 0; i < 256; i += 4) {
|
||||
vstore4(vload4(0, &all_team_bytes[256 * gid + i]), i, team_bytes);
|
||||
vstore4(vload4(0, &all_name_bytes[256 * gid + i]), i, name_bytes);
|
||||
}
|
||||
int t_len = all_t_len[gid];
|
||||
int n_len = all_n_len[gid];
|
||||
|
||||
// 外面初始化好了
|
||||
uchar s = 0;
|
||||
for (int i = s = 0; i < 256; ++i) {
|
||||
if (i % t_len) {
|
||||
s += team_bytes[i % t_len - 1];
|
||||
}
|
||||
s += val[i];
|
||||
uchar tmp = val[i];
|
||||
val[i] = val[s];
|
||||
val[s] = tmp;
|
||||
}
|
||||
|
||||
for (int _ = 0; _ < 2; _++) {
|
||||
uchar s = 0;
|
||||
uchar k = 0;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (k != 0) {
|
||||
s += name_bytes[k - 1];
|
||||
}
|
||||
s += val[i];
|
||||
uchar tmp = val[i];
|
||||
val[i] = val[s];
|
||||
val[s] = tmp;
|
||||
if (k == n_len) {
|
||||
k = 0;
|
||||
} else {
|
||||
k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
local uchar val_2[256];
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
val_2[i] = val[i] * 181 + 160;
|
||||
}
|
||||
|
||||
local uchar name_nase[40];
|
||||
local int b_counter;
|
||||
b_counter = 0;
|
||||
for (int i = 0; i < 256; i += 1) {
|
||||
if (val_2[i] >= 89 && val_2[i] < 217) {
|
||||
name_nase[b_counter] = val_2[i] & 63;
|
||||
b_counter++;
|
||||
if (b_counter >= 40) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 将结果从局部内存拷贝回全局内存
|
||||
|
||||
// for (int i = 0; i < 256; i += 4) {
|
||||
// vstore4(vload4(0, &val_2[i]), i, &all_val[256 * gid + i]);
|
||||
// }
|
||||
// 这里这么整一下, 防止他优化掉最后的这点东西
|
||||
// all_val[256 * gid] = name_nase[0];
|
||||
}
|
Loading…
Reference in New Issue
Block a user