//! GPGPU framework for Rust based on [CUDA Driver API]
//!
//! [CUDA Driver API]: https://docs.nvidia.com/cuda/cuda-driver-api/
//!
//! Setup
//! -----
//! Currently (0.3.0), accel works only on Linux systems. Windows support will come in a future release (0.3.x or 0.4~).
//!
//! 1. Install [CUDA](https://developer.nvidia.com/cuda-downloads) on your system
//! 2. Set up the Rust environment using rustup (requires 1.42 or later)
//! 3. Add the `nvptx64-nvidia-cuda` target and install `ptx-linker`, or run
//!
//! ```shell
//! curl -sSL https://gitlab.com/termoshtt/accel/raw/master/setup_nvptx_toolchain.sh | bash
//! ```
//!
//! Examples
//! --------
//! accel works with stable Rust:
//!
//! ```toml
//! [dependencies]
//! accel = "=0.3.0-alpha.2"
//! ```
//!
//! Do **NOT** add `accel-core` to `[dependencies]`.
//! It will be linked automatically into the device code.
//!
//! ### Vector Add
//!
//! ```
//! use accel::*;
//!
//! #[kernel]
//! unsafe fn add(a: *const f32, b: *const f32, c: *mut f32, n: usize) {
//!     let i = accel_core::index();
//!     if (i as usize) < n {
//!         *c.offset(i) = *a.offset(i) + *b.offset(i);
//!     }
//! }
//!
//! fn main() -> error::Result<()> {
//!     let device = Device::nth(0)?;
//!     let ctx = device.create_context();
//!
//!     // Allocate memory on the GPU
//!     let n = 32;
//!     let mut a = DeviceMemory::<f32>::zeros(&ctx, n);
//!     let mut b = DeviceMemory::<f32>::zeros(&ctx, n);
//!     let mut c = DeviceMemory::<f32>::zeros(&ctx, n);
//!
//!     // Accessible from the CPU as a usual Rust slice (though this will be slow)
//!     for i in 0..n {
//!         a[i] = i as f32;
//!         b[i] = 2.0 * i as f32;
//!     }
//!     println!("a = {:?}", a.as_slice());
//!     println!("b = {:?}", b.as_slice());
//!
//!     // Launch the kernel synchronously
//!     add(&ctx,
//!         1 /* grid */,
//!         n /* block */,
//!         (a.as_ptr(), b.as_ptr(), c.as_mut_ptr(), n)
//!     ).expect("Kernel call failed");
//!
//!     println!("c = {:?}", c.as_slice());
//!     Ok(())
//! }
//! ```
//!
//! ### Assertion on GPU
//!
//! ```
//! use accel::*;
//!
//! #[kernel]
//! fn assert() {
//!     accel_core::assert_eq!(1 + 2, 4); // will fail
//! }
//!
//! fn main() -> error::Result<()> {
//!     let device = Device::nth(0)?;
//!     let ctx = device.create_context();
//!     let result = assert(&ctx, 1 /* grid */, 4 /* block */, ());
//!     assert!(result.is_err()); // assertion failed
//!     Ok(())
//! }
//! ```
//!
//! ### Print from GPU
//!
//! ```
//! use accel::*;
//!
//! #[kernel]
//! pub fn print() {
//!     let i = accel_core::index();
//!     accel_core::println!("Hello from {}", i);
//! }
//!
//! fn main() -> error::Result<()> {
//!     let device = Device::nth(0)?;
//!     let ctx = device.create_context();
//!     print(&ctx, 1, 4, ())?;
//!     Ok(())
//! }
//! ```

extern crate cuda_driver_sys as cuda;

pub use accel_derive::kernel;

pub mod device;
pub mod error;
pub mod execution;
pub mod linker;
pub mod memory;
pub mod module;
pub mod profiler;
pub mod stream;

mod block;
mod grid;
mod instruction;

pub use block::Block;
pub use device::*;
pub use execution::*;
pub use grid::Grid;
pub use instruction::Instruction;
pub use linker::*;
pub use memory::*;
pub use module::*;
pub use profiler::*;
pub use stream::*;

#[cfg(test)]
mod tests {
    /// Test that `accel_derive::kernel` can be used in the accel crate itself
    #[super::kernel]
    fn f() {}
}
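
// A minimal sketch of a device smoke test that mirrors the "Vector Add" example from the
// crate-level docs above. The module name, kernel name (`add_sketch`), and test name are
// illustrative, not part of the original crate; only calls already shown in the docs
// (`Device::nth`, `create_context`, `DeviceMemory::<f32>::zeros`, the `(ctx, grid, block, args)`
// launch convention, and `accel_core::index`) are used. It is `#[ignore]`d because it
// requires a CUDA-capable GPU and driver at test time.
#[cfg(test)]
mod device_smoke {
    use super::*;

    #[super::kernel]
    unsafe fn add_sketch(a: *const f32, b: *const f32, c: *mut f32, n: usize) {
        let i = accel_core::index();
        if (i as usize) < n {
            *c.offset(i) = *a.offset(i) + *b.offset(i);
        }
    }

    #[test]
    #[ignore] // needs a CUDA device; run with `cargo test -- --ignored` on a GPU host
    fn vector_add_on_device() {
        let device = Device::nth(0).expect("no CUDA device found");
        let ctx = device.create_context();

        // Allocate three small buffers on the GPU
        let n = 8;
        let mut a = DeviceMemory::<f32>::zeros(&ctx, n);
        let mut b = DeviceMemory::<f32>::zeros(&ctx, n);
        let mut c = DeviceMemory::<f32>::zeros(&ctx, n);

        // Fill inputs from the CPU side, as in the doc example
        for i in 0..n {
            a[i] = i as f32;
            b[i] = 2.0 * i as f32;
        }

        // Same launch convention as the doc example: (context, grid, block, args)
        add_sketch(
            &ctx,
            1 /* grid */,
            n /* block */,
            (a.as_ptr(), b.as_ptr(), c.as_mut_ptr(), n),
        )
        .expect("kernel launch failed");

        // c[i] should equal a[i] + b[i] = 3 * i
        for i in 0..n {
            assert_eq!(c[i], 3.0 * i as f32);
        }
    }
}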