1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
//! Traits for CUDA Kernel launching
//!
//! Launchable traits
//! -----------------
//!
//! Launchable traits, i.e. `Launchable0`, `Launchable1`, ..., provide a `launch` function which launches a kernel on the device.
//!
//! ```
//! use accel::{*, error::Result};
//!
//! // Trait for 2-arg kernel
//! pub trait Launchable2 {
//!     // Type of arg1 on device
//!     type Target1;
//!     // Type of arg2 on device
//!     type Target2;
//!
//!     // Launch kernel code on device
//!     fn launch<
//!         Arg1 /* Type of arg1 on host */,
//!         Arg2 /* Type of arg2 on host */
//!     >(
//!         &self,
//!         grid:  impl Into<Grid>,
//!         block: impl Into<Block>,
//!         (arg1, arg2): (Arg1, Arg2)
//!     ) -> Result<()>
//!     where
//!         // Types on host and on device are bundled by DeviceSend trait
//!         Arg1: DeviceSend<Target=Self::Target1>,
//!         Arg2: DeviceSend<Target=Self::Target2>,
//!     {
//!         // default impl which uses crate-internal features
//!         todo!() // skip for document
//!     }
//!
//!     // Specify entry point (see following example)
//!     fn get_kernel(&self) -> Result<Kernel>;
//! }
//! ```
//!
//! These traits are generated by the `accel_derive::define_launchable!` proc-macro.
//! Launchable traits are specialized for N-argument functions because they take a tuple
//! `(Arg1, Arg2, ..., ArgN)` as the `launch` argument.
//! The [DeviceSend] trait specifies how a host value is sent to the device.
//!
//! One of the Launchable traits is implemented automatically by [accel::kernel] for an auto-generated [Module] struct:
//!
//! ```
//! #[accel::kernel]
//! fn f(a: i32) {}
//! ```
//!
//! This simple definition creates a sub-module `f` (with the same name as the function):
//!
//! ```
//! mod f { // same name sub-module
//!
//!     pub const PTX_STR: &str = "{{ PTX string generated by rustc/nvptx64-nvidia-cuda }}";
//!
//!     // wrapper for implementing one of the Launchable traits
//!     pub struct Module(::accel::Module);
//!
//!     // impl Launchable1 because the number of arguments is 1
//!     impl ::accel::execution::Launchable1<'_> for Module {
//!         type Target1 = i32; // first argument of `f`
//!
//!         // How to get kernel PTX code
//!         fn get_kernel(&self) -> ::accel::error::Result<::accel::Kernel> {
//!             self.0.get_kernel("f")
//!         }
//!     }
//! }
//! ```
//!
//! For a function which takes N arguments, `Launchable{N}` will be implemented for the corresponding module.
//! Note that this sub-module is generated where `f` is defined.
//! `get_kernel` and the default implementation of `launch` are separated to keep unsafe code in this crate.
//!
//! [DeviceSend]: trait.DeviceSend.html
//! [accel::kernel]: ../attr.kernel.html
//! [Module]: ../module/struct.Module.html

use crate::{contexted_call, device::*, error::*, *};
use cuda::*;
use std::{ffi::*, ptr::null_mut};

/// A host-side value that can be passed to a device kernel as an argument.
pub trait DeviceSend {
    /// Type the kernel sees on the device for this host-side value
    type Target;

    /// Returns the address of `self`, type-erased to `*mut c_void`.
    ///
    /// The pointer refers to the host value itself, so it is only meaningful
    /// while `self` is alive.
    fn as_kernel_parameter(&self) -> *mut c_void {
        let host_addr: *const Self = self;
        host_addr as *mut c_void
    }
}

/// A mutable raw pointer keeps its type on the device: `Target` is the same `*mut T`.
impl<T> DeviceSend for *mut T {
    type Target = *mut T;
}

/// A const raw pointer keeps its type on the device: `Target` is the same `*const T`.
impl<T> DeviceSend for *const T {
    type Target = *const T;
}

// NOTE(review): `as_kernel_parameter` is not overridden here, so it yields the address of the
// slice reference itself rather than of the slice data; presumably the launcher reads the data
// pointer out of that reference — confirm.
/// A shared slice on the host corresponds to a `*const T` on the device.
impl<T> DeviceSend for &[T] {
    type Target = *const T;
}

// NOTE(review): `as_kernel_parameter` is not overridden here, so it yields the address of the
// slice reference itself rather than of the slice data; presumably the launcher reads the data
// pointer out of that reference — confirm.
/// An exclusive slice on the host corresponds to a `*mut T` on the device.
impl<T> DeviceSend for &mut [T] {
    type Target = *mut T;
}

// Implements `DeviceSend` for the given type and for shared and exclusive references to it.
// In all three impls the device-side `Target` is the implementing type itself.
macro_rules! impl_device_send {
    ($scalar:ty) => {
        // passed by value
        impl DeviceSend for $scalar {
            type Target = Self;
        }

        // passed by shared reference
        impl DeviceSend for &$scalar {
            type Target = Self;
        }

        // passed by exclusive reference
        impl DeviceSend for &mut $scalar {
            type Target = Self;
        }
    };
}

impl_device_send!(bool);
impl_device_send!(i8);
impl_device_send!(i16);
impl_device_send!(i32);
impl_device_send!(i64);
impl_device_send!(i128);
impl_device_send!(isize);
impl_device_send!(u8);
impl_device_send!(u16);
impl_device_send!(u32);
impl_device_send!(u64);
impl_device_send!(u128);
impl_device_send!(usize);
impl_device_send!(f32);
impl_device_send!(f64);

// Generate the `Launchable{N}` traits described in the module documentation above.
// Per the inline note, the argument 12 covers N in 0..=12.
accel_derive::define_launchable!(12 /* 0..=12 */);