CUB
|
Classes | |
struct | cub::CachingDeviceAllocator |
A simple caching allocator for device memory allocations. More... | |
Macros | |
#define | CUB_PTX_ARCH 0 |
CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). | |
#define | CUB_RUNTIME_ENABLED |
Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. | |
#define | CUB_RUNTIME_FUNCTION __host__ __device__ |
#define | CUB_LOG_WARP_THREADS(arch) (5) |
Number of threads per warp (log) | |
#define | CUB_WARP_THREADS(arch) (1 << CUB_LOG_WARP_THREADS(arch)) |
Number of threads per warp. | |
#define | CUB_LOG_SMEM_BANKS(arch) |
Number of smem banks (log) More... | |
#define | CUB_SMEM_BANKS(arch) (1 << CUB_LOG_SMEM_BANKS(arch)) |
Number of smem banks. | |
#define | CUB_SMEM_BANK_BYTES(arch) (4) |
Number of bytes per smem bank. | |
#define | CUB_SMEM_BYTES(arch) |
Number of smem bytes provisioned per SM. More... | |
#define | CUB_SMEM_ALLOC_UNIT(arch) |
Smem allocation size in bytes. More... | |
#define | CUB_REGS_BY_BLOCK(arch) |
Whether or not the architecture allocates registers by block (or by warp) More... | |
#define | CUB_REG_ALLOC_UNIT(arch) |
Number of registers allocated at a time per block (or by warp) More... | |
#define | CUB_WARP_ALLOC_UNIT(arch) |
Granularity of warps for which registers are allocated. More... | |
#define | CUB_MAX_SM_THREADS(arch) |
Maximum number of threads per SM. More... | |
#define | CUB_MAX_SM_BLOCKS(arch) |
Maximum number of thread blocks per SM. More... | |
#define | CUB_MAX_BLOCK_THREADS(arch) |
Maximum number of threads per thread block. More... | |
#define | CUB_MAX_SM_REGISTERS(arch) |
Maximum number of registers per SM. More... | |
#define | CUB_SUBSCRIPTION_FACTOR(arch) |
Oversubscription factor. More... | |
#define | CUB_PREFER_CONFLICT_OVER_PADDING(arch) |
Prefer padding overhead vs X-way conflicts greater than this threshold. More... | |
#define | CubDebug(e) cub::Debug((e), __FILE__, __LINE__) |
Debug macro. | |
#define | CubDebugExit(e) if (cub::Debug((e), __FILE__, __LINE__)) { exit(1); } |
Debug macro with exit. | |
#define | CubLog(format,...) printf(format,__VA_ARGS__); |
Log macro for printf statements. | |
Functions | |
__host__ __device__ __forceinline__ cudaError_t | cub::Debug (cudaError_t error, const char *filename, int line) |
CUB error reporting function (prints error messages to stderr) More... | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::PtxVersion (int &ptx_version) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) More... | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::SmVersion (int &sm_version, int device_ordinal) |
Retrieves the SM version (major * 100 + minor * 10) | |
template<typename KernelPtr > | |
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads) |
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block. More... | |
#define CUB_LOG_SMEM_BANKS(arch)
Number of smem banks (log)
Definition at line 78 of file util_arch.cuh.
#define CUB_SMEM_BYTES(arch)
Number of smem bytes provisioned per SM.
Definition at line 92 of file util_arch.cuh.
#define CUB_SMEM_ALLOC_UNIT(arch)
Smem allocation size in bytes.
Definition at line 98 of file util_arch.cuh.
#define CUB_REGS_BY_BLOCK(arch)
Whether or not the architecture allocates registers by block (or by warp)
Definition at line 106 of file util_arch.cuh.
#define CUB_REG_ALLOC_UNIT(arch)
Number of registers allocated at a time per block (or by warp)
Definition at line 112 of file util_arch.cuh.
#define CUB_WARP_ALLOC_UNIT(arch)
Granularity of warps for which registers are allocated.
Definition at line 122 of file util_arch.cuh.
#define CUB_MAX_SM_THREADS(arch)
Maximum number of threads per SM.
Definition at line 128 of file util_arch.cuh.
#define CUB_MAX_SM_BLOCKS(arch)
Maximum number of thread blocks per SM.
Definition at line 138 of file util_arch.cuh.
#define CUB_MAX_BLOCK_THREADS(arch)
Maximum number of threads per thread block.
Definition at line 144 of file util_arch.cuh.
#define CUB_MAX_SM_REGISTERS(arch)
Maximum number of registers per SM.
Definition at line 150 of file util_arch.cuh.
#define CUB_SUBSCRIPTION_FACTOR(arch)
Oversubscription factor.
Definition at line 160 of file util_arch.cuh.
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)
Prefer padding overhead vs X-way conflicts greater than this threshold.
Definition at line 168 of file util_arch.cuh.
__host__ __device__ __forceinline__ cudaError_t cub::Debug(cudaError_t error, const char *filename, int line)
CUB error reporting function (prints error messages to stderr)
If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.
Definition at line 68 of file util_debug.cuh.
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion(int &ptx_version)
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)
Type definition of the EmptyKernel kernel entry point
Force EmptyKernel<void> to be generated if this class is used
Definition at line 118 of file util_device.cuh.
CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy(int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads)
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block.
[out] | max_sm_occupancy | maximum number of thread blocks that can reside on a single SM |
[in] | kernel_ptr | Kernel pointer for which to compute SM occupancy |
[in] | block_threads | Number of threads per thread block |
Definition at line 334 of file util_device.cuh.