CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
util_arch.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include "util_namespace.cuh"
37 
39 CUB_NS_PREFIX
40 
42 namespace cub {
43 
44 
/// Architecture value for the current compilation pass: mirrors __CUDA_ARCH__
/// whenever the compiler defines it, and is 0 otherwise.
#ifdef __CUDA_ARCH__
    #define CUB_PTX_ARCH __CUDA_ARCH__
#else
    #define CUB_PTX_ARCH 0
#endif
57 
58 
/// Execution-space qualifier for functions annotated with CUB_RUNTIME_FUNCTION.
///
/// When CUB_PTX_ARCH is non-zero and CUB_CDP is not defined, such functions are
/// host-only (`__host__`). In every other case (CUB_PTX_ARCH is 0, or CUB_CDP
/// is defined) they are `__host__ __device__` and CUB_RUNTIME_ENABLED is
/// defined as a feature flag.
/// NOTE(review): CUB_CDP presumably opts in to device-side runtime calls
/// (dynamic parallelism) -- confirm against the build documentation.
#if (CUB_PTX_ARCH != 0) && !defined(CUB_CDP)
    #define CUB_RUNTIME_FUNCTION __host__
#else
    #define CUB_RUNTIME_ENABLED
    #define CUB_RUNTIME_FUNCTION __host__ __device__
#endif
66 
67 
68 
/// Log2 of the warp width. Evaluates to 5 for every \p arch (the argument is
/// accepted for interface uniformity but not used).
#define CUB_LOG_WARP_THREADS(arch) (5)

/// Warp width in threads, computed as 2^CUB_LOG_WARP_THREADS(arch), i.e. 32.
#define CUB_WARP_THREADS(arch) (1 << CUB_LOG_WARP_THREADS(arch))
76 
/// Log2 of the shared-memory bank count: 5 when \p arch >= 200, otherwise 4.
/// \p arch uses the same encoding as __CUDA_ARCH__ (e.g. 350).
/// The parameter is parenthesized so that arguments containing operators of
/// lower precedence than `>=` (such as `?:` or `&`) are compared as a whole.
#define CUB_LOG_SMEM_BANKS(arch)    \
    (((arch) >= 200) ? (5) : (4))

/// Shared-memory bank count, computed as 2^CUB_LOG_SMEM_BANKS(arch)
/// (32 when \p arch >= 200, otherwise 16).
#define CUB_SMEM_BANKS(arch)        \
    (1 << CUB_LOG_SMEM_BANKS(arch))
86 
/// Width of one shared-memory bank in bytes. Evaluates to 4 for every \p arch
/// (the argument is not used).
#define CUB_SMEM_BANK_BYTES(arch) (4)
90 
/// Shared-memory capacity in bytes for \p arch: 48 KB when \p arch >= 200,
/// otherwise 16 KB. \p arch uses the __CUDA_ARCH__ encoding (e.g. 350).
/// The parameter is parenthesized so operator-bearing arguments (e.g. a
/// conditional expression) are compared as a whole.
#define CUB_SMEM_BYTES(arch)        \
    (((arch) >= 200) ?              \
        (48 * 1024) :               \
        (16 * 1024))
96 
/// Shared-memory allocation unit for \p arch (presumably in bytes -- confirm):
///   - arch >= 300 : 256
///   - arch >= 200 : 128
///   - otherwise   : 512
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_SMEM_ALLOC_UNIT(arch)   \
    (((arch) >= 300) ?              \
        (256) :                     \
        (((arch) >= 200) ?          \
            (128) :                 \
            (512)))
104 
/// Boolean flag selected by \p arch: true when \p arch < 200, false otherwise.
/// NOTE(review): per the macro name this presumably indicates that registers
/// are allocated at thread-block granularity on those architectures -- confirm.
/// The parameter is parenthesized so operator-bearing arguments are compared
/// as a whole.
#define CUB_REGS_BY_BLOCK(arch)     \
    ((arch) < 200)
110 
/// Register allocation unit for \p arch:
///   - arch >= 300 : 256
///   - arch >= 200 : 64
///   - arch >= 120 : 512
///   - otherwise   : 256
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_REG_ALLOC_UNIT(arch)    \
    (((arch) >= 300) ?              \
        (256) :                     \
        (((arch) >= 200) ?          \
            (64) :                  \
            (((arch) >= 120) ?      \
                (512) :             \
                (256))))
120 
/// Warp allocation unit for \p arch: 4 when \p arch >= 300, otherwise 2.
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_WARP_ALLOC_UNIT(arch)   \
    (((arch) >= 300) ? (4) : (2))
126 
/// Maximum thread count selected by \p arch (per the macro name, the limit of
/// resident threads per SM):
///   - arch >= 300 : 2048
///   - arch >= 200 : 1536
///   - arch >= 120 : 1024
///   - otherwise   : 768
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_MAX_SM_THREADS(arch)    \
    (((arch) >= 300) ?              \
        (2048) :                    \
        (((arch) >= 200) ?          \
            (1536) :                \
            (((arch) >= 120) ?      \
                (1024) :            \
                (768))))
136 
/// Maximum block count selected by \p arch (per the macro name, the limit of
/// resident thread blocks per SM): 16 when \p arch >= 300, otherwise 8.
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_MAX_SM_BLOCKS(arch)     \
    (((arch) >= 300) ? (16) : (8))
142 
/// Maximum threads per thread block for \p arch: 1024 when \p arch >= 200,
/// otherwise 512. \p arch uses the __CUDA_ARCH__ encoding.
/// The parameter is parenthesized so operator-bearing arguments are compared
/// as a whole.
#define CUB_MAX_BLOCK_THREADS(arch) \
    (((arch) >= 200) ? (1024) : (512))
148 
/// Maximum register count selected by \p arch (per the macro name, registers
/// available per SM):
///   - arch >= 300 : 64 * 1024
///   - arch >= 200 : 32 * 1024
///   - arch >= 120 : 16 * 1024
///   - otherwise   :  8 * 1024
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_MAX_SM_REGISTERS(arch)  \
    (((arch) >= 300) ?              \
        (64 * 1024) :               \
        (((arch) >= 200) ?          \
            (32 * 1024) :           \
            (((arch) >= 120) ?      \
                (16 * 1024) :       \
                (8 * 1024))))
158 
/// Subscription factor selected by \p arch:
///   - arch >= 300 : 5
///   - arch >= 200 : 3
///   - otherwise   : 10
/// NOTE(review): presumably an oversubscription multiplier used when sizing
/// grids -- confirm against the call sites.
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_SUBSCRIPTION_FACTOR(arch)   \
    (((arch) >= 300) ?                  \
        (5) :                           \
        (((arch) >= 200) ?              \
            (3) :                       \
            (10)))
166 
/// Tuning value selected by \p arch: 1 when \p arch >= 300, otherwise 4.
/// NOTE(review): per the macro name this expresses a preference for tolerating
/// shared-memory bank conflicts over padding; the exact meaning of the values
/// 1 and 4 is not visible here -- confirm against the consumers of this macro.
/// \p arch uses the __CUDA_ARCH__ encoding. The parameter is parenthesized so
/// operator-bearing arguments are compared as a whole.
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)  \
    (((arch) >= 300) ? (1) : (4))
172 
#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// Convenience aliases: each CUB_PTX_<NAME> is CUB_<NAME>(arch) evaluated at
// CUB_PTX_ARCH, i.e. for the architecture of the current compilation pass.
// Every arch-parameterized macro has an alias here, including
// CUB_SUBSCRIPTION_FACTOR.
#define CUB_PTX_LOG_WARP_THREADS                CUB_LOG_WARP_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_WARP_THREADS                    CUB_WARP_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_LOG_SMEM_BANKS                  CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_BANKS                      CUB_SMEM_BANKS(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_BANK_BYTES                 CUB_SMEM_BANK_BYTES(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_BYTES                      CUB_SMEM_BYTES(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_ALLOC_UNIT                 CUB_SMEM_ALLOC_UNIT(CUB_PTX_ARCH)
#define CUB_PTX_REGS_BY_BLOCK                   CUB_REGS_BY_BLOCK(CUB_PTX_ARCH)
#define CUB_PTX_REG_ALLOC_UNIT                  CUB_REG_ALLOC_UNIT(CUB_PTX_ARCH)
#define CUB_PTX_WARP_ALLOC_UNIT                 CUB_WARP_ALLOC_UNIT(CUB_PTX_ARCH)
#define CUB_PTX_MAX_SM_THREADS                  CUB_MAX_SM_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_MAX_SM_BLOCKS                   CUB_MAX_SM_BLOCKS(CUB_PTX_ARCH)
#define CUB_PTX_MAX_BLOCK_THREADS               CUB_MAX_BLOCK_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_MAX_SM_REGISTERS                CUB_MAX_SM_REGISTERS(CUB_PTX_ARCH)
#define CUB_PTX_SUBSCRIPTION_FACTOR             CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH)
#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING    CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH)

#endif  // Do not document
192 
193  // end group UtilMgmt
195 
196 } // CUB namespace
197 CUB_NS_POSTFIX // Optional outer namespace(s)