CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
block_discontinuity.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include "../util_type.cuh"
37 #include "../util_ptx.cuh"
38 #include "../util_namespace.cuh"
39 
41 CUB_NS_PREFIX
42 
44 namespace cub {
45 
102 template <
103  typename T,
104  int BLOCK_DIM_X,
105  int BLOCK_DIM_Y = 1,
106  int BLOCK_DIM_Z = 1,
107  int PTX_ARCH = CUB_PTX_ARCH>
109 {
110 private:
111 
112  /******************************************************************************
113  * Constants and type definitions
114  ******************************************************************************/
115 
/// Compile-time constants
enum
{
    /// Number of threads in the block: the product of the three block dimensions
    BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z
};


/// Storage layout used to hand items between threads: one slot of type T per thread
typedef T _TempStorage[BLOCK_THREADS];
126 
127 
128  /******************************************************************************
129  * Utility methods
130  ******************************************************************************/
131 
/// Internal storage allocator: returns a reference to a statically declared
/// __shared__ _TempStorage array (used by the default constructor).
__device__ __forceinline__ _TempStorage& PrivateStorage()
{
    __shared__ _TempStorage block_private_storage;
    return block_private_storage;
}
138 
139 
/// Dispatch wrapper for flag functors. This primary template is used unless
/// HAS_PARAM is false, and forwards the index to the functor as a third argument.
template <
    typename    FlagOp,
    bool        HAS_PARAM = BinaryOpHasIdxParam<T, FlagOp>::HAS_PARAM>
struct ApplyOp
{
    /// Evaluate flag_op(a, b, idx) and return the outcome as a bool
    static __device__ __forceinline__ bool Flag(
        FlagOp      flag_op,
        const T     &a,
        const T     &b,
        int         idx)
    {
        const bool is_flagged = flag_op(a, b, idx);
        return is_flagged;
    }
};
150 
/// Dispatch wrapper specialization for HAS_PARAM == false: the functor is
/// called with the two items only and the index argument is ignored.
template <typename FlagOp>
struct ApplyOp<FlagOp, false>
{
    /// Evaluate flag_op(a, b) and return the outcome as a bool (idx is unused)
    static __device__ __forceinline__ bool Flag(
        FlagOp      flag_op,
        const T     &a,
        const T     &b,
        int         idx)
    {
        const bool is_flagged = flag_op(a, b);
        return is_flagged;
    }
};
161 
/// Compile-time unrolled item comparison (inductive step).
///
/// Step ITERATION sets flags[ITERATION] by applying flag_op to the pair
/// (input[ITERATION - 1], input[ITERATION]) and then recurses into step
/// ITERATION + 1. The recursion ends at the <MAX_ITERATIONS, MAX_ITERATIONS>
/// specialization.
template <int ITERATION, int MAX_ITERATIONS>
struct Iterate
{
    template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
    static __device__ __forceinline__ void FlagItems(
        int     linear_tid,
        FlagT   (&flags)[ITEMS_PER_THREAD],
        T       (&input)[ITEMS_PER_THREAD],
        FlagOp  flag_op)
    {
        // Index handed to the functor: this thread's item offset plus the step number
        const int item_idx = (linear_tid * ITEMS_PER_THREAD) + ITERATION;

        T &left_item  = input[ITERATION - 1];
        T &right_item = input[ITERATION];

        flags[ITERATION] = ApplyOp<FlagOp>::Flag(flag_op, left_item, right_item, item_idx);

        // Continue with the next item
        typedef Iterate<ITERATION + 1, MAX_ITERATIONS> NextStep;
        NextStep::FlagItems(linear_tid, flags, input, flag_op);
    }
};
185 
/// Compile-time unrolled item comparison (termination): reached when the
/// step counter equals MAX_ITERATIONS; performs no work.
template <int MAX_ITERATIONS>
struct Iterate<MAX_ITERATIONS, MAX_ITERATIONS>
{
    template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
    static __device__ __forceinline__ void FlagItems(
        int     linear_tid,
        FlagT   (&flags)[ITEMS_PER_THREAD],
        T       (&input)[ITEMS_PER_THREAD],
        FlagOp  flag_op)
    {
        // Intentionally empty: end of the recursion
    }
};
201 
202 
203  /******************************************************************************
204  * Thread fields
205  ******************************************************************************/
206 
/// Reference to the per-thread item-exchange storage (bound by the constructors)
_TempStorage    &temp_storage;

/// Linear thread identifier (set from RowMajorTid() by the constructors)
int             linear_tid;
212 
213 
214 public:
215 
/// Public storage type accepted by the constructor; derives from
/// Uninitialized<_TempStorage> and adds no members of its own.
struct TempStorage : Uninitialized<_TempStorage>
{
};
218 
219 
220  /******************************************************************/
224 
/// Default constructor: binds temp_storage to the __shared__ array returned
/// by PrivateStorage() and computes linear_tid from the block dimensions.
__device__ __forceinline__ BlockDiscontinuity()
:   temp_storage(PrivateStorage()),
    linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
{
}
233 
234 
/// Constructor using caller-provided storage: binds the member reference to
/// temp_storage.Alias() and computes linear_tid from the block dimensions.
__device__ __forceinline__ BlockDiscontinuity(TempStorage &temp_storage)
:   temp_storage(temp_storage.Alias()),
    linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
{
}
244 
245 
247  /******************************************************************/
251 
252 
/// Sets head flags for the calling thread's items.
///
/// head_flags[0] is 1 for thread 0; for every other thread it is the result
/// of flag_op applied to the previous thread's last item and input[0].
/// head_flags[i] for i >= 1 is flag_op applied to (input[i - 1], input[i]).
///
/// Contains a __syncthreads(), so it must be reached by all threads of the block.
///
/// \param[out] head_flags  Calling thread's head flags
/// \param[in]  input       Calling thread's input items
/// \param[in]  flag_op     Binary flag predicate (optionally taking an index)
template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
__device__ __forceinline__ void FlagHeads(
    FlagT   (&head_flags)[ITEMS_PER_THREAD],
    T       (&input)[ITEMS_PER_THREAD],
    FlagOp  flag_op)
{
    // Publish this thread's final item so the next thread can compare against it
    temp_storage[linear_tid] = input[ITEMS_PER_THREAD - 1];

    // All writes must be visible before any thread reads its neighbour's slot
    __syncthreads();

    // Thread 0 has no predecessor within the tile: its first item is always flagged
    int first_flag = 1;
    if (linear_tid != 0)
    {
        first_flag = ApplyOp<FlagOp>::Flag(
            flag_op,
            temp_storage[linear_tid - 1],
            input[0],
            linear_tid * ITEMS_PER_THREAD);
    }
    head_flags[0] = first_flag;

    // Flags for items 1 .. ITEMS_PER_THREAD - 1 only need thread-local data
    Iterate<1, ITEMS_PER_THREAD>::FlagItems(linear_tid, head_flags, input, flag_op);
}
328 
329 
/// Sets head flags for the calling thread's items, using a caller-supplied
/// predecessor for the very first item of the tile.
///
/// head_flags[0] is flag_op applied to (predecessor, input[0]), where the
/// predecessor is tile_predecessor_item for thread 0 and the previous
/// thread's last item otherwise. head_flags[i] for i >= 1 is flag_op applied
/// to (input[i - 1], input[i]).
///
/// Contains a __syncthreads(), so it must be reached by all threads of the block.
///
/// \param[out] head_flags             Calling thread's head flags
/// \param[in]  input                  Calling thread's input items
/// \param[in]  flag_op                Binary flag predicate (optionally taking an index)
/// \param[in]  tile_predecessor_item  Item compared against input[0] in thread 0
template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
__device__ __forceinline__ void FlagHeads(
    FlagT   (&head_flags)[ITEMS_PER_THREAD],
    T       (&input)[ITEMS_PER_THREAD],
    FlagOp  flag_op,
    T       tile_predecessor_item)
{
    // Publish this thread's final item so the next thread can compare against it
    temp_storage[linear_tid] = input[ITEMS_PER_THREAD - 1];

    // All writes must be visible before any thread reads its neighbour's slot
    __syncthreads();

    // Index handed to the functor for this thread's first item
    const int first_item_idx = linear_tid * ITEMS_PER_THREAD;

    // Thread 0 takes the caller's predecessor; everyone else takes the neighbour's last item
    T prev_item = (linear_tid == 0) ? tile_predecessor_item : temp_storage[linear_tid - 1];

    head_flags[0] = ApplyOp<FlagOp>::Flag(flag_op, prev_item, input[0], first_item_idx);

    // Flags for items 1 .. ITEMS_PER_THREAD - 1 only need thread-local data
    Iterate<1, ITEMS_PER_THREAD>::FlagItems(linear_tid, head_flags, input, flag_op);
}
414 
415 
417  /******************************************************************/
421 
422 
/// Sets tail flags for the calling thread's items.
///
/// tail_flags[ITEMS_PER_THREAD - 1] is 1 for the last thread of the block;
/// for every other thread it is flag_op applied to the thread's last item and
/// the next thread's first item. tail_flags[i] for i < ITEMS_PER_THREAD - 1
/// is flag_op applied to (input[i], input[i + 1]).
///
/// Contains a __syncthreads(), so it must be reached by all threads of the block.
///
/// \param[out] tail_flags  Calling thread's tail flags
/// \param[in]  input       Calling thread's input items
/// \param[in]  flag_op     Binary flag predicate (optionally taking an index)
template <
    int ITEMS_PER_THREAD,
    typename FlagT,
    typename FlagOp>
__device__ __forceinline__ void FlagTails(
    FlagT (&tail_flags)[ITEMS_PER_THREAD],
    T (&input)[ITEMS_PER_THREAD],
    FlagOp flag_op)
{
    // Share first item
    temp_storage[linear_tid] = input[0];

    __syncthreads();

    // Set flag for last item
    tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ?
        1 :                             // Last thread
        ApplyOp<FlagOp>::Flag(
            flag_op,
            input[ITEMS_PER_THREAD - 1],
            temp_storage[linear_tid + 1],
            (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1));

    // Set tail_flags for remaining items: each item is compared with its
    // SUCCESSOR. (The head-flag helper Iterate::FlagItems compares with the
    // predecessor and would read input[-1] for item 0, so it cannot be used here.)
    // The index passed is that of the item being flagged, matching the
    // convention used for the last item above.
    #pragma unroll
    for (int ITEM = 0; ITEM < ITEMS_PER_THREAD - 1; ++ITEM)
    {
        tail_flags[ITEM] = ApplyOp<FlagOp>::Flag(
            flag_op,
            input[ITEM],
            input[ITEM + 1],
            (linear_tid * ITEMS_PER_THREAD) + ITEM);
    }
}
499 
500 
/// Sets tail flags for the calling thread's items, using a caller-supplied
/// successor for the very last item of the tile.
///
/// tail_flags[ITEMS_PER_THREAD - 1] is flag_op applied to the thread's last
/// item and its successor, where the successor is tile_successor_item for the
/// last thread of the block and the next thread's first item otherwise.
/// tail_flags[i] for i < ITEMS_PER_THREAD - 1 is flag_op applied to
/// (input[i], input[i + 1]).
///
/// Contains a __syncthreads(), so it must be reached by all threads of the block.
///
/// \param[out] tail_flags           Calling thread's tail flags
/// \param[in]  input                Calling thread's input items
/// \param[in]  flag_op              Binary flag predicate (optionally taking an index)
/// \param[in]  tile_successor_item  Item compared against the last thread's last item
template <
    int ITEMS_PER_THREAD,
    typename FlagT,
    typename FlagOp>
__device__ __forceinline__ void FlagTails(
    FlagT (&tail_flags)[ITEMS_PER_THREAD],
    T (&input)[ITEMS_PER_THREAD],
    FlagOp flag_op,
    T tile_successor_item)
{
    // Share first item
    temp_storage[linear_tid] = input[0];

    __syncthreads();

    // Set flag for last item
    T successor_item = (linear_tid == BLOCK_THREADS - 1) ?
        tile_successor_item :           // Last thread
        temp_storage[linear_tid + 1];

    tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::Flag(
        flag_op,
        input[ITEMS_PER_THREAD - 1],
        successor_item,
        (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1));

    // Set tail_flags for remaining items: each item is compared with its
    // SUCCESSOR. (The head-flag helper Iterate::FlagItems compares with the
    // predecessor and would read input[-1] for item 0, so it cannot be used here.)
    // The index passed is that of the item being flagged, matching the
    // convention used for the last item above.
    #pragma unroll
    for (int ITEM = 0; ITEM < ITEMS_PER_THREAD - 1; ++ITEM)
    {
        tail_flags[ITEM] = ApplyOp<FlagOp>::Flag(
            flag_op,
            input[ITEM],
            input[ITEM + 1],
            (linear_tid * ITEMS_PER_THREAD) + ITEM);
    }
}
586 
588 
589 };
590 
591 
592 } // CUB namespace
593 CUB_NS_POSTFIX // Optional outer namespace(s)