36 #include "../util_type.cuh"
37 #include "../util_ptx.cuh"
38 #include "../util_namespace.cuh"
// Total number of threads in the block: the product of the three block dimensions.
120 BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
// Scratch storage type: an array of BLOCK_THREADS items of type T.
125 typedef T _TempStorage[BLOCK_THREADS];
// Declares a function-local __shared__ _TempStorage instance and returns a
// reference to it, so callers that do not supply their own storage can use it.
133 __device__ __forceinline__ _TempStorage& PrivateStorage()
135 __shared__ _TempStorage private_storage;
136 return private_storage;
// Dispatch helper for invoking the flag predicate. HAS_PARAM defaults to
// BinaryOpHasIdxParam<T, FlagOp>::HAS_PARAM.
// NOTE(review): presumably HAS_PARAM is true when FlagOp accepts a third
// (index) argument -- confirm against BinaryOpHasIdxParam.
141 template <typename FlagOp, bool HAS_PARAM = BinaryOpHasIdxParam<T, FlagOp>::HAS_PARAM>
// Primary-template variant: forwards a, b and idx to the predicate.
145 static __device__ __forceinline__
bool Flag(FlagOp flag_op,
const T &a,
const T &b,
int idx)
147 return flag_op(a, b, idx);
// Specialization of ApplyOp for HAS_PARAM == false.
152 template <
typename FlagOp>
153 struct ApplyOp<FlagOp, false>
// Same signature as the primary variant, but idx is accepted and ignored:
// the predicate is invoked with only the two items.
156 static __device__ __forceinline__
bool Flag(FlagOp flag_op,
const T &a,
const T &b,
int idx)
158 return flag_op(a, b);
// Compile-time recursion step over a thread's items. Each instantiation
// writes flags[ITERATION] and then invokes the ITERATION + 1 instantiation.
163 template <
int ITERATION,
int MAX_ITERATIONS>
167 int ITEMS_PER_THREAD,
170 static __device__ __forceinline__
void FlagItems(
172 FlagT (&flags)[ITEMS_PER_THREAD],
173 T (&input)[ITEMS_PER_THREAD],
// The flag is produced by ApplyOp<FlagOp>::Flag; the visible arguments are
// the preceding item input[ITERATION - 1] and the item's index
// (linear_tid * ITEMS_PER_THREAD) + ITERATION.
// NOTE(review): with ITERATION == 0, input[ITERATION - 1] indexes element -1.
// This file instantiates Iterate<0, ITEMS_PER_THREAD - 1>::FlagItems for the
// tail-flag routines -- verify that this is intended.
176 flags[ITERATION] = ApplyOp<FlagOp>::Flag(
178 input[ITERATION - 1],
180 (linear_tid * ITEMS_PER_THREAD) + ITERATION);
// Continue with the next item.
182 Iterate<ITERATION + 1, MAX_ITERATIONS>::FlagItems(linear_tid, flags, input, flag_op);
// Specialization of Iterate for ITERATION == MAX_ITERATIONS.
// NOTE(review): presumably the terminating case of the recursion with an
// empty FlagItems body -- the body is not visible here, confirm.
187 template <
int MAX_ITERATIONS>
188 struct Iterate<MAX_ITERATIONS, MAX_ITERATIONS>
191 int ITEMS_PER_THREAD,
194 static __device__ __forceinline__
void FlagItems(
196 FlagT (&flags)[ITEMS_PER_THREAD],
197 T (&input)[ITEMS_PER_THREAD],
// Reference to the scratch array used to exchange items between threads.
208 _TempStorage &temp_storage;
// Initializer list that binds temp_storage to the array returned by
// PrivateStorage() and computes linear_tid via RowMajorTid over the three
// block dimensions.
230 temp_storage(PrivateStorage()),
231 linear_tid(
RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
// Initializer list that binds temp_storage to the result of Alias() on an
// object also named temp_storage; linear_tid is computed the same way.
// NOTE(review): presumably the inner temp_storage is a constructor parameter
// supplied by the caller -- confirm.
241 temp_storage(temp_storage.Alias()),
242 linear_tid(
RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
// Computes head flags for the calling thread's items: an item's flag comes
// from applying the flag predicate to it and the item that precedes it.
303 int ITEMS_PER_THREAD,
307 FlagT (&head_flags)[ITEMS_PER_THREAD],
308 T (&input)[ITEMS_PER_THREAD],
// Publish this thread's last item so the next thread can read it as the
// predecessor of its first item.
312 temp_storage[linear_tid] = input[ITEMS_PER_THREAD - 1];
// NOTE(review): the read of temp_storage[linear_tid - 1] below touches a slot
// written by another thread; confirm a block-wide barrier separates the
// write above from that read.
// Thread 0 has no predecessor thread and takes the first branch of the
// conditional; all other threads compare against the previous thread's
// last item.
317 head_flags[0] = (linear_tid == 0) ?
319 ApplyOp<FlagOp>::Flag(
321 temp_storage[linear_tid - 1],
323 linear_tid * ITEMS_PER_THREAD);
// Items 1 .. ITEMS_PER_THREAD - 1 are flagged against their in-thread predecessor.
326 Iterate<1, ITEMS_PER_THREAD>::FlagItems(linear_tid, head_flags, input, flag_op);
// Computes head flags for the calling thread's items, with thread 0 using
// the caller-supplied tile_predecessor_item as the predecessor of its first
// item.
386 int ITEMS_PER_THREAD,
390 FlagT (&head_flags)[ITEMS_PER_THREAD],
391 T (&input)[ITEMS_PER_THREAD],
393 T tile_predecessor_item)
// Publish this thread's last item for the next thread.
396 temp_storage[linear_tid] = input[ITEMS_PER_THREAD - 1];
// NOTE(review): temp_storage[linear_tid - 1] is written by another thread;
// confirm a block-wide barrier separates the write above from this read.
// Thread 0 uses tile_predecessor_item; every other thread uses the previous
// thread's last item.
401 T predecessor_item = (linear_tid == 0) ?
402 tile_predecessor_item :
403 temp_storage[linear_tid - 1];
// Flag the first item; its index is linear_tid * ITEMS_PER_THREAD.
405 head_flags[0] = ApplyOp<FlagOp>::Flag(
409 linear_tid * ITEMS_PER_THREAD);
// Items 1 .. ITEMS_PER_THREAD - 1 are flagged against their in-thread predecessor.
412 Iterate<1, ITEMS_PER_THREAD>::FlagItems(linear_tid, head_flags, input, flag_op);
// Computes tail flags for the calling thread's items. The last item of each
// thread is compared with the first item of the next thread.
474 int ITEMS_PER_THREAD,
478 FlagT (&tail_flags)[ITEMS_PER_THREAD],
479 T (&input)[ITEMS_PER_THREAD],
// Publish this thread's first item so the previous thread can read it as the
// successor of its last item.
483 temp_storage[linear_tid] = input[0];
// NOTE(review): temp_storage[linear_tid + 1] is written by another thread;
// confirm a block-wide barrier separates the write above from the read below.
// The last thread (linear_tid == BLOCK_THREADS - 1) has no successor thread
// and takes the first branch of the conditional.
488 tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ?
490 ApplyOp<FlagOp>::Flag(
492 input[ITEMS_PER_THREAD - 1],
493 temp_storage[linear_tid + 1],
494 (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1));
// Remaining flags, indices 0 .. ITEMS_PER_THREAD - 2.
// NOTE(review): Iterate<...>::FlagItems writes flags[ITERATION] using
// input[ITERATION - 1]; starting at ITERATION == 0 this reads input[-1] and
// pairs each item with its predecessor rather than its successor. Verify
// that the tail flags are computed as intended.
497 Iterate<0, ITEMS_PER_THREAD - 1>::FlagItems(linear_tid, tail_flags, input, flag_op);
// Computes tail flags for the calling thread's items, with the last thread
// using the caller-supplied tile_successor_item as the successor of its
// last item.
558 int ITEMS_PER_THREAD,
562 FlagT (&tail_flags)[ITEMS_PER_THREAD],
563 T (&input)[ITEMS_PER_THREAD],
565 T tile_successor_item)
// Publish this thread's first item for the previous thread.
568 temp_storage[linear_tid] = input[0];
// NOTE(review): temp_storage[linear_tid + 1] is written by another thread;
// confirm a block-wide barrier separates the write above from this read.
// The last thread uses tile_successor_item; every other thread uses the next
// thread's first item.
573 T successor_item = (linear_tid == BLOCK_THREADS - 1) ?
574 tile_successor_item :
575 temp_storage[linear_tid + 1];
// Flag the last item; its index is
// (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1).
577 tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::Flag(
579 input[ITEMS_PER_THREAD - 1],
581 (linear_tid * ITEMS_PER_THREAD) + (ITEMS_PER_THREAD - 1));
// Remaining flags, indices 0 .. ITEMS_PER_THREAD - 2.
// NOTE(review): Iterate<...>::FlagItems writes flags[ITERATION] using
// input[ITERATION - 1]; starting at ITERATION == 0 this reads input[-1] and
// pairs each item with its predecessor rather than its successor. Verify
// that the tail flags are computed as intended.
584 Iterate<0, ITEMS_PER_THREAD - 1>::FlagItems(linear_tid, tail_flags, input, flag_op);