36 #include "specializations/warp_reduce_shfl.cuh"
37 #include "specializations/warp_reduce_smem.cuh"
38 #include "../thread/thread_operators.cuh"
39 #include "../util_arch.cuh"
40 #include "../util_type.cuh"
41 #include "../util_namespace.cuh"
139 int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS,
160 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
164 WarpReduceShfl<T, LOGICAL_WARP_THREADS, PTX_ARCH>,
165 WarpReduceSmem<T, LOGICAL_WARP_THREADS, PTX_ARCH> >::Type InternalWarpReduce;
167 #endif // DOXYGEN_SHOULD_SKIP_THIS
173 typedef typename InternalWarpReduce::TempStorage _TempStorage;
181 _TempStorage &temp_storage;
206 temp_storage(temp_storage.Alias())
251 __device__ __forceinline__ T
Sum(
// InternalWarpReduce is a dependent type (selected from templates on T), so the
// member template must be named with the "template" disambiguator, matching the
// SegmentedReduce call sites in this file.
254 return InternalWarpReduce(temp_storage).template Sum<
true, 1>(input, LOGICAL_WARP_THREADS);
295 __device__ __forceinline__ T
Sum(
// The first template argument is true when valid_items >= LOGICAL_WARP_THREADS;
// the remaining return passes false.
// InternalWarpReduce is a dependent type, so "template" is required before the
// member template name (as already done for SegmentedReduce in this file).
300 if (valid_items >= LOGICAL_WARP_THREADS)
302 return InternalWarpReduce(temp_storage).template Sum<
true, 1>(input, valid_items);
306 return InternalWarpReduce(temp_storage).template Sum<
false, 1>(input, valid_items);
451 template <
typename ReductionOp>
454 ReductionOp reduction_op)
// InternalWarpReduce is a dependent type (selected from templates on T), so the
// member template must be named with the "template" disambiguator, matching the
// SegmentedReduce call sites in this file.
456 return InternalWarpReduce(temp_storage).template Reduce<
true, 1>(input, LOGICAL_WARP_THREADS, reduction_op);
500 template <
typename ReductionOp>
503 ReductionOp reduction_op,
// The first template argument is true when valid_items >= LOGICAL_WARP_THREADS;
// the remaining return passes false.
// InternalWarpReduce is a dependent type, so "template" is required before the
// member template name (as already done for SegmentedReduce in this file).
507 if (valid_items >= LOGICAL_WARP_THREADS)
509 return InternalWarpReduce(temp_storage).template Reduce<
true, 1>(input, valid_items, reduction_op);
513 return InternalWarpReduce(temp_storage).template Reduce<
false, 1>(input, valid_items, reduction_op);
558 typename ReductionOp,
563 ReductionOp reduction_op)
565 return InternalWarpReduce(temp_storage).template SegmentedReduce<true>(input, head_flag, reduction_op);
609 typename ReductionOp,
614 ReductionOp reduction_op)
616 return InternalWarpReduce(temp_storage).template SegmentedReduce<false>(input, tail_flag, reduction_op);