40 #include "dispatch/device_reduce_dispatch.cuh"
41 #include "../util_namespace.cuh"
145 typename InputIterator,
146 typename OutputIterator,
147 typename ReductionOp>
150 void *d_temp_storage,
151 size_t &temp_storage_bytes,
153 OutputIterator d_out,
155 ReductionOp reduction_op,
156 cudaStream_t stream = 0,
157 bool debug_synchronous =
false)
163 typedef DeviceReduceDispatch<InputIterator, OutputIterator, Offset, ReductionOp> DeviceReduceDispatch;
165 return DeviceReduceDispatch::Dispatch(
223 typename InputIterator,
224 typename OutputIterator>
227 void *d_temp_storage,
228 size_t &temp_storage_bytes,
230 OutputIterator d_out,
232 cudaStream_t stream = 0,
233 bool debug_synchronous =
false)
239 typedef DeviceReduceDispatch<InputIterator, OutputIterator, Offset, cub::Sum> DeviceReduceDispatch;
241 return DeviceReduceDispatch::Dispatch(
295 typename InputIterator,
296 typename OutputIterator>
299 void *d_temp_storage,
300 size_t &temp_storage_bytes,
302 OutputIterator d_out,
304 cudaStream_t stream = 0,
305 bool debug_synchronous =
false)
311 typedef DeviceReduceDispatch<InputIterator, OutputIterator, Offset, cub::Min> DeviceReduceDispatch;
313 return DeviceReduceDispatch::Dispatch(
372 typename InputIterator,
373 typename OutputIterator>
376 void *d_temp_storage,
377 size_t &temp_storage_bytes,
379 OutputIterator d_out,
381 cudaStream_t stream = 0,
382 bool debug_synchronous =
false)
389 ArgIndexInputIterator d_argmin_in(d_in, 0);
392 typedef DeviceReduceDispatch<ArgIndexInputIterator, OutputIterator, Offset, cub::ArgMin> DeviceReduceDispatch;
394 return DeviceReduceDispatch::Dispatch(
448 typename InputIterator,
449 typename OutputIterator>
452 void *d_temp_storage,
453 size_t &temp_storage_bytes,
455 OutputIterator d_out,
457 cudaStream_t stream = 0,
458 bool debug_synchronous =
false)
464 typedef DeviceReduceDispatch<InputIterator, OutputIterator, Offset, cub::Max> DeviceReduceDispatch;
466 return DeviceReduceDispatch::Dispatch(
525 typename InputIterator,
526 typename OutputIterator>
529 void *d_temp_storage,
530 size_t &temp_storage_bytes,
532 OutputIterator d_out,
534 cudaStream_t stream = 0,
535 bool debug_synchronous =
false)
542 ArgIndexInputIterator d_argmax_in(d_in, 0);
545 typedef DeviceReduceDispatch<ArgIndexInputIterator, OutputIterator, Offset, cub::ArgMax> DeviceReduceDispatch;
547 return DeviceReduceDispatch::Dispatch(
641 typename KeyInputIterator,
642 typename KeyOutputIterator,
643 typename ValueInputIterator,
644 typename ValueOutputIterator,
645 typename NumSegmentsIterator,
646 typename ReductionOp>
647 CUB_RUNTIME_FUNCTION __forceinline__
649 void *d_temp_storage,
650 size_t &temp_storage_bytes,
651 KeyInputIterator d_keys_in,
652 KeyOutputIterator d_keys_out,
653 ValueInputIterator d_values_in,
654 ValueOutputIterator d_values_out,
655 NumSegmentsIterator d_num_segments,
656 ReductionOp reduction_op,
658 cudaStream_t stream = 0,
659 bool debug_synchronous =
false)
666 return DeviceReduceByKeyDispatch<KeyInputIterator, KeyOutputIterator, ValueInputIterator, ValueOutputIterator, NumSegmentsIterator, EqualityOp, ReductionOp, Offset>::Dispatch(
749 typename InputIterator,
750 typename OutputIterator,
751 typename CountsOutputIterator,
752 typename NumSegmentsIterator>
753 CUB_RUNTIME_FUNCTION __forceinline__
755 void *d_temp_storage,
756 size_t &temp_storage_bytes,
758 OutputIterator d_compacted_out,
759 CountsOutputIterator d_counts_out,
760 NumSegmentsIterator d_num_segments,
762 cudaStream_t stream = 0,
763 bool debug_synchronous =
false)
766 typedef typename std::iterator_traits<CountsOutputIterator>::value_type Value;
780 return DeviceReduceByKeyDispatch<InputIterator, OutputIterator, CountsInputIterator, CountsOutputIterator, NumSegmentsIterator, EqualityOp, ReductionOp, Offset>::Dispatch(
785 CountsInputIterator(one_val),