Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize implementation of cuda::std::conditional_t #2779

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c2h/generators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ struct random_to_item_t
template <typename T>
struct random_to_item_t<T, true>
{
using storage_t = ::cuda::std::_If<(sizeof(T) > 4), double, float>;
using storage_t = ::cuda::std::conditional_t<(sizeof(T) > 4), double, float>;
storage_t m_min;
storage_t m_max;

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/partition/flagged.cu
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ void flagged(nvbench::state& state, nvbench::type_list<T, OffsetT, UseDistinctPa
using equality_op_t = cub::NullType;
using offset_t = OffsetT;
constexpr bool use_distinct_out_partitions = UseDistinctPartitionT::value;
using output_it_t = typename ::cuda::std::
conditional<use_distinct_out_partitions, cub::detail::partition_distinct_output_t<T*, T*>, T*>::type;
using output_it_t =
::cuda::std::conditional_t<use_distinct_out_partitions, cub::detail::partition_distinct_output_t<T*, T*>, T*>;

#if !TUNE_BASE
using policy_t = policy_hub_t<T>;
Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/partition/if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ void partition(nvbench::state& state, nvbench::type_list<T, OffsetT, UseDistinct
using equality_op_t = cub::NullType;
using offset_t = OffsetT;
constexpr bool use_distinct_out_partitions = UseDistinctPartitionT::value;
using output_it_t = typename ::cuda::std::
conditional<use_distinct_out_partitions, cub::detail::partition_distinct_output_t<T*, T*>, T*>::type;
using output_it_t =
::cuda::std::conditional_t<use_distinct_out_partitions, cub::detail::partition_distinct_output_t<T*, T*>, T*>;

#if !TUNE_BASE
using policy_t = policy_hub_t<T>;
Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/radix_sort/keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ struct policy_hub_t
{
static constexpr bool KEYS_ONLY = std::is_same<ValueT, cub::NullType>::value;

using DominantT = ::cuda::std::_If<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>;
using DominantT = ::cuda::std::conditional_t<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>;

struct policy_t : cub::ChainedPolicy<300, policy_t, policy_t>
{
Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/radix_sort/pairs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct policy_hub_t
{
static constexpr bool KEYS_ONLY = std::is_same<ValueT, cub::NullType>::value;

using DominantT = ::cuda::std::_If<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>;
using DominantT = ::cuda::std::conditional_t<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>;

struct policy_t : cub::ChainedPolicy<300, policy_t, policy_t>
{
Expand Down
6 changes: 3 additions & 3 deletions cub/benchmarks/bench/transform/babelstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ struct policy_hub_t
static constexpr int min_bif = cub::detail::transform::arch_to_min_bytes_in_flight(__CUDA_ARCH_LIST__);
static constexpr auto algorithm = static_cast<cub::detail::transform::Algorithm>(TUNE_ALGORITHM);
using algo_policy =
::cuda::std::_If<algorithm == cub::detail::transform::Algorithm::prefetch,
cub::detail::transform::prefetch_policy_t<TUNE_THREADS>,
cub::detail::transform::async_copy_policy_t<TUNE_THREADS>>;
::cuda::std::conditional_t<algorithm == cub::detail::transform::Algorithm::prefetch,
cub::detail::transform::prefetch_policy_t<TUNE_THREADS>,
cub::detail::transform::async_copy_policy_t<TUNE_THREADS>>;
};
};
#endif
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,9 @@ private:
//---------------------------------------------------------------------
/// Internal load/store type. For byte-wise memcpy, a single-byte type (char) is used;
/// otherwise the value type of the input buffer type is used.
using AliasT =
typename ::cuda::std::conditional<IsMemcpy,
std::iterator_traits<char*>,
std::iterator_traits<cub::detail::value_t<InputBufferIt>>>::type::value_type;
typename ::cuda::std::conditional_t<IsMemcpy,
std::iterator_traits<char*>,
std::iterator_traits<cub::detail::value_t<InputBufferIt>>>::value_type;

/// Types of the input and output buffers
using InputBufferT = cub::detail::value_t<InputBufferIt>;
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,9 @@ struct AgentHistogram
// Wrap the native input pointer with CacheModifiedInputIterator
// or directly use the supplied input iterator type
using WrappedSampleIteratorT =
::cuda::std::_If<std::is_pointer<SampleIteratorT>::value,
CacheModifiedInputIterator<LOAD_MODIFIER, SampleT, OffsetT>,
SampleIteratorT>;
::cuda::std::conditional_t<std::is_pointer<SampleIteratorT>::value,
CacheModifiedInputIterator<LOAD_MODIFIER, SampleT, OffsetT>,
SampleIteratorT>;

/// Pixel input iterator type (for applying cache modifier)
using WrappedPixelIteratorT = CacheModifiedInputIterator<LOAD_MODIFIER, PixelT, OffsetT>;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_radix_sort_onesweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,10 @@ struct AgentRadixSortOnesweep
|| RANK_ALGORITHM == RADIX_RANK_MATCH_EARLY_COUNTS_ATOMIC_OR,
"for onesweep agent, the ranking algorithm must warp-strided key arrangement");

using BlockRadixRankT = ::cuda::std::_If<
using BlockRadixRankT = ::cuda::std::conditional_t<
RANK_ALGORITHM == RADIX_RANK_MATCH_EARLY_COUNTS_ATOMIC_OR,
BlockRadixRankMatchEarlyCounts<BLOCK_THREADS, RADIX_BITS, false, SCAN_ALGORITHM, WARP_MATCH_ATOMIC_OR, RANK_NUM_PARTS>,
::cuda::std::_If<
::cuda::std::conditional_t<
RANK_ALGORITHM == RADIX_RANK_MATCH,
BlockRadixRankMatch<BLOCK_THREADS, RADIX_BITS, false, SCAN_ALGORITHM>,
BlockRadixRankMatchEarlyCounts<BLOCK_THREADS, RADIX_BITS, false, SCAN_ALGORITHM, WARP_MATCH_ANY, RANK_NUM_PARTS>>>;
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/agent/agent_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ struct AgentReduce
// Wrap the native input pointer with CacheModifiedInputIterator
// or directly use the supplied input iterator type
using WrappedInputIteratorT =
::cuda::std::_If<::cuda::std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentReducePolicy::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;
::cuda::std::conditional_t<::cuda::std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentReducePolicy::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;

/// Constants
static constexpr int BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS;
Expand Down
18 changes: 9 additions & 9 deletions cub/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -231,27 +231,27 @@ struct AgentReduceByKey
// CacheModifiedValuesInputIterator or directly use the supplied input
// iterator type
using WrappedKeysInputIteratorT =
::cuda::std::_If<std::is_pointer<KeysInputIteratorT>::value,
CacheModifiedInputIterator<AgentReduceByKeyPolicyT::LOAD_MODIFIER, KeyInputT, OffsetT>,
KeysInputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<KeysInputIteratorT>::value,
CacheModifiedInputIterator<AgentReduceByKeyPolicyT::LOAD_MODIFIER, KeyInputT, OffsetT>,
KeysInputIteratorT>;

// Cache-modified Input iterator wrapper type (for applying cache modifier)
// for values Wrap the native input pointer with
// CacheModifiedValuesInputIterator or directly use the supplied input
// iterator type
using WrappedValuesInputIteratorT =
::cuda::std::_If<std::is_pointer<ValuesInputIteratorT>::value,
CacheModifiedInputIterator<AgentReduceByKeyPolicyT::LOAD_MODIFIER, ValueInputT, OffsetT>,
ValuesInputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<ValuesInputIteratorT>::value,
CacheModifiedInputIterator<AgentReduceByKeyPolicyT::LOAD_MODIFIER, ValueInputT, OffsetT>,
ValuesInputIteratorT>;

// Cache-modified Input iterator wrapper type (for applying cache modifier)
// for fixup values Wrap the native input pointer with
// CacheModifiedValuesInputIterator or directly use the supplied input
// iterator type
using WrappedFixupInputIteratorT =
::cuda::std::_If<std::is_pointer<AggregatesOutputIteratorT>::value,
CacheModifiedInputIterator<AgentReduceByKeyPolicyT::LOAD_MODIFIER, ValueInputT, OffsetT>,
AggregatesOutputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<AggregatesOutputIteratorT>::value,
CacheModifiedInputIterator<AgentReduceByKeyPolicyT::LOAD_MODIFIER, ValueInputT, OffsetT>,
AggregatesOutputIteratorT>;

// Reduce-value-by-segment scan operator
using ReduceBySegmentOpT = ReduceBySegmentOp<ReductionOpT>;
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,9 @@ struct AgentRle
// Wrap the native input pointer with CacheModifiedInputIterator
// or directly use the supplied input iterator type
using WrappedInputIteratorT =
::cuda::std::_If<std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentRlePolicyT::LOAD_MODIFIER, T, OffsetT>,
InputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentRlePolicyT::LOAD_MODIFIER, T, OffsetT>,
InputIteratorT>;

// Parameterized BlockLoad type for data
using BlockLoadT =
Expand All @@ -259,7 +259,7 @@ struct AgentRle
using WarpExchangePairs = WarpExchange<LengthOffsetPair, ITEMS_PER_THREAD>;

using WarpExchangePairsStorage =
::cuda::std::_If<STORE_WARP_TIME_SLICING, typename WarpExchangePairs::TempStorage, NullType>;
::cuda::std::conditional_t<STORE_WARP_TIME_SLICING, typename WarpExchangePairs::TempStorage, NullType>;

using WarpExchangeOffsets = WarpExchange<OffsetT, ITEMS_PER_THREAD>;
using WarpExchangeLengths = WarpExchange<LengthT, ITEMS_PER_THREAD>;
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,9 @@ struct AgentScan
// Wrap the native input pointer with CacheModifiedInputIterator
// or directly use the supplied input iterator type
using WrappedInputIteratorT =
::cuda::std::_If<std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentScanPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentScanPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;

// Constants
enum
Expand Down
12 changes: 6 additions & 6 deletions cub/cub/agent/agent_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,14 @@ struct AgentScanByKey
static constexpr int ITEMS_PER_TILE = BLOCK_THREADS * ITEMS_PER_THREAD;

using WrappedKeysInputIteratorT =
::cuda::std::_If<std::is_pointer<KeysInputIteratorT>::value,
CacheModifiedInputIterator<AgentScanByKeyPolicyT::LOAD_MODIFIER, KeyT, OffsetT>,
KeysInputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<KeysInputIteratorT>::value,
CacheModifiedInputIterator<AgentScanByKeyPolicyT::LOAD_MODIFIER, KeyT, OffsetT>,
KeysInputIteratorT>;

using WrappedValuesInputIteratorT =
::cuda::std::_If<std::is_pointer<ValuesInputIteratorT>::value,
CacheModifiedInputIterator<AgentScanByKeyPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
ValuesInputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<ValuesInputIteratorT>::value,
CacheModifiedInputIterator<AgentScanByKeyPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
ValuesInputIteratorT>;

using BlockLoadKeysT = BlockLoad<KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentScanByKeyPolicyT::LOAD_ALGORITHM>;

Expand Down
14 changes: 7 additions & 7 deletions cub/cub/agent/agent_segment_fixup.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -173,18 +173,18 @@ struct AgentSegmentFixup
// Cache-modified Input iterator wrapper type (for applying cache modifier) for keys
// Wrap the native input pointer with CacheModifiedValuesInputIterator
// or directly use the supplied input iterator type
using WrappedPairsInputIteratorT =
::cuda::std::_If<std::is_pointer<PairsInputIteratorT>::value,
CacheModifiedInputIterator<AgentSegmentFixupPolicyT::LOAD_MODIFIER, KeyValuePairT, OffsetT>,
PairsInputIteratorT>;
using WrappedPairsInputIteratorT = ::cuda::std::conditional_t<
std::is_pointer<PairsInputIteratorT>::value,
CacheModifiedInputIterator<AgentSegmentFixupPolicyT::LOAD_MODIFIER, KeyValuePairT, OffsetT>,
PairsInputIteratorT>;

// Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values
// Wrap the native input pointer with CacheModifiedValuesInputIterator
// or directly use the supplied input iterator type
using WrappedFixupInputIteratorT =
::cuda::std::_If<std::is_pointer<AggregatesOutputIteratorT>::value,
CacheModifiedInputIterator<AgentSegmentFixupPolicyT::LOAD_MODIFIER, ValueT, OffsetT>,
AggregatesOutputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<AggregatesOutputIteratorT>::value,
CacheModifiedInputIterator<AgentSegmentFixupPolicyT::LOAD_MODIFIER, ValueT, OffsetT>,
AggregatesOutputIteratorT>;

// Reduce-value-by-segment scan operator
using ReduceBySegmentOpT = ReduceByKeyOp<::cuda::std::plus<>>;
Expand Down
12 changes: 6 additions & 6 deletions cub/cub/agent/agent_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -245,17 +245,17 @@ struct AgentSelectIf
// Wrap the native input pointer with CacheModifiedValuesInputIterator
// or directly use the supplied input iterator type
using WrappedInputIteratorT =
::cuda::std::_If<::cuda::std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentSelectIfPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;
::cuda::std::conditional_t<::cuda::std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentSelectIfPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;

// Cache-modified Input iterator wrapper type (for applying cache modifier) for values
// Wrap the native input pointer with CacheModifiedValuesInputIterator
// or directly use the supplied input iterator type
using WrappedFlagsInputIteratorT =
::cuda::std::_If<::cuda::std::is_pointer<FlagsInputIteratorT>::value,
CacheModifiedInputIterator<AgentSelectIfPolicyT::LOAD_MODIFIER, FlagT, OffsetT>,
FlagsInputIteratorT>;
::cuda::std::conditional_t<::cuda::std::is_pointer<FlagsInputIteratorT>::value,
CacheModifiedInputIterator<AgentSelectIfPolicyT::LOAD_MODIFIER, FlagT, OffsetT>,
FlagsInputIteratorT>;

// Parameterized BlockLoad type for input data
using BlockLoadT = BlockLoad<InputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSelectIfPolicyT::LOAD_ALGORITHM>;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ struct AgentSpmv
{
// Value type to pair with index type OffsetT
// (NullType if loading values directly during merge)
using MergeValueT = ::cuda::std::_If<AgentSpmvPolicyT::DIRECT_LOAD_NONZEROS, NullType, ValueT>;
using MergeValueT = ::cuda::std::conditional_t<AgentSpmvPolicyT::DIRECT_LOAD_NONZEROS, NullType, ValueT>;

OffsetT row_end_offset;
MergeValueT nonzero;
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/agent/agent_three_way_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,9 @@ struct AgentThreeWayPartition
static constexpr int TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD;

using WrappedInputIteratorT =
::cuda::std::_If<std::is_pointer<InputIteratorT>::value,
cub::CacheModifiedInputIterator<PolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;
::cuda::std::conditional_t<std::is_pointer<InputIteratorT>::value,
cub::CacheModifiedInputIterator<PolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;

// Parameterized BlockLoad type for input data
using BlockLoadT = cub::BlockLoad<InputT, BLOCK_THREADS, ITEMS_PER_THREAD, PolicyT::LOAD_ALGORITHM>;
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/agent/agent_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -146,20 +146,20 @@ struct AgentUniqueByKey
};

// Cache-modified Input iterator wrapper type (for applying cache modifier) for keys
using WrappedKeyInputIteratorT = typename std::conditional<
using WrappedKeyInputIteratorT = typename ::cuda::std::conditional_t<
std::is_pointer<KeyInputIteratorT>::value,
CacheModifiedInputIterator<AgentUniqueByKeyPolicyT::LOAD_MODIFIER, KeyT, OffsetT>, // Wrap the native input pointer
// with
// CacheModifiedValuesInputIterator
KeyInputIteratorT>::type; // Directly use the supplied input iterator type
KeyInputIteratorT>; // Directly use the supplied input iterator type

// Cache-modified Input iterator wrapper type (for applying cache modifier) for values
using WrappedValueInputIteratorT = typename std::conditional<
using WrappedValueInputIteratorT = typename ::cuda::std::conditional_t<
std::is_pointer<ValueInputIteratorT>::value,
CacheModifiedInputIterator<AgentUniqueByKeyPolicyT::LOAD_MODIFIER, ValueT, OffsetT>, // Wrap the native input
// pointer with
// CacheModifiedValuesInputIterator
ValueInputIteratorT>::type; // Directly use the supplied input iterator type
ValueInputIteratorT>; // Directly use the supplied input iterator type

// Parameterized BlockLoad type for input data
using BlockLoadKeys = BlockLoad<KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentUniqueByKeyPolicyT::LOAD_ALGORITHM>;
Expand Down
Loading
Loading