3#include <cuda_runtime.h>
11 bool use_cluster = true;
// Declaration only: takes no arguments and returns a bool. The definition is
// not part of this view.
// NOTE(review): judging by the name, this presumably reports whether the GPU
// in use is a Hopper-class device; which device is queried, and how, cannot be
// confirmed from here — verify against the definition.
14bool is_hopper_architecture();
// Declaration only; the definition is not part of this view.
//
// Parameters, as declared:
//   input  - pointer to const T (cannot be modified through this pointer)
//   output - pointer to mutable T
//   n      - a size_t
//   config - ClusterConfig, taken by const reference
//   stream - cudaStream_t, defaults to nullptr when omitted
//
// NOTE(review): T is not declared on any visible line; presumably a
// `template` header precedes this declaration — confirm.
// NOTE(review): the name suggests a reduction of `n` elements of `input` into
// `output`, but the reduction operator, the size of `output`, and whether the
// pointers must refer to device memory are not visible here — TODO confirm.
// NOTE(review): a nullptr stream presumably selects the default stream; whether
// the call returns before the work completes is not shown — verify.
17void cluster_reduce(const T* input, T* output, size_t n,
18 const ClusterConfig& config,
19 cudaStream_t stream = nullptr);
// Declaration only; the definition is not part of this view.
//
// Parameters, as declared:
//   input  - pointer to const T (cannot be modified through this pointer)
//   output - pointer to mutable T
//   n      - a size_t
//   config - ClusterConfig, taken by const reference
//   stream - cudaStream_t, defaults to nullptr when omitted
//
// NOTE(review): T is not declared on any visible line; presumably a
// `template` header precedes this declaration — confirm.
// NOTE(review): the "_fallback" suffix suggests an alternative code path for
// when the cluster-based implementation cannot be used, but when and by whom
// this is selected is not visible here — verify against the definition and
// callers.
22void cluster_reduce_fallback(const T* input, T* output, size_t n,
23 const ClusterConfig& config,
24 cudaStream_t stream = nullptr);
26} // namespace hpc::cuda13