3#include <cuda_runtime.h>
4#include <cuda/pipeline>
// Two int settings with in-place initialisers (1 and 2 respectively).
// NOTE(review): these look like members of a configuration struct (possibly
// TMAConfig, which is referenced further down) whose opening line is not
// visible in this view - confirm against the full header.
// NOTE(review): the leading "10"/"11" appear to be line numbers fused into the
// text by extraction - confirm against the original file.
10 int cluster_height = 1;
11 int pipeline_depth = 2;
// Forward declaration only: takes no arguments and returns bool. The
// definition is not part of this view.
// NOTE(review): judging by the name, this presumably reports whether the
// current GPU is Hopper-class; which device is queried and how the result is
// obtained cannot be determined from here - confirm against the definition.
15bool is_hopper_architecture();
// Declaration (no body here) of a function template parameterised on an
// element type T and an integer NUM_CHANNELS, which defaults to 8.
//
// Visible parameters:
//   src    - pointer to const T (read-only input).
//   dst    - pointer to T (writable output).
//   config - TMAConfig taken by const reference; the type is declared elsewhere.
//   stream - cudaStream_t, defaults to nullptr.
//
// NOTE(review): the name suggests a 2-D copy using the Tensor Memory
// Accelerator; the memory spaces of src/dst, the extents, and whether the call
// is asynchronous cannot be determined from this declaration - confirm.
// NOTE(review): the original numbering jumps from 18 to 20, so at least one
// parameter line (likely the 2-D dimensions) appears to be missing from this
// view.
17template <typename T, int NUM_CHANNELS = 8>
18void tma_copy_2d(const T* src, T* dst,
20 const TMAConfig& config,
21 cudaStream_t stream = nullptr);
// Declaration (no body here) of tma_copy_2d_fallback.
//
// Visible parameters:
//   src    - pointer to const T (read-only input).
//   dst    - pointer to T (writable output).
//   stream - cudaStream_t, defaults to nullptr.
//
// NOTE(review): T is used but no template header is visible; the original
// numbering skips 23 and 25, so the `template <...>` line and at least one
// parameter line appear to be missing from this view.
// NOTE(review): presumably the non-TMA code path for the same 2-D copy, used
// when the hardware feature is unavailable - confirm against the definition.
24void tma_copy_2d_fallback(const T* src, T* dst,
26 cudaStream_t stream = nullptr);
28} // namespace hpc::cuda13