HPC-AI-Optimization-Lab 1.0.0
High-Performance CUDA Kernels for AI/ML Workloads
Loading...
Searching...
No Matches
conv_winograd.cuh
Go to the documentation of this file.
1#pragma once
2
3#include <cuda_runtime.h>
4
5namespace hpc::convolution {
6
/// Convolution parameter bundle. Only forward-declared here; the declarations
/// in this header refer to it by const reference, so the full definition is
/// not needed at this point.
struct ConvParams;

/// Options accepted by conv2d_winograd().
struct WinogradConfig {
    /// Tile size setting; defaults to 4.
    /// NOTE(review): presumably the edge length of the Winograd transform
    /// tile -- confirm against the implementation.
    int tile_size = 4;

    /// Defaults to true.
    /// NOTE(review): presumably selects the Winograd path over a fallback
    /// path -- confirm against the implementation.
    bool use_winograd = true;
};
12
13void conv2d_winograd(const float* input, const float* weight, float* output,
14 const ConvParams& params,
15 const WinogradConfig& config = {},
16 cudaStream_t stream = nullptr);
17
18void conv2d_winograd_fallback(const float* input, const float* weight, float* output,
19 const ConvParams& params,
20 cudaStream_t stream = nullptr);
21
22} // namespace hpc::convolution