HPC-AI-Optimization-Lab 1.0.0
High-Performance CUDA Kernels for AI/ML Workloads
Loading...
Searching...
No Matches
conv_implicit_gemm.cuh
Go to the documentation of this file.
1#pragma once
2
3#include <cuda_runtime.h>
4#include <cuda_fp16.h>
5
6namespace hpc::convolution {
7
/// Plain aggregate of thirteen `int` settings handed to conv2d_implicit_gemm.
///
/// The struct itself attaches no meaning, units, or valid ranges to its
/// fields; the group labels below are inferred from the field names only.
/// NOTE(review): confirm semantics (e.g. whether padding is symmetric, and
/// the expected tensor layout) against the kernel implementation.
///
/// Member order is part of the contract for positional aggregate
/// initialization and must not be rearranged.
struct ConvParams {
    // Presumably the problem extents: batch size and channel counts.
    int batch, in_channels, out_channels;

    // Presumably the spatial size of the input.
    int in_height, in_width;

    // Presumably the filter window size.
    int kernel_h, kernel_w;

    // Presumably the step between filter applications.
    int stride_h, stride_w;

    // Presumably the border padding applied to the input.
    int pad_h, pad_w;

    // Presumably the spacing between filter taps.
    int dilation_h, dilation_w;
};
23
24template <typename T>
25void conv2d_implicit_gemm(const T* input, const T* weight, T* output,
26 const ConvParams& params, cudaStream_t stream = nullptr);
27
28} // namespace hpc::convolution