HPC-AI-Optimization-Lab
1.0.0
High-Performance CUDA Kernels for AI/ML Workloads
Loading...
Searching...
No Matches
conv_implicit_gemm.cuh
Go to the documentation of this file.
1
#pragma once
2
3
#include <cuda_runtime.h>
4
#include <cuda_fp16.h>
5
6
namespace hpc::convolution {
7
8
/// Parameter bundle describing a 2D convolution problem; passed by const
/// reference to conv2d_implicit_gemm().
///
/// Plain aggregate of thirteen `int` members with no default member
/// initializers: a default-initialized automatic instance (`ConvParams p;`)
/// holds indeterminate values, so callers must set every field (or use
/// aggregate / value initialization).
///
/// NOTE(review): the per-field meanings below are inferred from the member
/// names only; units, valid ranges and tensor layout are not visible in this
/// header -- confirm against the kernel implementation.
struct ConvParams {
    int batch;         ///< Presumably the number of samples in the batch.
    int in_channels;   ///< Presumably the input channel count.
    int out_channels;  ///< Presumably the output channel (filter) count.
    int in_height;     ///< Presumably the input spatial height.
    int in_width;      ///< Presumably the input spatial width.
    int kernel_h;      ///< Presumably the filter height.
    int kernel_w;      ///< Presumably the filter width.
    int stride_h;      ///< Presumably the vertical stride.
    int stride_w;      ///< Presumably the horizontal stride.
    int pad_h;         ///< Presumably the vertical padding.
    int pad_w;         ///< Presumably the horizontal padding.
    int dilation_h;    ///< Presumably the vertical dilation.
    int dilation_w;    ///< Presumably the horizontal dilation.
};
23
24
template <typename T>
25
void conv2d_implicit_gemm(const T* input, const T* weight, T* output,
26
const ConvParams& params, cudaStream_t stream = nullptr);
27
28
} // namespace hpc::convolution
04_convolution
conv_implicit_gemm.cuh
Generated by Doxygen 1.9.8