Usage Examples
Common usage patterns for Mini-ImagePipe.
Table of contents
- Basic Pipeline
- Batch Processing
- Runtime Parameter Updates
- Error Handling
- Pipeline Configuration
- Performance Tips
Basic Pipeline
#include "pipeline.h"
#include "operators/resize.h"
#include "operators/color_convert.h"
#include "operators/gaussian_blur.h"
#include "operators/sobel.h"
using namespace mini_image_pipe;
int main() {
PipelineConfig config;
config.numStreams = 4;
Pipeline pipeline(config);
// Add operators
auto resize = std::make_shared<ResizeOperator>(320, 240, InterpolationMode::BILINEAR);
auto gray = std::make_shared<ColorConvertOperator>(ColorConversionType::RGB_TO_GRAY);
auto blur = std::make_shared<GaussianBlurOperator>(GaussianKernelSize::KERNEL_5x5);
auto sobel = std::make_shared<SobelOperator>();
int n1 = pipeline.addOperator("Resize", resize);
int n2 = pipeline.addOperator("Gray", gray);
int n3 = pipeline.addOperator("Blur", blur);
int n4 = pipeline.addOperator("Sobel", sobel);
// Connect: Resize -> Gray -> Blur -> Sobel
pipeline.connect(n1, n2);
pipeline.connect(n2, n3);
pipeline.connect(n3, n4);
// Set input and execute (d_input is a device pointer to the source
// image; width/height/channels describe its dimensions)
pipeline.setInput(n1, d_input, width, height, channels);
pipeline.execute();
// Get output
void* output = pipeline.getOutput(n4);
return 0;
}
Batch Processing
For processing multiple frames efficiently:
std::vector<void*> inputs = {...}; // Array of device pointers
std::vector<void*> outputs;
Pipeline pipeline;
// ... setup pipeline ...
cudaError_t err = pipeline.executeBatch(inputs, outputs, width, height, channels);
The batch executor:
- Processes frames concurrently across multiple streams
- Reuses allocated buffers between frames
- Synchronizes only at the end of each batch
Runtime Parameter Updates
Operators can be reconfigured between executions:
auto resizeOp = std::make_shared<ResizeOperator>(640, 480);
pipeline.addOperator("Resize", resizeOp);
// Later, change target size
resizeOp->setTargetSize(320, 240);
pipeline.reset();
pipeline.setInput(...);
pipeline.execute();
Error Handling
Basic Error Checking
cudaError_t err = pipeline.execute();
if (err != cudaSuccess) {
std::cerr << "Pipeline failed: " << cudaGetErrorString(err) << std::endl;
// Check individual task states
for (const auto& task : pipeline.getTaskGraph().getTasks()) {
if (task.state.load() == TaskState::FAILED) {
std::cerr << "Task " << task.name << " failed" << std::endl;
}
}
}
Custom Error Callback
pipeline.getScheduler().setErrorCallback([](int taskId, cudaError_t err) {
std::cerr << "Task " << taskId << " failed with error: "
<< cudaGetErrorString(err) << std::endl;
});
Pipeline Configuration
PipelineConfig config;
config.numStreams = 4; // CUDA streams for parallelism
config.pinnedPoolSize = 64 * 1024 * 1024; // 64MB pinned memory pool
config.devicePoolSize = 256 * 1024 * 1024; // 256MB device memory pool
config.enableProfiling = true; // Enable CUDA profiling
config.maxBatchSize = 16; // Maximum frames per batch
Pipeline pipeline(config);
Performance Tips
- Match streams to workload: Use 2-4 streams for most workloads. Too many streams can hurt performance due to context switching.
- Reuse pipelines: Create once, execute many times. Buffer reuse significantly reduces allocation overhead.
- Batch processing: Use executeBatch() for video streams or image sequences.
- Memory pool sizing: Size pools to hold 2-3 frames worth of intermediate buffers.
- Operator fusion: When possible, combine operations to reduce memory bandwidth:
  // Good: Single fused operation
  auto op = std::make_shared<ComplexOperator>();
  // Avoid: Multiple small operations
  auto op1 = std::make_shared<Op1>();
  auto op2 = std::make_shared<Op2>();