Usage Examples

Common usage patterns for Mini-ImagePipe.

Table of contents

  1. Basic Pipeline
  2. Batch Processing
  3. Runtime Parameter Updates
  4. Error Handling
    1. Basic Error Checking
    2. Custom Error Callback
  5. Pipeline Configuration
  6. Performance Tips

Basic Pipeline

#include "pipeline.h"
#include "operators/resize.h"
#include "operators/color_convert.h"
#include "operators/gaussian_blur.h"
#include "operators/sobel.h"

using namespace mini_image_pipe;

// Minimal end-to-end example: resize -> grayscale -> blur -> edge detect.
int main() {
    // Configure the pipeline; numStreams controls how many CUDA streams
    // are available for concurrent execution.
    PipelineConfig config;
    config.numStreams = 4;
    Pipeline pipeline(config);

    // Input description. In a real application d_input is a device pointer
    // obtained from cudaMalloc + cudaMemcpy (or a decoder); it is left null
    // here purely for illustration.
    const int width    = 1920;
    const int height   = 1080;
    const int channels = 3;       // RGB input
    void* d_input = nullptr;      // TODO: allocate and upload image data

    // Create the operators. Each operator is shared so it can also be
    // reconfigured later (see "Runtime Parameter Updates").
    auto resize = std::make_shared<ResizeOperator>(320, 240, InterpolationMode::BILINEAR);
    auto gray   = std::make_shared<ColorConvertOperator>(ColorConversionType::RGB_TO_GRAY);
    auto blur   = std::make_shared<GaussianBlurOperator>(GaussianKernelSize::KERNEL_5x5);
    auto sobel  = std::make_shared<SobelOperator>();

    // Register operators; addOperator returns a node id used for wiring.
    int n1 = pipeline.addOperator("Resize", resize);
    int n2 = pipeline.addOperator("Gray",   gray);
    int n3 = pipeline.addOperator("Blur",   blur);
    int n4 = pipeline.addOperator("Sobel",  sobel);

    // Connect: Resize -> Gray -> Blur -> Sobel
    pipeline.connect(n1, n2);
    pipeline.connect(n2, n3);
    pipeline.connect(n3, n4);

    // Feed the source node and run the whole graph.
    pipeline.setInput(n1, d_input, width, height, channels);
    pipeline.execute();

    // Retrieve the final node's output (a device pointer owned by the
    // pipeline; do not free it yourself).
    void* output = pipeline.getOutput(n4);
    (void)output;  // consume the result here (e.g. download with cudaMemcpy)
    return 0;
}

Batch Processing

For processing multiple frames efficiently:

// One device pointer per frame; all frames must share width/height/channels.
std::vector<void*> inputs = {...};  // Array of device pointers
// Filled by executeBatch with one output pointer per input frame.
std::vector<void*> outputs;

Pipeline pipeline;
// ... setup pipeline ...

// Runs every frame through the pipeline, distributing work across the
// configured streams; returns cudaSuccess only if the entire batch succeeded.
cudaError_t err = pipeline.executeBatch(inputs, outputs, width, height, channels);

The batch executor:

  • Processes frames concurrently across multiple streams
  • Reuses allocated buffers between frames
  • Synchronizes only at the end of each batch

Runtime Parameter Updates

Operators can be reconfigured between executions:

// Keep the shared_ptr to the operator so it can be reconfigured after
// the pipeline has been built.
auto resizeOp = std::make_shared<ResizeOperator>(640, 480);
pipeline.addOperator("Resize", resizeOp);

// Later, change target size
resizeOp->setTargetSize(320, 240);
// NOTE(review): reset() presumably invalidates cached intermediate buffers
// so the new output size takes effect — confirm against Pipeline docs.
pipeline.reset();
pipeline.setInput(...);
pipeline.execute();

Error Handling

Basic Error Checking

// Run the graph and, on failure, identify which task(s) failed.
const cudaError_t status = pipeline.execute();
if (status != cudaSuccess) {
    std::cerr << "Pipeline failed: " << cudaGetErrorString(status) << std::endl;
    // Walk the task graph and report every task whose state is FAILED.
    const auto& tasks = pipeline.getTaskGraph().getTasks();
    for (const auto& t : tasks) {
        if (t.state.load() != TaskState::FAILED) {
            continue;
        }
        std::cerr << "Task " << t.name << " failed" << std::endl;
    }
}

Custom Error Callback

pipeline.getScheduler().setErrorCallback([](int taskId, cudaError_t err) {
    std::cerr << "Task " << taskId << " failed with error: " 
              << cudaGetErrorString(err) << std::endl;
});

Pipeline Configuration

PipelineConfig config;
config.numStreams = 4;                        // CUDA streams for parallelism
config.pinnedPoolSize = 64 * 1024 * 1024;     // 64MB pinned memory pool
config.devicePoolSize = 256 * 1024 * 1024;    // 256MB device memory pool
config.enableProfiling = true;                // Enable CUDA profiling
config.maxBatchSize = 16;                     // Maximum frames per batch

Pipeline pipeline(config);

Performance Tips

  1. Match streams to workload: Use 2-4 streams for most workloads. Too many streams can hurt performance due to stream-scheduling and kernel-launch overhead without adding real concurrency.

  2. Reuse pipelines: Create once, execute many times. Buffer reuse significantly reduces allocation overhead.

  3. Batch processing: Use executeBatch() for video streams or image sequences.

  4. Memory pool sizing: Size pools to hold 2-3 frames' worth of intermediate buffers.

  5. Operator fusion: When possible, combine operations to reduce memory bandwidth:

    // Good: Single operation
    auto op = std::make_shared<ComplexOperator>();
       
    // Avoid: Multiple small operations
    auto op1 = std::make_shared<Op1>();
    auto op2 = std::make_shared<Op2>();