API Reference
Complete reference for Mini-ImagePipe classes and functions.
Table of contents
- Pipeline Class
- PipelineConfig
- Operators
- MemoryManager
- TaskGraph
- DAGScheduler
- Error Codes
Pipeline Class
namespace mini_image_pipe {
class Pipeline {
public:
explicit Pipeline(const PipelineConfig& config = PipelineConfig());
~Pipeline();
// Add operator to pipeline, returns node ID
int addOperator(const std::string& name, OperatorPtr op);
// Connect operators: output of 'from' feeds into 'to'
bool connect(int from, int to);
// Set input source for a node
void setInput(int nodeId, void* data, int width, int height, int channels);
// Get output from a node
void* getOutput(int nodeId);
// Execute pipeline
cudaError_t execute();
// Execute batch of frames
cudaError_t executeBatch(const std::vector<void*>& inputs,
std::vector<void*>& outputs,
int width, int height, int channels);
// Reset pipeline state
void reset();
// Access internal components
TaskGraph& getTaskGraph();
DAGScheduler& getScheduler();
};
}
Methods
| Method | Description |
|---|---|
| `addOperator` | Registers an operator, returns unique node ID |
| `connect` | Creates dependency edge between nodes |
| `setInput` | Sets input data for source nodes |
| `getOutput` | Retrieves output buffer from any node |
| `execute` | Runs the pipeline once |
| `executeBatch` | Processes multiple frames efficiently |
| `reset` | Clears execution state, keeps graph structure |
PipelineConfig
struct PipelineConfig {
int numStreams = 4; // Number of CUDA streams
size_t pinnedPoolSize = 64 * 1024 * 1024; // 64MB pinned memory pool
size_t devicePoolSize = 256 * 1024 * 1024; // 256MB device memory pool
bool enableProfiling = false; // Enable CUDA profiling
int maxBatchSize = 8; // Maximum frames in batch
};
Operators
GaussianBlurOperator
class GaussianBlurOperator : public IOperator {
public:
explicit GaussianBlurOperator(GaussianKernelSize size, float sigma = 0.0f);
void setKernelSize(GaussianKernelSize size);
void setSigma(float sigma);
GaussianKernelSize getKernelSize() const;
float getSigma() const;
};
enum class GaussianKernelSize {
KERNEL_3x3 = 3,
KERNEL_5x5 = 5,
KERNEL_7x7 = 7
};
| Parameter | Description |
|---|---|
| `size` | Kernel size (3×3, 5×5, or 7×7) |
| `sigma` | Gaussian sigma (0 = auto-calculate from kernel size) |
SobelOperator
class SobelOperator : public IOperator {
public:
SobelOperator();
// Output: single-channel gradient magnitude
};
Applies 3×3 Sobel kernels to compute gradient magnitude.
ResizeOperator
class ResizeOperator : public IOperator {
public:
ResizeOperator(int targetWidth, int targetHeight,
InterpolationMode mode = InterpolationMode::BILINEAR);
void setTargetSize(int width, int height);
void setInterpolationMode(InterpolationMode mode);
int getTargetWidth() const;
int getTargetHeight() const;
};
enum class InterpolationMode { NEAREST, BILINEAR };
| Parameter | Description |
|---|---|
| `targetWidth` | Output width in pixels |
| `targetHeight` | Output height in pixels |
| `mode` | Interpolation mode (nearest or bilinear) |
ColorConvertOperator
class ColorConvertOperator : public IOperator {
public:
explicit ColorConvertOperator(ColorConversionType type);
};
enum class ColorConversionType {
RGB_TO_GRAY,
BGR_TO_RGB,
RGBA_TO_RGB,
GRAY_TO_RGB
};
| Type | Input | Output |
|---|---|---|
| `RGB_TO_GRAY` | 3-channel RGB | 1-channel grayscale |
| `BGR_TO_RGB` | 3-channel BGR | 3-channel RGB |
| `RGBA_TO_RGB` | 4-channel RGBA | 3-channel RGB |
| `GRAY_TO_RGB` | 1-channel grayscale | 3-channel RGB |
MemoryManager
class MemoryManager {
public:
static MemoryManager& getInstance();
// Pinned host memory
void* allocatePinned(size_t size);
void freePinned(void* ptr);
// Device memory
void* allocateDevice(size_t size);
void freeDevice(void* ptr);
// Asynchronous transfers
cudaError_t copyToDeviceAsync(void* dst, const void* src,
size_t size, cudaStream_t stream);
cudaError_t copyToHostAsync(void* dst, const void* src,
size_t size, cudaStream_t stream);
// Cleanup
void shutdown();
};
Thread Safety
MemoryManager is thread-safe. All methods can be called concurrently from multiple threads.
TaskGraph
class TaskGraph {
public:
// Add node to graph
int addNode(const std::string& name, OperatorPtr op);
// Add edge between nodes
bool addEdge(int from, int to);
// Get topological sort (execution order)
std::vector<int> topologicalSort() const;
// Check for cycles
bool detectCycle() const;
// Get all tasks
const std::vector<Task>& getTasks() const;
};
DAGScheduler
class DAGScheduler {
public:
void setNumStreams(int numStreams);
void setErrorCallback(std::function<void(int, cudaError_t)> callback);
cudaError_t execute(TaskGraph& graph,
const std::vector<int>& executionOrder);
};
Error Callback
scheduler.setErrorCallback([](int taskId, cudaError_t err) {
// taskId: ID of the failed task
// err: CUDA error code
});
Error Codes
All CUDA operations return cudaError_t:
| Code | Description |
|---|---|
| `cudaSuccess` | Operation completed successfully |
| `cudaErrorMemoryAllocation` | Memory allocation failed |
| `cudaErrorInvalidValue` | Invalid parameter passed |
| `cudaErrorInvalidDevicePointer` | Invalid device pointer |
Check errors with:
cudaError_t err = pipeline.execute();
if (err != cudaSuccess) {
std::cerr << cudaGetErrorString(err) << std::endl;
}