Parallelizing a software rasterization algorithm using OpenCL – Graphics and GPU programming

I wrote a small software rasterizer using OpenCL and would like to optimize and parallelize it further. Currently, for every triangle I scan the whole screen and check whether the triangle overlaps each pixel.

I would like to parallelize the loop and make it more efficient. For example, my idea is to process only the pixels inside each triangle's bounding box. Is that a reasonable approach?

// Rasterizes every triangle in `vbo` into `dst_buffer`, one work-item per pixel.
//
// dst_buffer   : output pixels; written at index y * get_global_size(0) + x.
// vbo          : vertex data, read as 9 consecutive floats per triangle
//                (three vertices of x, y, z). Vertex x/y are compared directly
//                against pixel centres (x + 0.5, y + 0.5).
// vbosize      : number of floats in `vbo`; a trailing partial triangle is ignored.
// ibo, ibosize : not used by this kernel.
//
// Triangles are processed in buffer order and there is no depth test, so a later
// triangle overwrites an earlier one on the pixels they share.
// Relies on edgeFunction(a, b, p), which is defined outside this block.
__kernel void sendImageToPBO(__global uchar4* dst_buffer, __global float* vbo, int vbosize,
    __global int* ibo, int ibosize)
{
    const int imageWidth = 800;
    const int imageHeight = 800;

    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items outside the image have nothing to draw. The bound is the image
    // size, not the vertex-buffer size.
    if (x >= imageWidth || y >= imageHeight)
    {
        return;
    }

    // Per-vertex colours that get interpolated across each triangle.
    const float3 c0 = (float3)(1.0f, 0.0f, 0.0f);
    const float3 c1 = (float3)(0.0f, 1.0f, 0.0f);
    const float3 c2 = (float3)(0.0f, 0.0f, 1.0f);

    // Centre of the pixel owned by this work-item.
    const float3 p = (float3)(x + 0.5f, y + 0.5f, 0.0f);

    // `i + 8 < vbosize` keeps every vbo[i .. i + 8] read inside the buffer even
    // when vbosize is not a multiple of 9.
    for (int i = 0; i + 8 < vbosize; i += 9)
    {
        // Load order is kept as in the original: the first triple is v1 and the
        // second is v0, which determines the winding seen by edgeFunction.
        const float3 v1 = (float3)(vbo[i], vbo[i + 1], vbo[i + 2]);
        const float3 v0 = (float3)(vbo[i + 3], vbo[i + 4], vbo[i + 5]);
        const float3 v2 = (float3)(vbo[i + 6], vbo[i + 7], vbo[i + 8]);

        // Axis-aligned bounding box of the triangle (max side uses fmax throughout).
        const float xmin = fmin(v0.x, fmin(v1.x, v2.x));
        const float ymin = fmin(v0.y, fmin(v1.y, v2.y));
        const float xmax = fmax(v0.x, fmax(v1.x, v2.x));
        const float ymax = fmax(v0.y, fmax(v1.y, v2.y));

        // Cheap reject: a pixel centre outside the bounding box cannot be inside
        // the triangle, so skip the three edge evaluations. Comparing in float
        // avoids any signed/unsigned conversion of possibly negative bounds.
        if (p.x < xmin || p.x > xmax || p.y < ymin || p.y > ymax)
        {
            continue;
        }

        float w0 = edgeFunction(v1, v2, p);
        float w1 = edgeFunction(v2, v0, p);
        float w2 = edgeFunction(v0, v1, p);
        if (!(w0 >= 0 && w1 >= 0 && w2 >= 0))
        {
            continue;
        }

        const float area = edgeFunction(v0, v1, v2);
        if (area == 0.0f)
        {
            // Degenerate triangle: the weights below would divide by zero.
            continue;
        }

        // Normalise the edge values BEFORE using them as interpolation weights,
        // otherwise the colour is scaled by the triangle's area.
        w0 /= area;
        w1 /= area;
        w2 /= area;

        float r = w0 * c0.x + w1 * c1.x + w2 * c2.x;
        float g = w0 * c0.y + w1 * c1.y + w2 * c2.y;
        float b = w0 * c0.z + w1 * c1.z + w2 * c2.z;

        // NOTE(review): this looks like the perspective-correct step, which
        // presumably expects the z values in `vbo` to be stored as 1/z - confirm
        // against the host code that fills the buffer.
        const float z = 1 / (w0 * v0.z + w1 * v1.z + w2 * v2.z);
        r *= z;
        g *= z;
        b *= z;

        // NOTE(review): the row pitch is the launch width, as in the original;
        // this assumes the kernel is enqueued with a global size that matches
        // the layout of dst_buffer - confirm.
        // The saturating conversion clamps out-of-range colours to [0, 255].
        dst_buffer[y * get_global_size(0) + x] =
            convert_uchar4_sat((float4)(r, g, b, 1.0f) * 255.0f);
    }
}

Game programming is the process of converting dead images into living images.

Comments are closed.