All downloads are FREE. The search and download functionality uses the official Maven repository.

org.bytedeco.javacv.ImageTransformer.cl Maven / Gradle / Ivy

/*
 * Copyright (C) 2011-2012 Samuel Audet
 *
 * Licensed either under the Apache License, Version 2.0, or (at your option)
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation (subject to the "Classpath" exception),
 * either version 2, or any later version (collectively, the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *     http://www.gnu.org/licenses/
 *     http://www.gnu.org/software/classpath/license.html
 *
 * or as provided in the LICENSE.txt file that accompanied this code.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Sampler handed to the read_imagef()/read_imagei() calls below:
// unnormalized (pixel) coordinates, clamp addressing, nearest-neighbor filter.
const sampler_t sampler =
        CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP | CLK_NORMALIZED_COORDS_FALSE;

//const sampler_t linearSampler = CLK_NORMALIZED_COORDS_FALSE |
//                                CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;

/*
 * Bilinear interpolation of img at the unnormalized position xy.
 *
 * The four texels at floor(xy) + {(0,0), (1,0), (0,1), (1,1)} are read with
 * the nearest-neighbor `sampler` and blended using the fractional part of xy
 * as weights. Returns the weighted sum of those four samples.
 */
inline float4 readLinear(read_only image2d_t img, float2 xy) {
    const float2 base = floor(xy);
    const float fx = xy.x - base.x;
    const float fy = xy.y - base.y;

    // Fetch the 2x2 neighborhood first, then blend.
    const float4 p00 = read_imagef(img, sampler, base);
    const float4 p10 = read_imagef(img, sampler, base + (float2)(1, 0));
    const float4 p01 = read_imagef(img, sampler, base + (float2)(0, 1));
    const float4 p11 = read_imagef(img, sampler, base + (float2)(1, 1));

    float4 blended = (1-fx)*(1-fy)*p00;
    blended += fx*(1-fy)*p10;
    blended += (1-fx)*fy*p01;
    blended += fx*fy*p11;
    return blended;
}

// atom_xchg() on 32-bit integers in global memory comes from this extension.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable

/*
 * Adds val to the float stored at address using only integer exchanges.
 *
 * Each round swaps the stored value out (leaving 0 behind), adds it to the
 * pending amount, and swaps the sum back in. Whatever that second exchange
 * returns becomes the new pending amount, so the loop repeats until the
 * exchange hands back 0.
 */
inline void atomicAddFloat(global float* address, float val) {
    global int* slot = (global int*)address;
    float pending = val;
    while (pending != 0.0f) {
        // Take the current contents, leaving zero in the slot.
        const float taken = as_float(atom_xchg(slot, as_int(0.0f)));
        pending += taken;
        // Deposit the sum; anything returned here still has to be added.
        pending = as_float(atom_xchg(slot, as_int(pending)));
    }
}

// Rounds v up to the next power of two (a power of two maps to itself).
// Technique from "Bit Twiddling Hacks":
// http://graphics.stanford.edu/~seander/bithacks.html
inline int ceilPow2(int v) {
    int bits = v - 1;
    // Smear the highest set bit into every lower position (shifts 1,2,4,8,16).
    for (int shift = 1; shift <= 16; shift <<= 1) {
        bits |= bits >> shift;
    }
    return bits + 1;
}

/*
 * Sums an integer across the work-items taking part in the reduction.
 *
 *   value   - this work-item's contribution
 *   i       - this work-item's slot in scratch (0 <= i < size)
 *   size    - number of participating work-items
 *   scratch - local memory with room for at least `size` 32-bit values
 *
 * Returns scratch[0] as read after the final barrier. Every participating
 * work-item must call this function, since it executes barriers.
 */
inline int reduceSumInt(int value, int i, int size, local void* scratch) {
    // Accumulate as integers: a float has a 24-bit significand, so routing
    // counts through float storage would round sums above 2^24.
    local int *scratchi = (local int*)scratch;
    scratchi[i] = value;
    barrier(CLK_LOCAL_MEM_FENCE);
    // Pairwise tree reduction; the `i + offset < size` test handles sizes
    // that are not a power of two.
    for (int offset = ceilPow2(size)/2; offset > 0; offset >>= 1) {
        if (i < offset && i + offset < size) {
            scratchi[i] += scratchi[i + offset];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    return scratchi[0];
}

/*
 * Sums a float across the work-items taking part in the reduction.
 *
 *   value   - this work-item's contribution
 *   i       - this work-item's slot in scratch (0 <= i < size)
 *   size    - number of participating work-items
 *   scratch - local memory with room for at least `size` floats
 *
 * Returns scratch[0] as read after the final barrier. Every participating
 * work-item must call this function, since it executes barriers.
 */
inline float reduceSumFloat(float value, int i, int size, local void* scratch) {
    local float *partial = (local float*)scratch;
    partial[i] = value;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Halve the stride each round, folding the upper half onto the lower one.
    int stride = ceilPow2(size)/2;
    while (stride > 0) {
        const int peer = i + stride;
        if (i < stride && peer < size) {
            partial[i] += partial[peer];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        stride >>= 1;
    }
    return partial[0];
}

// Per-launch parameters read by multiWarpColorTransform().
struct InputData {
    int roiY;                // first image row that is processed
    int roiHeight;           // number of rows processed starting at roiY
    float zeroThreshold;     // pixels whose squared norm is below the square of this count as "zero"
    float outlierThreshold;  // pixels whose squared norm exceeds the square of this count as outliers; ignored unless its square is > 0
};

// Statistics record; multiWarpColorTransform() fills one per work-group along
// dimension 0 and reduceOutputData() folds them into element 0.
struct OutputData {
    int groupsFinished;                   // not touched by the code visible here - presumably managed elsewhere; TODO confirm
    int dstCount;                         // pixels accepted by the mask
    int dstCountZero;                     // pixels rejected as below the zero threshold
    int dstCountOutlier;                  // pixels rejected as above the outlier threshold
    float srcDstDot[DOT_SIZE];            // accumulated dot(dotImg pixel, output pixel)
    float dstDstDot[DOT_SIZE][DOT_SIZE];  // accumulated products between output pixels
};

/*
 * Warps srcImg through a homography, optionally applies an affine colour
 * transform and a second warped image as a multiplier, writes the result and
 * accumulates per-work-group statistics into outputData[get_group_id(0)].
 *
 * Work decomposition as used by the code below:
 *   - dimension 0 (x) is the image column; columns >= width are skipped;
 *   - rows are iterated inside the function over
 *     [inputData->roiY, inputData->roiY + inputData->roiHeight);
 *   - the local id in dimension 2 (lz) selects which row of H / H2 / X is used.
 *
 * Parameters:
 *   srcImg, srcImg2 - images sampled bilinearly at the warped coordinates
 *   subImg          - subtracted from the result when haveSubImg
 *   dotImg          - per-pixel vector dotted with the result when haveDotImg
 *   transImg        - receives the result before subtraction when haveTransImg
 *   dstImg          - receives the result after subtraction when haveDstImg
 *   maskImg         - pixels whose .x component is 0 are skipped when haveMaskImg
 *   width, height   - image width bounds x; height is not used in this function
 *   _H, _H2         - 3x3 matrices (9 floats each); _H2 may be null
 *   _X              - colour matrices, 16 floats per row of which the first 12
 *                     are used as a 3x4 affine transform; may be null
 *   size            - number of matrices copied to local memory; also selects
 *                     the statistics path (size == 1 versus size > 1)
 *   inputData       - ROI and thresholds
 *   outputData      - one record per work-group along dimension 0
 *
 * NOTE(review): the function executes barriers, some of them on paths taken
 * only by a subset of work-items (see below); the launch configuration must
 * make those paths uniform - confirm against the host code.
 */
inline void multiWarpColorTransform(read_only image2d_t srcImg, read_only image2d_t srcImg2, read_only image2d_t subImg,
        read_only image2d_t dotImg, write_only image2d_t transImg, write_only image2d_t dstImg, read_only image2d_t maskImg,
        int width, int height, constant float _H[][9], constant float _H2[][9], constant float/*4*/ _X[][16/*4*/], int size,
        constant struct InputData *inputData, global struct OutputData *outputData,
        bool haveSubImg, bool haveDotImg, bool haveTransImg, bool haveDstImg, bool haveMaskImg) {
    // Only x, gx, lx, lsx and lz are used below; the remaining ids are unused.
    const int x = get_global_id(0), gx = get_group_id(0), lx = get_local_id(0), lsx = get_local_size(0);
    const int y = get_global_id(1), gy = get_group_id(1), ly = get_local_id(1), lsy = get_local_size(1);
    const int z = get_global_id(2), gz = get_group_id(2), lz = get_local_id(2), lsz = get_local_size(2);

    // Per-work-item accumulators.
    int dstCount = 0, dstCountZero = 0, dstCountOutlier = 0;
    float srcDstDot = 0, dstDstDot = 0;
    // Shared scratch: holds one RGB triple per [lz][lx] in the size > 1 path
    // and doubles as the buffer for the reductions at the end.
    local float scratch[DOT_SIZE + 1][DOT_SIZE | 1][3];

    // Stage the matrices in local memory; work-items with lx < size each copy
    // one row (every lz/ly replica with the same lx writes the same values).
    local float H[DOT_SIZE][9], H2[DOT_SIZE][9], X[DOT_SIZE][12];
    if (lx < size) {
        for (int j = 0; j < 9; j++) {
            H[lx][j] = _H[lx][j];
            if (_H2) H2[lx][j] = _H2[lx][j];
        }
        for (int j = 0; j < 12; j++) {
            if (_X) X[lx][j] = _X[lx][j];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Walk the rows of the region of interest. This `y` shadows the
    // get_global_id(1) value declared above.
    for (int y = inputData->roiY; y < inputData->roiY + inputData->roiHeight; y++) {
        const int2 xy = (int2)(x, y);
        float4 dotRGB = 0, dstRGB = 0;

        // Skipped pixels still jump to skipPixel (with dstRGB == 0) rather than
        // `continue`, so that every work-item reaches the barriers there.
        if (x >= width) {
            goto skipPixel;
        }

        if (haveMaskImg) {
            if (read_imagei(maskImg, sampler, xy).x == 0) {
                goto skipPixel;
            } else {
                // dstCount is only incremented when a mask is supplied.
                dstCount++;
            }
        }

        if (haveDotImg) {
            // Compare squared norms against squared thresholds.
            float zeroThreshold2    = inputData->zeroThreshold    * inputData->zeroThreshold;
            float outlierThreshold2 = inputData->outlierThreshold * inputData->outlierThreshold;

            dotRGB = read_imagef(dotImg, sampler, xy);
            float norm2 = dot(dotRGB.xyz, dotRGB.xyz);
            if (norm2 < zeroThreshold2) {
                dstCountZero++;
                goto skipPixel;
            } else if (outlierThreshold2 > 0 && norm2 > outlierThreshold2) {
                // The outlier test is disabled when the squared threshold is 0.
                dstCountOutlier++;
                goto skipPixel;
            }
        }

        // Homography: (u, v, w) = H[lz] * (x, y, 1), then perspective divide.
        float u = H[lz][0]*x + H[lz][1]*y + H[lz][2];
        float v = H[lz][3]*x + H[lz][4]*y + H[lz][5];
        float w = H[lz][6]*x + H[lz][7]*y + H[lz][8];
        float inv_w = native_recip(w);
        float2 uv = inv_w*(float2)(u, v);// + 0.5f;
//        float4 srcRGB = read_imagef(srcImg, linearSampler, uv);
        float4 srcRGB = readLinear(srcImg, uv);
        if (_X) {
            // 3x4 affine colour transform: each output channel is a linear
            // combination of the source RGB plus a constant term.
//            srcRGB.w = 1;
//            dstRGB = (float4)(dot(X[lz][0], srcRGB), dot(X[lz][1], srcRGB),
//                              dot(X[lz][2], srcRGB), dot(X[lz][3], srcRGB));
            dstRGB.x = X[lz][0]*srcRGB.x + X[lz][1]*srcRGB.y + X[lz][2] *srcRGB.z + X[lz][3];
            dstRGB.y = X[lz][4]*srcRGB.x + X[lz][5]*srcRGB.y + X[lz][6] *srcRGB.z + X[lz][7];
            dstRGB.z = X[lz][8]*srcRGB.x + X[lz][9]*srcRGB.y + X[lz][10]*srcRGB.z + X[lz][11];
        } else {
            dstRGB = srcRGB;
        }
        if (_H2) {
            // Second warp: sample srcImg2 through H2[lz] and multiply it in.
            float u2 = H2[lz][0]*x + H2[lz][1]*y + H2[lz][2];
            float v2 = H2[lz][3]*x + H2[lz][4]*y + H2[lz][5];
            float w2 = H2[lz][6]*x + H2[lz][7]*y + H2[lz][8];
            float inv_w2 = native_recip(w2);
            float2 uv2 = inv_w2*(float2)(u2, v2);// + 0.5f;
//            dstRGB *= read_imagef(srcImg2, linearSampler, uv2);
            dstRGB *= readLinear(srcImg2, uv2);
        }
        // The fourth component is forced to 1 before any image is written.
        dstRGB.w = 1;
        if (haveTransImg) {
            // transImg receives the value before subImg is subtracted.
            write_imagef(transImg, xy, dstRGB);
        }
        if (haveSubImg) {
            dstRGB.xyz -= read_imagef(subImg, sampler, xy).xyz;
        }
        if (haveDstImg) {
            // dstImg receives the value after the subtraction.
            write_imagef(dstImg, xy, dstRGB);
        }
        if (haveDotImg) {
            srcDstDot += dot(dotRGB.xyz, dstRGB.xyz);
        }

        if (size == 1) {
            // Single-transform path: classify the output pixel by its squared
            // norm and accumulate it only when it is neither zero nor outlier.
            float zeroThreshold2    = inputData->zeroThreshold    * inputData->zeroThreshold;
            float outlierThreshold2 = inputData->outlierThreshold * inputData->outlierThreshold;

            float norm2 = dot(dstRGB.xyz, dstRGB.xyz);
            if (norm2 < zeroThreshold2) {
                dstCountZero++;
            } else if (outlierThreshold2 > 0 && norm2 > outlierThreshold2) {
                dstCountOutlier++;
            } else {
                dstDstDot += norm2;
            }
        }
skipPixel:
        if (size > 1) {
            // Multi-transform path: publish this work-item's result (zero for
            // skipped pixels), then accumulate the RGB dot products between
            // scratch[lz][i] and scratch[lx][i] for i in [0, size).
            scratch[lz][lx][0] = dstRGB.x;
            scratch[lz][lx][1] = dstRGB.y;
            scratch[lz][lx][2] = dstRGB.z;
            barrier(CLK_LOCAL_MEM_FENCE);
#pragma unroll
            for (int i = 0; i < size; i++) {
                dstDstDot += scratch[lz][i][0]*scratch[lx][i][0] +
                             scratch[lz][i][1]*scratch[lx][i][1] +
                             scratch[lz][i][2]*scratch[lx][i][2];
            }
            // Keep scratch intact until every work-item has finished reading.
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }

    // Reduce the pixel counters across dimension 0 and store them once per
    // work-group.
    // NOTE(review): the reductions execute barriers but are reached only by
    // work-items with lz == 0 - confirm this is uniform for the local sizes
    // actually used.
    if (lz == 0) {
        dstCount = reduceSumInt(dstCount, lx, lsx, scratch);
        dstCountZero = reduceSumInt(dstCountZero, lx, lsx, scratch);
        dstCountOutlier = reduceSumInt(dstCountOutlier, lx, lsx, scratch);
        if (lx == 0) {
            outputData[gx].dstCount = dstCount;
            outputData[gx].dstCountZero = dstCountZero;
            outputData[gx].dstCountOutlier = dstCountOutlier;
        }
    }
    if (size == 1) {
        // Single transform: both dot products are reduced across dimension 0
        // and written to element [0] / [0][0].
        if (haveDotImg) srcDstDot = reduceSumFloat(srcDstDot, lx, lsx, scratch);
        dstDstDot = reduceSumFloat(dstDstDot, lx, lsx, scratch);
        if (lx == 0) {
            if (haveDotImg) outputData[gx].srcDstDot[0] = srcDstDot;
            outputData[gx].dstDstDot[0][0] = dstDstDot;
        }
    } else {
        if (haveDotImg) {
            // Each lz reduces within its own scratch row.
            srcDstDot = reduceSumFloat(srcDstDot, lx, lsx, scratch[lz]);
            if (lx == 0) {
                outputData[gx].srcDstDot[lz] = srcDstDot;
            }
        }
        // No reduction here: every work-item stores its own accumulator at
        // [lz][lx].
        outputData[gx].dstDstDot[lz][lx] = dstDstDot;
    }
}

/*
 * Folds the OutputData records into outputData[0].
 *
 * Work-item x contributes outputData[x]; the three counters, every
 * srcDstDot[i] and the upper triangle (including the diagonal) of dstDstDot
 * are summed, and the work-item with local id 0 stores the totals in
 * outputData[0]. Off-diagonal sums are mirrored into the lower triangle.
 *
 * The reductions use a 256-entry local buffer indexed by local id.
 * NOTE(review): results always land in outputData[0], which looks like it
 * expects a single work-group of at most 256 work-items - confirm with the
 * host-side launch.
 */
kernel void reduceOutputData(global struct OutputData *outputData) {
    const int x = get_global_id(0);
    const int lx = get_local_id(0);
    const int lsx = get_local_size(0);
    local int scratch[256];

    // Pixel counters.
    const int totalCount = reduceSumInt(outputData[x].dstCount, lx, lsx, scratch);
    const int totalZero = reduceSumInt(outputData[x].dstCountZero, lx, lsx, scratch);
    const int totalOutlier = reduceSumInt(outputData[x].dstCountOutlier, lx, lsx, scratch);
    if (lx == 0) {
        outputData[0].dstCount = totalCount;
        outputData[0].dstCountZero = totalZero;
        outputData[0].dstCountOutlier = totalOutlier;
    }

    // Dot products: vector entry and diagonal first, then the rest of the row.
    for (int row = 0; row < DOT_SIZE; row++) {
        const float srcDstSum = reduceSumFloat(outputData[x].srcDstDot[row], lx, lsx, scratch);
        const float diagSum = reduceSumFloat(outputData[x].dstDstDot[row][row], lx, lsx, scratch);
        if (lx == 0) {
            outputData[0].srcDstDot[row] = srcDstSum;
            outputData[0].dstDstDot[row][row] = diagSum;
        }
        for (int col = row+1; col < DOT_SIZE; col++) {
            const float offDiagSum = reduceSumFloat(outputData[x].dstDstDot[row][col], lx, lsx, scratch);
            if (lx == 0) {
                // Store symmetrically.
                outputData[0].dstDstDot[row][col] = offDiagSum;
                outputData[0].dstDstDot[col][row] = offDiagSum;
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy