
srcnativelibs.Include.OpenCV.opencv2.gpu.device.detail.reduce.hpp Maven / Gradle / Ivy
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_REDUCE_DETAIL_HPP__
#define __OPENCV_GPU_REDUCE_DETAIL_HPP__
#include
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
namespace cv { namespace gpu { namespace device
{
namespace reduce_detail
{
template struct GetType;
template struct GetType
{
typedef T type;
};
template struct GetType
{
typedef T type;
};
template struct GetType
{
typedef T type;
};
template
struct For
{
template
static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
{
thrust::get(smem)[tid] = thrust::get(val);
For::loadToSmem(smem, val, tid);
}
template
static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
{
thrust::get(val) = thrust::get(smem)[tid];
For::loadFromSmem(smem, val, tid);
}
template
static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
{
typename GetType::type>::type reg = thrust::get(smem)[tid + delta];
thrust::get(smem)[tid] = thrust::get(val) = thrust::get(op)(thrust::get(val), reg);
For::merge(smem, val, tid, delta, op);
}
template
static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
{
typename GetType::type>::type reg = shfl_down(thrust::get(val), delta, width);
thrust::get(val) = thrust::get(op)(thrust::get(val), reg);
For::mergeShfl(val, delta, width, op);
}
};
template
struct For
{
template
static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
{
}
template
static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
{
}
template
static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
{
}
template
static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
{
}
};
template
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
{
smem[tid] = val;
}
template
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
{
val = smem[tid];
}
template
__device__ __forceinline__ void loadToSmem(const thrust::tuple& smem,
const thrust::tuple& val,
unsigned int tid)
{
For<0, thrust::tuple_size >::value>::loadToSmem(smem, val, tid);
}
template
__device__ __forceinline__ void loadFromSmem(const thrust::tuple& smem,
const thrust::tuple& val,
unsigned int tid)
{
For<0, thrust::tuple_size >::value>::loadFromSmem(smem, val, tid);
}
template
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
{
T reg = smem[tid + delta];
smem[tid] = val = op(val, reg);
}
template
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
{
T reg = shfl_down(val, delta, width);
val = op(val, reg);
}
template
__device__ __forceinline__ void merge(const thrust::tuple& smem,
const thrust::tuple& val,
unsigned int tid,
unsigned int delta,
const thrust::tuple& op)
{
For<0, thrust::tuple_size >::value>::merge(smem, val, tid, delta, op);
}
template
__device__ __forceinline__ void mergeShfl(const thrust::tuple& val,
unsigned int delta,
unsigned int width,
const thrust::tuple& op)
{
For<0, thrust::tuple_size >::value>::mergeShfl(val, delta, width, op);
}
template struct Generic
{
template
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
loadToSmem(smem, val, tid);
if (N >= 32)
__syncthreads();
if (N >= 2048)
{
if (tid < 1024)
merge(smem, val, tid, 1024, op);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(smem, val, tid, 512, op);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(smem, val, tid, 256, op);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(smem, val, tid, 128, op);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(smem, val, tid, 64, op);
__syncthreads();
}
if (N >= 64)
{
if (tid < 32)
merge(smem, val, tid, 32, op);
}
if (tid < 16)
{
merge(smem, val, tid, 16, op);
merge(smem, val, tid, 8, op);
merge(smem, val, tid, 4, op);
merge(smem, val, tid, 2, op);
merge(smem, val, tid, 1, op);
}
}
};
template
struct Unroll
{
static __device__ void loopShfl(Reference val, Op op, unsigned int N)
{
mergeShfl(val, I, N, op);
Unroll::loopShfl(val, op, N);
}
static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
{
merge(smem, val, tid, I, op);
Unroll::loop(smem, val, tid, op);
}
};
template
struct Unroll<0, Pointer, Reference, Op>
{
static __device__ void loopShfl(Reference, Op, unsigned int)
{
}
static __device__ void loop(Pointer, Reference, unsigned int, Op)
{
}
};
template struct WarpOptimized
{
template
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
#if __CUDA_ARCH__ >= 300
(void) smem;
(void) tid;
Unroll::loopShfl(val, op, N);
#else
loadToSmem(smem, val, tid);
if (tid < N / 2)
Unroll::loop(smem, val, tid, op);
#endif
}
};
template struct GenericOptimized32
{
enum { M = N / 32 };
template
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
const unsigned int laneId = Warp::laneId();
#if __CUDA_ARCH__ >= 300
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#else
loadToSmem(smem, val, tid);
if (laneId < 16)
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
__syncthreads();
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#endif
__syncthreads();
loadFromSmem(smem, val, tid);
if (tid < 32)
{
#if __CUDA_ARCH__ >= 300
Unroll::loopShfl(val, op, M);
#else
Unroll::loop(smem, val, tid, op);
#endif
}
}
};
template struct StaticIf;
template struct StaticIf
{
typedef T1 type;
};
template struct StaticIf
{
typedef T2 type;
};
template struct IsPowerOf2
{
enum { value = ((N != 0) && !(N & (N - 1))) };
};
template struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2::value,
WarpOptimized,
typename StaticIf<
(N <= 1024) && IsPowerOf2::value,
GenericOptimized32,
Generic
>::type
>::type reductor;
};
}
}}}
#endif // __OPENCV_GPU_REDUCE_DETAIL_HPP__
© 2015 - 2025 Weber Informatics LLC | Privacy Policy