
graphics.scenery.volumes.VolumeRenderer.cl Maven / Gradle / Ivy
/*
volume and iso surface rendering
volume rendering adapted from the Nvidia sdk sample
http://developer.download.nvidia.com/compute/cuda/4_2/rel/sdk/website/OpenCL/html/samples.html
Author: Martin Weigert ([email protected])
Loic Royer ([email protected])
*/
// Loop unrolling length:
#define LOOPUNROLL 16
// Typedefs:
//typedef unsigned int uint;
//typedef unsigned char uchar;
// random number generator for dithering
inline
float random(uint x, uint y)
{
uint a = 4421 +(1+x)*(1+y) +x +y;
for(uint i=0; i < 10; i++)
{
a = ((uint)1664525 * a + (uint)1013904223) % (uint)79197919;
}
float rnd = (a*1.0f)/(79197919.f);
return rnd-0.5f;
}
// intersect ray with a box
// http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
inline
int intersectBox(float4 r_o, float4 r_d, float4 boxmin, float4 boxmax, float *tnear, float *tfar)
{
// compute intersection of ray with all six bbox planes
float4 invR = (float4)(1.0f,1.0f,1.0f,1.0f) / r_d;
float4 tbot = invR * (boxmin - r_o);
float4 ttop = invR * (boxmax - r_o);
// re-order intersections to find smallest and largest on each axis
float4 tmin = min(ttop, tbot);
float4 tmax = max(ttop, tbot);
// find the largest tmin and the smallest tmax
float largest_tmin = max(max(tmin.x, tmin.y), max(tmin.x, tmin.z));
float smallest_tmax = min(min(tmax.x, tmax.y), min(tmax.x, tmax.z));
*tnear = largest_tmin;
*tfar = smallest_tmax;
return smallest_tmax > largest_tmin;
}
// convert float4 into uint:
inline
uint rgbaFloatToInt(float4 rgba)
{
rgba = clamp(rgba,(float4)(0.f,0.f,0.f,0.f),(float4)(1.f,1.f,1.f,1.f));
return ((uint)(rgba.w*255)<<24) | ((uint)(rgba.z*255)<<16) | ((uint)(rgba.y*255)<<8) | (uint)(rgba.x*255);
}
// convert float4 into uint and take the max with an existing RGBA value in uint form:
inline
uint rgbaFloatToIntAndMax(uint existing, float4 rgba)
{
rgba = clamp(rgba,(float4)(0.f,0.f,0.f,0.f),(float4)(1.f,1.f,1.f,1.f));
const uint nr = (uint)(rgba.x*255);
const uint ng = (uint)(rgba.y*255);
const uint nb = (uint)(rgba.z*255);
const uint na = (uint)(rgba.w*255);
const uint er = existing&0xFF;
const uint eg = (existing>>8)&0xFF;
const uint eb = (existing>>16)&0xFF;
const uint ea = (existing>>24)&0xFF;
const uint r = max(nr,er);
const uint g = max(ng,eg);
const uint b = max(nb,eb);
const uint a = max(na,ea);
return a<<24|b<<16|g<<8|r ;
}
// multiply matrix with vector
float4 mult(__constant float* M, float4 v){
float4 res;
res.x = dot(v, (float4)(M[0],M[1],M[2],M[3]));
res.y = dot(v, (float4)(M[4],M[5],M[6],M[7]));
res.z = dot(v, (float4)(M[8],M[9],M[10],M[11]));
res.w = dot(v, (float4)(M[12],M[13],M[14],M[15]));
return res;
}
__kernel void
dummy_render(
__global float4* d_output,
const uint imageW,
const uint imageH,
const float brightness,
const float trangemin,
const float trangemax,
const float gamma,
const float alpha_blending,
const int maxsteps,
const float dithering,
const float phase,
const int clear,
const float boxMin_x,
const float boxMax_x,
const float boxMin_y,
const float boxMax_y,
const float boxMin_z,
const float boxMax_z,
__constant float* invP,
__constant float* invM,
__read_only image3d_t volume)
{
const uint x = get_global_id(0);
const uint y = get_global_id(1);
d_output[x + y*imageW] = (float4)(0.0f, 0.0f, 1.0f, 1.0f);
}
// Render function,
// performs max projection and then uses the transfert function to obtain a color per pixel:
__kernel void
maxproj_render(
__global float4 *d_output,
const uint imageW,
const uint imageH,
const float brightness,
const float trangemin,
const float trangemax,
const float gamma,
const float alpha_blending,
const int maxsteps,
const float dithering,
const float phase,
const int clear,
const float boxMin_x,
const float boxMax_x,
const float boxMin_y,
const float boxMax_y,
const float boxMin_z,
const float boxMax_z,
__read_only image2d_t transferColor4,
__constant float* invP,
__constant float* invM,
__read_only image3d_t volume)
{
// samplers:
const sampler_t volumeSampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR ;
const sampler_t transferSampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR ;
// convert range bounds to linear map:
const float ta = 1.f/(trangemax-trangemin);
const float tb = trangemin/(trangemin-trangemax);
// box bounds using the clipping box
const float4 boxMin = (float4)(boxMin_x,boxMin_y,boxMin_z,1.f);
const float4 boxMax = (float4)(boxMax_x,boxMax_y,boxMax_z,1.f);
// thread int coordinates:
const uint x = get_global_id(0);
const uint y = get_global_id(1);
if ((x >= imageW) || (y >= imageH)) return;
// thread float coordinates:
const float u = (x / (float) imageW)*2.0f-1.0f;
const float v = (y / (float) imageH)*2.0f-1.0f;
// front and back:
const float4 front = (float4)(u,v,-1.f,1.f);
const float4 back = (float4)(u,v,1.f,1.f);
// calculate eye ray in world space
float4 orig0, orig;
float4 direc0, direc;
orig0.x = dot(front, ((float4)(invP[0],invP[1],invP[2],invP[3])));
orig0.y = dot(front, ((float4)(invP[4],invP[5],invP[6],invP[7])));
orig0.z = dot(front, ((float4)(invP[8],invP[9],invP[10],invP[11])));
orig0.w = dot(front, ((float4)(invP[12],invP[13],invP[14],invP[15])));
orig0 *= 1.f/orig0.w;
orig.x = dot(orig0, ((float4)(invM[0],invM[1],invM[2],invM[3])));
orig.y = dot(orig0, ((float4)(invM[4],invM[5],invM[6],invM[7])));
orig.z = dot(orig0, ((float4)(invM[8],invM[9],invM[10],invM[11])));
orig.w = dot(orig0, ((float4)(invM[12],invM[13],invM[14],invM[15])));
orig *= 1.f/orig.w;
direc0.x = dot(back, ((float4)(invP[0],invP[1],invP[2],invP[3])));
direc0.y = dot(back, ((float4)(invP[4],invP[5],invP[6],invP[7])));
direc0.z = dot(back, ((float4)(invP[8],invP[9],invP[10],invP[11])));
direc0.w = dot(back, ((float4)(invP[12],invP[13],invP[14],invP[15])));
direc0 *= 1.f/direc0.w;
direc0 = normalize(direc0-orig0);
direc.x = dot(direc0, ((float4)(invM[0],invM[1],invM[2],invM[3])));
direc.y = dot(direc0, ((float4)(invM[4],invM[5],invM[6],invM[7])));
direc.z = dot(direc0, ((float4)(invM[8],invM[9],invM[10],invM[11])));
direc.w = 0.0f;
// printf("%f %f %f %f\n", invP[0], invP[5], invP[10], invP[15]);
//printf("orig: %f %f %f\n", orig.x, orig.y, orig.z);
//printf("dir: %f %f %f\n", direc.x, direc.y, direc.z);
// find intersection with box
float tnear, tfar;
const int hit = intersectBox(orig,direc, boxMin, boxMax, &tnear, &tfar);
if (!hit || tfar<=0)
{
// d_output[x+imageW*y] = (float4)(orig.x, orig.y, orig.z, 1.0f);
d_output[x+imageW*y] = (float4)(0.0f, 0.0f, 0.0f, 1.0f);
return;
}
// d_output[x+imageW*y] = (float4)(tnear, tfar, 0.0f, 1.0f);
// return;
// clamp to near plane:
if (tnear < 0.0f) tnear = 0.0f;
// compute step size:
const float tstep = fabs(tnear-tfar)/((maxsteps/LOOPUNROLL)*LOOPUNROLL);
// apply phase:
orig += phase*tstep*direc;
// randomize origin point a bit:
const uint entropy = (uint)( 6779514*fast_length(orig) + 6257327*fast_length(direc) );
orig += dithering*tstep*random(entropy+x,entropy+y)*direc;
// precompute vectors:
const float4 vecstep = 0.5f*tstep*direc;
float4 pos = orig*0.5f+0.5f + tnear*0.5f*direc;
// Loop unrolling setup:
const int unrolledmaxsteps = (maxsteps/LOOPUNROLL);
// raycasting loop:
float maxp = 0.0f;
float mappedVal;
if (alpha_blending<=0.f)
{
// No alpha blending:
for(int i=0; i= imageW) || (y >= imageH)) return;
// thread float coordinates:
const float u = (x / (float) imageW)*2.0f-1.0f;
const float v = (y / (float) imageH)*2.0f-1.0f;
// front and back:
const float4 front = (float4)(u,v,-1.f,1.f);
const float4 back = (float4)(u,v,1.f,1.f);
// calculate eye ray in world space
float4 orig0, orig;
float4 direc0, direc;
orig0.x = dot(front, ((float4)(invP[0],invP[1],invP[2],invP[3])));
orig0.y = dot(front, ((float4)(invP[4],invP[5],invP[6],invP[7])));
orig0.z = dot(front, ((float4)(invP[8],invP[9],invP[10],invP[11])));
orig0.w = dot(front, ((float4)(invP[12],invP[13],invP[14],invP[15])));
orig0 *= 1.f/orig0.w;
orig.x = dot(orig0, ((float4)(invM[0],invM[1],invM[2],invM[3])));
orig.y = dot(orig0, ((float4)(invM[4],invM[5],invM[6],invM[7])));
orig.z = dot(orig0, ((float4)(invM[8],invM[9],invM[10],invM[11])));
orig.w = dot(orig0, ((float4)(invM[12],invM[13],invM[14],invM[15])));
orig *= 1.f/orig.w;
direc0.x = dot(back, ((float4)(invP[0],invP[1],invP[2],invP[3])));
direc0.y = dot(back, ((float4)(invP[4],invP[5],invP[6],invP[7])));
direc0.z = dot(back, ((float4)(invP[8],invP[9],invP[10],invP[11])));
direc0.w = dot(back, ((float4)(invP[12],invP[13],invP[14],invP[15])));
direc0 *= 1.f/direc0.w;
direc0 = normalize(direc0-orig0);
direc.x = dot(direc0, ((float4)(invM[0],invM[1],invM[2],invM[3])));
direc.y = dot(direc0, ((float4)(invM[4],invM[5],invM[6],invM[7])));
direc.z = dot(direc0, ((float4)(invM[8],invM[9],invM[10],invM[11])));
direc.w = 0.0f;
// find intersection with box
float tnear, tfar;
const int hit = intersectBox(orig,direc, boxMin, boxMax, &tnear, &tfar);
if (!hit || tfar<=0)
{
d_output[x+imageW*y] = 0.f;
return;
}
// clamp to near plane:
if (tnear < 0.0f) tnear = 0.0f;
// compute step size:
const float tstep = fabs(tnear-tfar)/((maxsteps/LOOPUNROLL)*LOOPUNROLL);
// randomize origin point a bit:
const uint entropy = (uint)( 6779514*fast_length(orig) + 6257327*fast_length(direc) );
orig += dithering*tstep*random(entropy+x,entropy+y)*direc;
// precompute vectors:
const float4 vecstep = 0.5f*tstep*direc;
float4 pos = orig*0.5f+0.5f + tnear*0.5f*direc;
// Loop unrolling setup:
const int unrolledmaxsteps = (maxsteps/LOOPUNROLL);
// iso value:
float isoVal = mad(1.0f/ta,pow(0.5f,1.0f/gamma),-tb);
// starting value:
float newVal = 0; //read_imagef(volume, volumeSampler, pos).x;
// is iso surface value greater or lower:
bool isGreater = newVal>isoVal;
bool hitIso = false;
// first pass:
for(int i=0; iisoVal) != isGreater)
{
hitIso = true;
break;
}
pos+=vecstep;
}
if (hitIso) break;
}
//early termination if iso surface not hit:
if (!hitIso)
{
d_output[x+imageW*y] = 0.f;
return;
}
//second pass:
hitIso = false;
pos-=2*vecstep;
const float4 finevecstep = 3*vecstep/maxsteps;
for(int i=0; iisoVal) != isGreater)
{
hitIso = true;
break;
}
pos+=finevecstep;
}
if (hitIso) break;
}
/**/
// find the real intersection point
float oldVal = read_imagef(volume, volumeSampler, pos-finevecstep).x;
float lam = (newVal - isoVal)/(newVal-oldVal);
pos += lam*vecstep;
// getting the normals and do some nice phong shading
// build light vector:
float4 light = (float4)(-lightX,-lightY,-lightZ,0);
float c_diffuse = 0.2;
float c_specular = 0.6;
light = mult(invM,light);
light = fast_normalize(light);
// compute lateral step for normal calculation:
const float latx = 1.0f/(get_image_width(volume));
const float laty = 1.0f/(get_image_height(volume));
const float latz = 1.0f/(get_image_depth(volume));
// robust 2nd order normal estimation:
float4 normal;
normal.x = 2.f*read_imagef(volume,volumeSampler,pos+(float4)(latx,0,0,0)).x-
2.f*read_imagef(volume,volumeSampler,pos+(float4)(-latx,0,0,0)).x+
read_imagef(volume,volumeSampler,pos+(float4)(2.f*latx,0,0,0)).x-
read_imagef(volume,volumeSampler,pos+(float4)(-2.f*latx,0,0,0)).x;
normal.y = 2.f*read_imagef(volume,volumeSampler,pos+(float4)(0,laty,0,0)).x-
2.f*read_imagef(volume,volumeSampler,pos+(float4)(0,-laty,0,0)).x+
read_imagef(volume,volumeSampler,pos+(float4)(0,2.f*laty,0,0)).x-
read_imagef(volume,volumeSampler,pos+(float4)(0,-2.f*laty,0,0)).x;
normal.z = 2.f*read_imagef(volume,volumeSampler,pos+(float4)(0,0,latz,0)).x-
2.f*read_imagef(volume,volumeSampler,pos+(float4)(0,0,-latz,0)).x+
read_imagef(volume,volumeSampler,pos+(float4)(0,0,2.f*latz,0)).x-
read_imagef(volume,volumeSampler,pos+(float4)(0,0,-2.f*latz,0)).x;
normal.w = 0;
// flip normal if we are comming from values greater than isoVal...
normal = (1.f-2*isGreater)*fast_normalize(normal);
// Blinn-Phong specular reflection:
//float diffuse = fmax(0.f,dot(light,normal));
//float specular = native_powr(fmax(0.f,dot(fast_normalize(light+fast_normalize(direc)),fast_normalize(normal))),45);
// Phong specular reflection:
float diffuse = fmax(0.f,dot(light,normal));
float4 reflect = 2*dot(light,normal)*normal-light;
float specular = native_powr(fmax(0.f,dot(fast_normalize(reflect),fast_normalize(direc))),10);
// Mapping to transfert function range and gamma correction:
const float mappedVal = gamma;
// lookup in transfer function texture:
float4 color = read_imagef(transferColor4, transferSampler, (float2)(mappedVal,0.0f));
const float lighting = (c_diffuse*diffuse + (diffuse>0)*c_specular*specular);
// Apply lighting:
const float3 lightcolor = (float3)(1.0f,1.0f,1.0f);
color.x = mad(lightcolor.x,lighting,color.x);
color.y = mad(lightcolor.y,lighting,color.y);
color.z = mad(lightcolor.z,lighting,color.z);
color = brightness*color;
//d_output[x + y*imageW] = rgbaFloatToIntAndMax(0,color);
d_output[x + y*imageW] = rgbaFloatToIntAndMax(clear*d_output[x + y*imageW],color);
}
// clears a buffer
__kernel void
clearbuffer(__global uint *buffer,
uint imageW,
uint imageH)
{
// thread int coordinates:
const uint x = get_global_id(0);
const uint y = get_global_id(1);
// clears buffer:
if ((x < imageW) || (y < imageH))
buffer[x + y*imageW] = 0;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy