
// gust.linalg.cuda.map_kernels_double.ptx Maven / Gradle / Ivy
// The newest version!
//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Sep 5 10:08:11 2013 (1378400891)
// Cuda compilation tools, release 5.5, V5.5.0
//
// Module-level directives: PTX ISA version 3.2, target architecture sm_30,
// 64-bit addressing.
.version 3.2
.target sm_30
.address_size 64
// Source-file table. The index after ".file" is what the ".loc <file> <line> <col>"
// directives inside the kernels refer to.
// NOTE(review): entry 1 names map_kernels_float.cu although every kernel in this
// module carries a _double suffix - presumably one .cu file is shared between the
// float and double builds; confirm against the build scripts.
.file 1 "/Users/dlwh/src/gust/src/main/resources/gust/linalg/cuda/map_kernels_float.cu", 1395631680, 12755
.file 2 "/Developer/NVIDIA/CUDA-5.5/bin//../include/math_functions_dbl_ptx3.h", 1378419394, 118830
.file 3 "/Developer/NVIDIA/CUDA-5.5/bin//../include/device_functions.h", 1378419394, 185228
.file 4 "/Developer/NVIDIA/CUDA-5.5/bin//../include/sm_30_intrinsics.h", 1378419394, 6616
// External function vprintf: two 64-bit parameters, one 32-bit return value.
// No call to it appears in the kernels visible in this part of the file.
.extern .func (.param .b32 func_retval0) vprintf
(
.param .b64 vprintf_param_0,
.param .b64 vprintf_param_1
)
;
// Forward declaration (prototype only, terminated by ';') of a helper taking two
// 64-bit parameters and returning a 64-bit value. Its body is not in this part of
// the file; presumably the slow path of double-precision trig argument reduction.
.func (.param .b64 func_retval0) __internal_trig_reduction_slowpathd
(
.param .b64 __internal_trig_reduction_slowpathd_param_0,
.param .b64 __internal_trig_reduction_slowpathd_param_1
)
;
// Forward declaration (prototype only) of a helper taking two 64-bit parameters
// and returning a 64-bit value. Its body is not in this part of the file;
// presumably the core of the double-precision pow() implementation.
.func (.param .b64 func_retval0) __internal_accurate_pow
(
.param .b64 __internal_accurate_pow_param_0,
.param .b64 __internal_accurate_pow_param_1
)
;
// NUL-terminated byte string; the bytes decode (ASCII) to "aaa\n".
.global .align 1 .b8 $str[5] = {97, 97, 97, 10, 0};
// 144 bytes (eighteen 8-byte words) of constant data. Not referenced by the kernels
// visible in this part of the file; presumably the 2/pi bit table used by the trig
// argument-reduction helper - confirm against the CUDA math headers.
.const .align 8 .b8 __cudart_i2opi_d[144] = {8, 93, 141, 31, 177, 95, 251, 107, 234, 146, 82, 138, 247, 57, 7, 61, 123, 241, 229, 235, 199, 186, 39, 117, 45, 234, 95, 158, 102, 63, 70, 79, 183, 9, 203, 39, 207, 126, 54, 109, 31, 109, 10, 90, 139, 17, 47, 239, 15, 152, 5, 222, 255, 151, 248, 31, 59, 40, 249, 189, 139, 95, 132, 156, 244, 57, 83, 131, 57, 214, 145, 57, 65, 126, 95, 180, 38, 112, 156, 233, 132, 68, 187, 46, 245, 53, 130, 232, 62, 167, 41, 177, 28, 235, 29, 254, 28, 146, 209, 9, 234, 46, 73, 6, 224, 210, 77, 66, 58, 110, 36, 183, 97, 197, 187, 222, 171, 99, 81, 254, 65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162};
// 128 bytes = sixteen little-endian f64 values. The final two entries decode to
// -0.5 (0xBFE0000000000000) and 1.0 (0x3FF0000000000000). Presumably two
// eight-entry polynomial coefficient sets for sin and cos; not referenced by the
// kernels visible in this part of the file.
.const .align 8 .b8 __cudart_sin_cos_coeffs[128] = {186, 94, 120, 249, 101, 219, 229, 61, 70, 210, 176, 44, 241, 229, 90, 190, 146, 227, 172, 105, 227, 29, 199, 62, 161, 98, 219, 25, 160, 1, 42, 191, 24, 8, 17, 17, 17, 17, 129, 63, 84, 85, 85, 85, 85, 85, 197, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 129, 253, 32, 131, 255, 168, 189, 40, 133, 239, 193, 167, 238, 33, 62, 217, 230, 6, 142, 79, 126, 146, 190, 233, 188, 221, 25, 160, 1, 250, 62, 71, 93, 193, 22, 108, 193, 86, 191, 81, 85, 85, 85, 85, 85, 165, 63, 0, 0, 0, 0, 0, 0, 224, 191, 0, 0, 0, 0, 0, 0, 240, 63};
// NUL-terminated byte string; the bytes decode (ASCII) to "__CUDA_FTZ".
.global .align 1 .b8 $str1[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
// Kernel entry point map_acos_double.
// For every index pair (x, y) with x < param_1 and y < param_0 it loads one f64
// from the buffer at param_4, evaluates the inlined math routine tagged
// ".loc 2 293" (presumably acos(), going by the kernel name - confirm against the
// .cu source) and stores the f64 result into the buffer at param_2.
//   param_0 (u32): exclusive upper bound of the y index
//   param_1 (u32): exclusive upper bound of the x index
//   param_2 (u64): destination base address (passed through cvta.to.global)
//   param_3 (u32): multiplier applied to x when forming the destination element index
//   param_4 (u64): source base address (passed through cvta.to.global)
//   param_5 (u32): multiplier applied to x when forming the source element index
// Element index = x * multiplier + y; byte offset = element index * 8.
// x starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// y starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// NOTE(review): element indices are formed in 32-bit signed arithmetic
// (mad.lo.s32) before being widened to 64 bits.
.visible .entry map_acos_double(
.param .u32 map_acos_double_param_0,
.param .u32 map_acos_double_param_1,
.param .u64 map_acos_double_param_2,
.param .u32 map_acos_double_param_3,
.param .u64 map_acos_double_param_4,
.param .u32 map_acos_double_param_5
)
{
.reg .pred %p<10>;
.reg .f32 %f<5>;
.reg .s32 %r<35>;
.reg .s64 %rd<9>;
.reg .f64 %fd<94>;
ld.param.u32 %r10, [map_acos_double_param_0];
ld.param.u32 %r11, [map_acos_double_param_1];
ld.param.u64 %rd1, [map_acos_double_param_2];
ld.param.u32 %r12, [map_acos_double_param_3];
ld.param.u64 %rd2, [map_acos_double_param_4];
ld.param.u32 %r13, [map_acos_double_param_5];
.loc 1 17 1
// %r33 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r33, %r14, %r15, %r16;
.loc 1 17 1
// Nothing to do when the starting x is already >= param_1.
setp.ge.s32 %p1, %r33, %r11;
@%p1 bra BB0_16;
.loc 1 17 1
mov.u32 %r17, %ntid.y;
.loc 1 17 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r18, %nctaid.y;
mul.lo.s32 %r2, %r18, %r17;
// %rd3 = source base, %rd6 = destination base, both as global addresses.
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// Outer loop over x: the starting y is recomputed for each x.
BB0_2:
.loc 1 17 1
mov.u32 %r19, %ctaid.y;
mov.u32 %r21, %tid.y;
mad.lo.s32 %r34, %r17, %r19, %r21;
.loc 1 17 1
setp.ge.s32 %p2, %r34, %r10;
@%p2 bra BB0_15;
// Inner loop over y: load in[x * param_5 + y].
BB0_3:
.loc 1 17 1
mad.lo.s32 %r26, %r33, %r13, %r34;
mul.wide.s32 %rd4, %r26, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 1 17 1
ld.global.f64 %fd15, [%rd5];
.loc 2 293 10
// %r6 = high 32 bits of the input (holds its sign bit);
// %fd1 = |input|, %r27 = high 32 bits of |input|.
{
.reg .b32 %temp;
mov.b64 {%temp, %r6}, %fd15;
}
abs.f64 %fd1, %fd15;
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd1;
}
// 1071801958 = 0x3FE26666: high word of roughly 0.575. Smaller magnitudes take
// the polynomial-in-x^2 path at BB0_11.
setp.lt.s32 %p3, %r27, 1071801958;
@%p3 bra BB0_11;
// Larger magnitudes: work with %fd2 = 1.0 - |input|.
mov.f64 %fd18, 0d3FF0000000000000;
.loc 2 293 10
sub.f64 %fd2, %fd18, %fd1;
{
.reg .b32 %temp;
mov.b64 {%r28, %temp}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r7}, %fd2;
}
// %fd16 = %fd2 with 0x00100000 subtracted from its high word, i.e. the exponent
// field lowered by one.
add.s32 %r29, %r7, -1048576;
mov.b64 %fd16, {%r28, %r29};
// Single-precision approximate reciprocal square root of %fd16, widened back to
// f64 as the seed %fd17.
// inline asm
cvt.rn.f32.f64 %f1, %fd16;
// inline asm
// inline asm
rsqrt.approx.ftz.f32 %f2, %f1;
// inline asm
// inline asm
cvt.f64.f32 %fd17, %f2;
// inline asm
// Refine the seed in f64: e = 1 - %fd16 * seed^2;
// %fd3 = seed + (0.375 * e + 0.5) * (e * seed).
mul.rn.f64 %fd19, %fd17, %fd17;
neg.f64 %fd20, %fd19;
fma.rn.f64 %fd21, %fd16, %fd20, %fd18;
mov.f64 %fd22, 0d3FE0000000000000;
mov.f64 %fd23, 0d3FD8000000000000;
.loc 2 293 10
fma.rn.f64 %fd24, %fd23, %fd21, %fd22;
mul.rn.f64 %fd25, %fd21, %fd17;
fma.rn.f64 %fd3, %fd24, %fd25, %fd17;
// Horner evaluation of a fixed-coefficient polynomial in %fd2.
mov.f64 %fd26, 0dBEBAC2FE66FAAC4B;
mov.f64 %fd27, 0d3EC715B371155F70;
.loc 2 293 10
fma.rn.f64 %fd28, %fd27, %fd2, %fd26;
mov.f64 %fd29, 0d3ED9A9B88EFCD9B8;
.loc 2 293 10
fma.rn.f64 %fd30, %fd28, %fd2, %fd29;
mov.f64 %fd31, 0d3EDD0F40A8A0C4C3;
.loc 2 293 10
fma.rn.f64 %fd32, %fd30, %fd2, %fd31;
mov.f64 %fd33, 0d3EF46D4CFA9E0E1F;
.loc 2 293 10
fma.rn.f64 %fd34, %fd32, %fd2, %fd33;
mov.f64 %fd35, 0d3F079C168D1E2422;
.loc 2 293 10
fma.rn.f64 %fd36, %fd34, %fd2, %fd35;
mov.f64 %fd37, 0d3F1C9A88C3BCA540;
.loc 2 293 10
fma.rn.f64 %fd38, %fd36, %fd2, %fd37;
mov.f64 %fd39, 0d3F31C4E64BD476DF;
.loc 2 293 10
fma.rn.f64 %fd40, %fd38, %fd2, %fd39;
mov.f64 %fd41, 0d3F46E8BA60009C8F;
.loc 2 293 10
fma.rn.f64 %fd42, %fd40, %fd2, %fd41;
mov.f64 %fd43, 0d3F5F1C71C62B05A2;
.loc 2 293 10
fma.rn.f64 %fd44, %fd42, %fd2, %fd43;
mov.f64 %fd45, 0d3F76DB6DB6DC9F2C;
.loc 2 293 10
fma.rn.f64 %fd46, %fd44, %fd2, %fd45;
mov.f64 %fd47, 0d3F9333333333329C;
.loc 2 293 10
fma.rn.f64 %fd48, %fd46, %fd2, %fd47;
mov.f64 %fd49, 0d3FB5555555555555;
.loc 2 293 10
fma.rn.f64 %fd50, %fd48, %fd2, %fd49;
// %fd4 = poly * %fd2 * %fd2 * %fd3
mul.f64 %fd51, %fd50, %fd2;
mul.f64 %fd52, %fd51, %fd2;
mul.f64 %fd4, %fd52, %fd3;
// High word of (1 - |input|) below 1 (zero, negative or extremely small):
// take BB0_6 instead of the regular combination.
setp.lt.s32 %p4, %r7, 1;
@%p4 bra BB0_6;
// Regular case: %fd93 = %fd3 * %fd2 + %fd4.
fma.rn.f64 %fd93, %fd3, %fd2, %fd4;
bra.uni BB0_7;
BB0_6:
// %fd93 = |input| * 0.0 (zero for finite input; NaN propagates).
mov.f64 %fd53, 0d0000000000000000;
.loc 2 293 10
mul.rn.f64 %fd93, %fd1, %fd53;
BB0_7:
// High word of (1 - |input|) has its sign bit set: multiply by +infinity
// (0x7FF0000000000000), so a zero from BB0_6 becomes NaN.
setp.gt.s32 %p5, %r7, -1;
@%p5 bra BB0_9;
mov.f64 %fd54, 0d7FF0000000000000;
.loc 2 293 10
mul.rn.f64 %fd93, %fd93, %fd54;
BB0_9:
// Input sign bit clear: %fd93 is the final value.
setp.gt.s32 %p6, %r6, -1;
@%p6 bra BB0_14;
// Input sign bit set: result = 0x400921FB54442D18 (pi) - (%fd93 + small correction).
mov.f64 %fd55, 0dBCA1A62633145C07;
.loc 2 293 10
add.rn.f64 %fd56, %fd93, %fd55;
neg.f64 %fd57, %fd56;
mov.f64 %fd58, 0d400921FB54442D18;
.loc 2 293 10
add.rn.f64 %fd93, %fd58, %fd57;
bra.uni BB0_14;
// Small-magnitude path: Horner polynomial in %fd59 = |input|^2.
BB0_11:
.loc 2 293 10
mul.f64 %fd59, %fd1, %fd1;
mov.f64 %fd60, 0dBFB3823B180754AF;
mov.f64 %fd61, 0d3FB0066BDC1895E9;
.loc 2 293 10
fma.rn.f64 %fd62, %fd61, %fd59, %fd60;
mov.f64 %fd63, 0d3FB11E52CC2F79AE;
.loc 2 293 10
fma.rn.f64 %fd64, %fd62, %fd59, %fd63;
mov.f64 %fd65, 0dBF924EAF3526861B;
.loc 2 293 10
fma.rn.f64 %fd66, %fd64, %fd59, %fd65;
mov.f64 %fd67, 0d3F91DF02A31E6CB7;
.loc 2 293 10
fma.rn.f64 %fd68, %fd66, %fd59, %fd67;
mov.f64 %fd69, 0d3F847D18B0EEC6CC;
.loc 2 293 10
fma.rn.f64 %fd70, %fd68, %fd59, %fd69;
mov.f64 %fd71, 0d3F8D0AF961BA53B0;
.loc 2 293 10
fma.rn.f64 %fd72, %fd70, %fd59, %fd71;
mov.f64 %fd73, 0d3F91BF7734CF1C48;
.loc 2 293 10
fma.rn.f64 %fd74, %fd72, %fd59, %fd73;
mov.f64 %fd75, 0d3F96E91483144EF7;
.loc 2 293 10
fma.rn.f64 %fd76, %fd74, %fd59, %fd75;
mov.f64 %fd77, 0d3F9F1C6E0A4F9F81;
.loc 2 293 10
fma.rn.f64 %fd78, %fd76, %fd59, %fd77;
mov.f64 %fd79, 0d3FA6DB6DC27FA92B;
.loc 2 293 10
fma.rn.f64 %fd80, %fd78, %fd59, %fd79;
mov.f64 %fd81, 0d3FB333333320F91B;
.loc 2 293 10
fma.rn.f64 %fd82, %fd80, %fd59, %fd81;
mov.f64 %fd83, 0d3FC5555555555F4D;
.loc 2 293 10
fma.rn.f64 %fd84, %fd82, %fd59, %fd83;
// %fd11 = |input| + (poly * |input|^2) * |input|
mul.f64 %fd85, %fd84, %fd59;
fma.rn.f64 %fd11, %fd85, %fd1, %fd1;
setp.lt.s32 %p7, %r6, 0;
@%p7 bra BB0_13;
// Input sign bit clear: result = 0x3FF921FB54442D18 (pi/2) - (%fd11 + small correction).
mov.f64 %fd86, 0dBC91A62633145C07;
.loc 2 293 10
add.rn.f64 %fd87, %fd11, %fd86;
neg.f64 %fd88, %fd87;
mov.f64 %fd89, 0d3FF921FB54442D18;
.loc 2 293 10
add.rn.f64 %fd93, %fd89, %fd88;
bra.uni BB0_14;
BB0_13:
// Input sign bit set: result = pi/2 + (%fd11 + small correction).
mov.f64 %fd90, 0d3C91A62633145C07;
.loc 2 293 10
add.rn.f64 %fd91, %fd11, %fd90;
mov.f64 %fd92, 0d3FF921FB54442D18;
.loc 2 293 10
add.rn.f64 %fd93, %fd92, %fd91;
// Store the result to out[x * param_3 + y].
BB0_14:
.loc 1 17 42
mad.lo.s32 %r30, %r33, %r12, %r34;
mul.wide.s32 %rd7, %r30, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 17 42
st.global.f64 [%rd8], %fd93;
.loc 1 17 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r34, %r2, %r34;
.loc 1 17 1
setp.lt.s32 %p8, %r34, %r10;
@%p8 bra BB0_3;
BB0_15:
.loc 1 17 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r31, %nctaid.x;
mad.lo.s32 %r33, %r31, %r14, %r33;
.loc 1 17 1
setp.lt.s32 %p9, %r33, %r11;
@%p9 bra BB0_2;
BB0_16:
.loc 1 17 2
ret;
}
// Kernel entry point map_acosh_double.
// For every index pair (x, y) with x < param_1 and y < param_0 it loads one f64
// from the buffer at param_4, evaluates the inlined math routine tagged
// ".loc 2 298" (presumably acosh(), going by the kernel name - confirm against
// the .cu source) and stores the f64 result into the buffer at param_2.
//   param_0 (u32): exclusive upper bound of the y index
//   param_1 (u32): exclusive upper bound of the x index
//   param_2 (u64): destination base address (passed through cvta.to.global)
//   param_3 (u32): multiplier applied to x when forming the destination element index
//   param_4 (u64): source base address (passed through cvta.to.global)
//   param_5 (u32): multiplier applied to x when forming the source element index
// Element index = x * multiplier + y; byte offset = element index * 8.
// x starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// y starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// NOTE(review): element indices are formed in 32-bit signed arithmetic before
// being widened to 64 bits.
.visible .entry map_acosh_double(
.param .u32 map_acosh_double_param_0,
.param .u32 map_acosh_double_param_1,
.param .u64 map_acosh_double_param_2,
.param .u32 map_acosh_double_param_3,
.param .u64 map_acosh_double_param_4,
.param .u32 map_acosh_double_param_5
)
{
.reg .pred %p<25>;
.reg .f32 %f<9>;
.reg .s32 %r<79>;
.reg .s64 %rd<9>;
.reg .f64 %fd<141>;
ld.param.u32 %r30, [map_acosh_double_param_0];
ld.param.u32 %r31, [map_acosh_double_param_1];
ld.param.u64 %rd3, [map_acosh_double_param_2];
ld.param.u32 %r32, [map_acosh_double_param_3];
ld.param.u64 %rd4, [map_acosh_double_param_4];
ld.param.u32 %r33, [map_acosh_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 18 1
// %r69 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r34, %ntid.x;
mov.u32 %r35, %ctaid.x;
mov.u32 %r36, %tid.x;
mad.lo.s32 %r69, %r34, %r35, %r36;
.loc 1 18 1
setp.ge.s32 %p1, %r69, %r31;
@%p1 bra BB1_34;
.loc 1 18 1
mov.u32 %r37, %ntid.y;
.loc 1 18 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r38, %nctaid.y;
mul.lo.s32 %r2, %r38, %r37;
// Outer loop over x: the starting y is recomputed for each x.
BB1_2:
.loc 1 18 1
mov.u32 %r39, %ctaid.y;
mov.u32 %r41, %tid.y;
mad.lo.s32 %r70, %r37, %r39, %r41;
.loc 1 18 1
setp.ge.s32 %p2, %r70, %r30;
@%p2 bra BB1_33;
.loc 1 18 1
// Per-x base element indices: %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r69, %r33;
.loc 1 18 42
mul.lo.s32 %r5, %r69, %r32;
// Inner loop over y: load in[x * param_5 + y] into %fd1.
BB1_4:
.loc 1 18 1
add.s32 %r46, %r70, %r4;
mul.wide.s32 %rd5, %r46, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 18 1
ld.global.f64 %fd1, [%rd6];
.loc 2 298 10
// %fd2 = input - 1.0. When |input - 1| exceeds 0x4330000000000000 (2^52) the
// large-argument path at BB1_19 is used.
add.f64 %fd2, %fd1, 0dBFF0000000000000;
abs.f64 %fd21, %fd2;
setp.gt.f64 %p3, %fd21, 0d4330000000000000;
@%p3 bra BB1_19;
// %fd3 = (input - 1) + sqrt(input * (input - 1) + (input - 1))
fma.rn.f64 %fd22, %fd1, %fd2, %fd2;
sqrt.rn.f64 %fd23, %fd22;
add.f64 %fd3, %fd2, %fd23;
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd3;
}
// Choose by the high word of %fd3: unsigned < 0x3FE55555 (1071994197), or
// signed < 0xBFD99999 (-1076258407), selects the small-argument path BB1_18.
setp.lt.u32 %p4, %r47, 1071994197;
setp.lt.s32 %p5, %r47, -1076258407;
or.pred %p6, %p4, %p5;
@%p6 bra BB1_18;
// Otherwise evaluate an inlined logarithm of %fd4 = %fd3 + 1.0.
// %r71 / %r72 = high / low words of %fd4.
add.f64 %fd4, %fd3, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r71}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r72, %temp}, %fd4;
}
// Positive finite arguments go to the main path at BB1_12.
setp.gt.f64 %p7, %fd4, 0d0000000000000000;
setp.lt.f64 %p8, %fd4, 0d7FF0000000000000;
and.pred %p9, %p7, %p8;
@%p9 bra BB1_12;
// Special cases: NaN -> arg + arg (BB1_11); zero -> -infinity;
// +infinity -> itself, anything else -> NaN 0xFFF8000000000000 (BB1_10).
abs.f64 %fd24, %fd4;
setp.gtu.f64 %p10, %fd24, 0d7FF0000000000000;
@%p10 bra BB1_11;
setp.neu.f64 %p11, %fd4, 0d0000000000000000;
@%p11 bra BB1_10;
mov.f64 %fd140, 0dFFF0000000000000;
bra.uni BB1_32;
BB1_10:
.loc 2 298 10
setp.eq.f64 %p12, %fd4, 0d7FF0000000000000;
selp.f64 %fd140, %fd4, 0dFFF8000000000000, %p12;
bra.uni BB1_32;
BB1_11:
.loc 2 298 10
add.f64 %fd140, %fd4, %fd4;
bra.uni BB1_32;
BB1_12:
.loc 2 298 10
// High word below 0x00100000 (exponent field zero): rescale first at BB1_14.
setp.lt.u32 %p13, %r71, 1048576;
@%p13 bra BB1_14;
mov.u32 %r73, -1023;
bra.uni BB1_15;
BB1_14:
.loc 2 298 10
// Multiply by 0x4350000000000000 (2^54) and use exponent bias -1077 instead of -1023.
mul.f64 %fd26, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r71}, %fd26;
}
{
.reg .b32 %temp;
mov.b64 {%r72, %temp}, %fd26;
}
mov.u32 %r73, -1077;
BB1_15:
.loc 2 298 10
// %r74 = (high word >> 20) + bias. %fd137 = argument with its exponent field
// replaced by 0x3FF (mask 0x800FFFFF, OR 0x3FF00000).
shr.s32 %r50, %r71, 20;
add.s32 %r74, %r73, %r50;
and.b32 %r51, %r71, -2146435073;
or.b32 %r52, %r51, 1072693248;
mov.b64 %fd137, {%r72, %r52};
// High word >= 0x3FF6A09F (1073127583, about sqrt(2)): lower the exponent field
// of %fd137 by one and add 1 to %r74.
setp.lt.u32 %p14, %r52, 1073127583;
@%p14 bra BB1_17;
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd137;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r54}, %fd137;
}
add.s32 %r55, %r54, -1048576;
mov.b64 %fd137, {%r53, %r55};
add.s32 %r74, %r74, 1;
BB1_17:
// %fd27 = %fd137 + 1.0; its reciprocal is seeded in single precision and then
// refined in f64 with FMAs into %fd33.
add.f64 %fd27, %fd137, 0d3FF0000000000000;
mov.f64 %fd29, 0d3FF0000000000000;
.loc 2 298 10
// inline asm
cvt.rn.f32.f64 %f1,%fd27;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd28,%f2;
// inline asm
neg.f64 %fd30, %fd27;
fma.rn.f64 %fd31, %fd30, %fd28, %fd29;
fma.rn.f64 %fd32, %fd31, %fd31, %fd31;
fma.rn.f64 %fd33, %fd32, %fd28, %fd28;
// %fd34 = %fd137 - 1.0; %fd36 = 2 * %fd34 * %fd33; %fd37 = %fd36^2.
add.f64 %fd34, %fd137, 0dBFF0000000000000;
mul.f64 %fd35, %fd34, %fd33;
fma.rn.f64 %fd36, %fd34, %fd33, %fd35;
mul.f64 %fd37, %fd36, %fd36;
// Horner evaluation of a fixed-coefficient polynomial in %fd37.
mov.f64 %fd38, 0d3ED0EE258B7A8B04;
mov.f64 %fd39, 0d3EB1380B3AE80F1E;
.loc 2 298 10
fma.rn.f64 %fd40, %fd39, %fd37, %fd38;
mov.f64 %fd41, 0d3EF3B2669F02676F;
.loc 2 298 10
fma.rn.f64 %fd42, %fd40, %fd37, %fd41;
mov.f64 %fd43, 0d3F1745CBA9AB0956;
.loc 2 298 10
fma.rn.f64 %fd44, %fd42, %fd37, %fd43;
mov.f64 %fd45, 0d3F3C71C72D1B5154;
.loc 2 298 10
fma.rn.f64 %fd46, %fd44, %fd37, %fd45;
mov.f64 %fd47, 0d3F624924923BE72D;
.loc 2 298 10
fma.rn.f64 %fd48, %fd46, %fd37, %fd47;
mov.f64 %fd49, 0d3F8999999999A3C4;
.loc 2 298 10
fma.rn.f64 %fd50, %fd48, %fd37, %fd49;
mov.f64 %fd51, 0d3FB5555555555554;
.loc 2 298 10
fma.rn.f64 %fd52, %fd50, %fd37, %fd51;
// Correction term for %fd36, then combination with the polynomial part.
sub.f64 %fd53, %fd34, %fd36;
add.f64 %fd54, %fd53, %fd53;
neg.f64 %fd55, %fd36;
fma.rn.f64 %fd56, %fd55, %fd34, %fd54;
mul.f64 %fd57, %fd33, %fd56;
mul.f64 %fd58, %fd52, %fd37;
fma.rn.f64 %fd59, %fd58, %fd36, %fd57;
// Add the exponent contribution: %r74 times 0x3FE62E42FEFA39EF (ln 2), plus
// %r74 times the much smaller constant 0x3C7ABC9E3B39803F (presumably the
// low-order part of ln 2).
cvt.rn.f64.s32 %fd60, %r74;
mov.f64 %fd61, 0d3FE62E42FEFA39EF;
.loc 2 298 10
fma.rn.f64 %fd62, %fd60, %fd61, %fd36;
neg.s32 %r56, %r74;
cvt.rn.f64.s32 %fd63, %r56;
fma.rn.f64 %fd64, %fd63, %fd61, %fd62;
sub.f64 %fd65, %fd64, %fd36;
sub.f64 %fd66, %fd59, %fd65;
mov.f64 %fd67, 0d3C7ABC9E3B39803F;
.loc 2 298 10
fma.rn.f64 %fd68, %fd60, %fd67, %fd66;
add.f64 %fd140, %fd62, %fd68;
bra.uni BB1_32;
// Small-argument path. With t = %fd3: %fd72 = -t * (t / (t + 2)),
// %fd73 = t + %fd72, polynomial in %fd73^2,
// result = poly * %fd73^2 * %fd73 + %fd72 + t.
BB1_18:
.loc 2 298 10
add.f64 %fd69, %fd3, 0d4000000000000000;
div.rn.f64 %fd70, %fd3, %fd69;
neg.f64 %fd71, %fd3;
mul.f64 %fd72, %fd70, %fd71;
add.f64 %fd73, %fd3, %fd72;
mul.f64 %fd74, %fd73, %fd73;
mov.f64 %fd75, 0d3ED087FFCEB2DC44;
mov.f64 %fd76, 0d3EB372FB2FBE14B5;
.loc 2 298 10
fma.rn.f64 %fd77, %fd76, %fd74, %fd75;
mov.f64 %fd78, 0d3EF3B9FF890F468C;
.loc 2 298 10
fma.rn.f64 %fd79, %fd77, %fd74, %fd78;
mov.f64 %fd80, 0d3F17457EFD51BAF8;
.loc 2 298 10
fma.rn.f64 %fd81, %fd79, %fd74, %fd80;
mov.f64 %fd82, 0d3F3C71C8DE3CE825;
.loc 2 298 10
fma.rn.f64 %fd83, %fd81, %fd74, %fd82;
mov.f64 %fd84, 0d3F6249248FA4661F;
.loc 2 298 10
fma.rn.f64 %fd85, %fd83, %fd74, %fd84;
mov.f64 %fd86, 0d3F899999999D70C4;
.loc 2 298 10
fma.rn.f64 %fd87, %fd85, %fd74, %fd86;
mov.f64 %fd88, 0d3FB5555555555462;
.loc 2 298 10
fma.rn.f64 %fd89, %fd87, %fd74, %fd88;
mul.f64 %fd90, %fd89, %fd74;
fma.rn.f64 %fd91, %fd90, %fd73, %fd72;
add.f64 %fd140, %fd91, %fd3;
bra.uni BB1_32;
// Large-argument path: the same inlined logarithm sequence applied to the input
// itself (%fd1); BB1_31 then adds ln 2 to the result.
// %r75 / %r76 = high / low words of the input.
BB1_19:
.loc 2 298 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r76, %temp}, %fd1;
}
// Positive finite inputs go to the main path at BB1_25.
setp.lt.f64 %p15, %fd1, 0d7FF0000000000000;
setp.gt.f64 %p16, %fd1, 0d0000000000000000;
and.pred %p17, %p16, %p15;
@%p17 bra BB1_25;
// Special cases: NaN -> input + input (BB1_24); zero -> -infinity;
// +infinity -> itself, anything else -> NaN (BB1_23).
abs.f64 %fd92, %fd1;
setp.gtu.f64 %p18, %fd92, 0d7FF0000000000000;
@%p18 bra BB1_24;
setp.neu.f64 %p19, %fd1, 0d0000000000000000;
@%p19 bra BB1_23;
mov.f64 %fd139, 0dFFF0000000000000;
bra.uni BB1_31;
BB1_23:
.loc 2 298 10
setp.eq.f64 %p20, %fd1, 0d7FF0000000000000;
selp.f64 %fd139, %fd1, 0dFFF8000000000000, %p20;
bra.uni BB1_31;
BB1_24:
.loc 2 298 10
add.f64 %fd139, %fd1, %fd1;
bra.uni BB1_31;
BB1_25:
.loc 2 298 10
// High word below 0x00100000 (exponent field zero): rescale first at BB1_27.
setp.lt.u32 %p21, %r75, 1048576;
@%p21 bra BB1_27;
mov.u32 %r77, -1023;
bra.uni BB1_28;
BB1_27:
.loc 2 298 10
// Multiply by 2^54 and use exponent bias -1077 instead of -1023.
mul.f64 %fd94, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd94;
}
{
.reg .b32 %temp;
mov.b64 {%r76, %temp}, %fd94;
}
mov.u32 %r77, -1077;
BB1_28:
.loc 2 298 10
// %r78 = (high word >> 20) + bias; %fd138 = input with exponent field set to 0x3FF.
shr.s32 %r59, %r75, 20;
add.s32 %r78, %r77, %r59;
and.b32 %r60, %r75, -2146435073;
or.b32 %r61, %r60, 1072693248;
mov.b64 %fd138, {%r76, %r61};
// High word >= 0x3FF6A09F (about sqrt(2)): lower the exponent field of %fd138
// by one and add 1 to %r78.
setp.lt.u32 %p22, %r61, 1073127583;
@%p22 bra BB1_30;
{
.reg .b32 %temp;
mov.b64 {%r62, %temp}, %fd138;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r63}, %fd138;
}
add.s32 %r64, %r63, -1048576;
mov.b64 %fd138, {%r62, %r64};
add.s32 %r78, %r78, 1;
BB1_30:
// Reciprocal of (%fd138 + 1.0): single-precision seed refined in f64 into %fd101.
add.f64 %fd95, %fd138, 0d3FF0000000000000;
mov.f64 %fd97, 0d3FF0000000000000;
.loc 2 298 10
// inline asm
cvt.rn.f32.f64 %f5,%fd95;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd96,%f6;
// inline asm
neg.f64 %fd98, %fd95;
fma.rn.f64 %fd99, %fd98, %fd96, %fd97;
fma.rn.f64 %fd100, %fd99, %fd99, %fd99;
fma.rn.f64 %fd101, %fd100, %fd96, %fd96;
// %fd102 = %fd138 - 1.0; %fd104 = 2 * %fd102 * %fd101; %fd105 = %fd104^2.
add.f64 %fd102, %fd138, 0dBFF0000000000000;
mul.f64 %fd103, %fd102, %fd101;
fma.rn.f64 %fd104, %fd102, %fd101, %fd103;
mul.f64 %fd105, %fd104, %fd104;
// Horner evaluation of a fixed-coefficient polynomial in %fd105.
mov.f64 %fd106, 0d3ED0EE258B7A8B04;
mov.f64 %fd107, 0d3EB1380B3AE80F1E;
.loc 2 298 10
fma.rn.f64 %fd108, %fd107, %fd105, %fd106;
mov.f64 %fd109, 0d3EF3B2669F02676F;
.loc 2 298 10
fma.rn.f64 %fd110, %fd108, %fd105, %fd109;
mov.f64 %fd111, 0d3F1745CBA9AB0956;
.loc 2 298 10
fma.rn.f64 %fd112, %fd110, %fd105, %fd111;
mov.f64 %fd113, 0d3F3C71C72D1B5154;
.loc 2 298 10
fma.rn.f64 %fd114, %fd112, %fd105, %fd113;
mov.f64 %fd115, 0d3F624924923BE72D;
.loc 2 298 10
fma.rn.f64 %fd116, %fd114, %fd105, %fd115;
mov.f64 %fd117, 0d3F8999999999A3C4;
.loc 2 298 10
fma.rn.f64 %fd118, %fd116, %fd105, %fd117;
mov.f64 %fd119, 0d3FB5555555555554;
.loc 2 298 10
fma.rn.f64 %fd120, %fd118, %fd105, %fd119;
// Correction term for %fd104, then combination with the polynomial part.
sub.f64 %fd121, %fd102, %fd104;
add.f64 %fd122, %fd121, %fd121;
neg.f64 %fd123, %fd104;
fma.rn.f64 %fd124, %fd123, %fd102, %fd122;
mul.f64 %fd125, %fd101, %fd124;
mul.f64 %fd126, %fd120, %fd105;
fma.rn.f64 %fd127, %fd126, %fd104, %fd125;
// Add the exponent contribution (%r78 times ln 2, plus the small second constant).
cvt.rn.f64.s32 %fd128, %r78;
mov.f64 %fd129, 0d3FE62E42FEFA39EF;
.loc 2 298 10
fma.rn.f64 %fd130, %fd128, %fd129, %fd104;
neg.s32 %r65, %r78;
cvt.rn.f64.s32 %fd131, %r65;
fma.rn.f64 %fd132, %fd131, %fd129, %fd130;
sub.f64 %fd133, %fd132, %fd104;
sub.f64 %fd134, %fd127, %fd133;
mov.f64 %fd135, 0d3C7ABC9E3B39803F;
.loc 2 298 10
fma.rn.f64 %fd136, %fd128, %fd135, %fd134;
add.f64 %fd139, %fd130, %fd136;
// Large-argument result = logarithm result + 0x3FE62E42FEFA39EF (ln 2).
BB1_31:
add.f64 %fd140, %fd139, 0d3FE62E42FEFA39EF;
// Store the result to out[x * param_3 + y].
BB1_32:
.loc 1 18 42
add.s32 %r66, %r70, %r5;
mul.wide.s32 %rd7, %r66, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 18 42
st.global.f64 [%rd8], %fd140;
.loc 1 18 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r70, %r2, %r70;
.loc 1 18 1
setp.lt.s32 %p23, %r70, %r30;
@%p23 bra BB1_4;
BB1_33:
.loc 1 18 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r67, %nctaid.x;
mad.lo.s32 %r69, %r67, %r34, %r69;
.loc 1 18 1
setp.lt.s32 %p24, %r69, %r31;
@%p24 bra BB1_2;
BB1_34:
.loc 1 18 2
ret;
}
// Kernel entry point map_asin_double.
// For every index pair (x, y) with x < param_1 and y < param_0 it loads one f64
// from the buffer at param_4, evaluates the inlined math routine tagged
// ".loc 2 288" (presumably asin(), going by the kernel name - confirm against the
// .cu source) and stores the f64 result into the buffer at param_2.
//   param_0 (u32): exclusive upper bound of the y index
//   param_1 (u32): exclusive upper bound of the x index
//   param_2 (u64): destination base address (passed through cvta.to.global)
//   param_3 (u32): multiplier applied to x when forming the destination element index
//   param_4 (u64): source base address (passed through cvta.to.global)
//   param_5 (u32): multiplier applied to x when forming the source element index
// Element index = x * multiplier + y; byte offset = element index * 8.
// x starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// y starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// NOTE(review): element indices are formed in 32-bit signed arithmetic before
// being widened to 64 bits.
.visible .entry map_asin_double(
.param .u32 map_asin_double_param_0,
.param .u32 map_asin_double_param_1,
.param .u64 map_asin_double_param_2,
.param .u32 map_asin_double_param_3,
.param .u64 map_asin_double_param_4,
.param .u32 map_asin_double_param_5
)
{
.reg .pred %p<7>;
.reg .s32 %r<37>;
.reg .s64 %rd<9>;
.reg .f64 %fd<71>;
ld.param.u32 %r11, [map_asin_double_param_0];
ld.param.u32 %r12, [map_asin_double_param_1];
ld.param.u64 %rd3, [map_asin_double_param_2];
ld.param.u32 %r13, [map_asin_double_param_3];
ld.param.u64 %rd4, [map_asin_double_param_4];
ld.param.u32 %r14, [map_asin_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 19 1
// %r35 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r35, %r15, %r16, %r17;
.loc 1 19 1
setp.ge.s32 %p1, %r35, %r12;
@%p1 bra BB2_10;
.loc 1 19 1
// %r2 = starting y = ntid.y * ctaid.y + tid.y, computed once and reused for every x.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r2, %r19, %r20, %r18;
.loc 1 19 22
// %r3 = y step = nctaid.y * ntid.y
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r3, %r21, %r19;
// Outer loop over x.
BB2_2:
.loc 1 19 1
setp.ge.s32 %p2, %r2, %r11;
@%p2 bra BB2_9;
.loc 1 19 1
// Per-x base element indices: %r5 for the source, %r6 for the destination.
mul.lo.s32 %r5, %r35, %r14;
.loc 1 19 42
mul.lo.s32 %r6, %r35, %r13;
mov.u32 %r36, %r2;
// Inner loop over y (%r7 is the current y): load in[x * param_5 + y].
BB2_4:
.loc 1 19 1
mov.u32 %r7, %r36;
add.s32 %r22, %r7, %r5;
mul.wide.s32 %rd5, %r22, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 19 1
ld.global.f64 %fd6, [%rd6];
.loc 2 288 10
// %r8 = high 32 bits of the input (holds its sign bit);
// %fd1 = |input|, %r23 = high 32 bits of |input|.
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd6;
}
abs.f64 %fd1, %fd6;
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd1;
}
// 1071801958 = 0x3FE26666: high word of roughly 0.575. Smaller magnitudes take
// the polynomial-in-x^2 path at BB2_7.
setp.lt.s32 %p3, %r23, 1071801958;
@%p3 bra BB2_7;
// Larger magnitudes: %fd9 = 0.5 - 0.5 * |input|, %fd10 = sqrt(%fd9).
mov.f64 %fd7, 0d3FE0000000000000;
mov.f64 %fd8, 0dBFE0000000000000;
.loc 2 288 10
fma.rn.f64 %fd9, %fd8, %fd1, %fd7;
sqrt.rn.f64 %fd10, %fd9;
// Horner evaluation of a fixed-coefficient polynomial in %fd9 (same coefficient
// values as the small-magnitude path below).
mov.f64 %fd11, 0dBFB3823B180754AF;
mov.f64 %fd12, 0d3FB0066BDC1895E9;
.loc 2 288 10
fma.rn.f64 %fd13, %fd12, %fd9, %fd11;
mov.f64 %fd14, 0d3FB11E52CC2F79AE;
.loc 2 288 10
fma.rn.f64 %fd15, %fd13, %fd9, %fd14;
mov.f64 %fd16, 0dBF924EAF3526861B;
.loc 2 288 10
fma.rn.f64 %fd17, %fd15, %fd9, %fd16;
mov.f64 %fd18, 0d3F91DF02A31E6CB7;
.loc 2 288 10
fma.rn.f64 %fd19, %fd17, %fd9, %fd18;
mov.f64 %fd20, 0d3F847D18B0EEC6CC;
.loc 2 288 10
fma.rn.f64 %fd21, %fd19, %fd9, %fd20;
mov.f64 %fd22, 0d3F8D0AF961BA53B0;
.loc 2 288 10
fma.rn.f64 %fd23, %fd21, %fd9, %fd22;
mov.f64 %fd24, 0d3F91BF7734CF1C48;
.loc 2 288 10
fma.rn.f64 %fd25, %fd23, %fd9, %fd24;
mov.f64 %fd26, 0d3F96E91483144EF7;
.loc 2 288 10
fma.rn.f64 %fd27, %fd25, %fd9, %fd26;
mov.f64 %fd28, 0d3F9F1C6E0A4F9F81;
.loc 2 288 10
fma.rn.f64 %fd29, %fd27, %fd9, %fd28;
mov.f64 %fd30, 0d3FA6DB6DC27FA92B;
.loc 2 288 10
fma.rn.f64 %fd31, %fd29, %fd9, %fd30;
mov.f64 %fd32, 0d3FB333333320F91B;
.loc 2 288 10
fma.rn.f64 %fd33, %fd31, %fd9, %fd32;
mov.f64 %fd34, 0d3FC5555555555F4D;
.loc 2 288 10
fma.rn.f64 %fd35, %fd33, %fd9, %fd34;
// With s = -2 * sqrt(%fd9) and q = poly * %fd9:
// %fd70 = ((s + pi/4) + (s * q + small correction)) + pi/4,
// where 0x3FE921FB54442D18 is pi/4.
mul.f64 %fd36, %fd35, %fd9;
mul.f64 %fd37, %fd10, 0dC000000000000000;
mov.f64 %fd38, 0d3C91A62633145C07;
.loc 2 288 10
fma.rn.f64 %fd39, %fd37, %fd36, %fd38;
add.f64 %fd40, %fd37, 0d3FE921FB54442D18;
add.f64 %fd41, %fd40, %fd39;
add.f64 %fd70, %fd41, 0d3FE921FB54442D18;
// Input high word > 0x3FEFFFFF (signed compare, so positive input >= 1.0):
// keep %fd70 unchanged.
setp.gt.s32 %p4, %r8, 1072693247;
@%p4 bra BB2_8;
// Otherwise OR the input's sign bit into the high word of %fd70.
{
.reg .b32 %temp;
mov.b64 {%r24, %temp}, %fd70;
}
and.b32 %r25, %r8, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%temp, %r26}, %fd70;
}
or.b32 %r27, %r26, %r25;
mov.b64 %fd70, {%r24, %r27};
bra.uni BB2_8;
// Small-magnitude path: Horner polynomial in %fd42 = |input|^2.
BB2_7:
.loc 2 288 10
mul.f64 %fd42, %fd1, %fd1;
mov.f64 %fd43, 0dBFB3823B180754AF;
mov.f64 %fd44, 0d3FB0066BDC1895E9;
.loc 2 288 10
fma.rn.f64 %fd45, %fd44, %fd42, %fd43;
mov.f64 %fd46, 0d3FB11E52CC2F79AE;
.loc 2 288 10
fma.rn.f64 %fd47, %fd45, %fd42, %fd46;
mov.f64 %fd48, 0dBF924EAF3526861B;
.loc 2 288 10
fma.rn.f64 %fd49, %fd47, %fd42, %fd48;
mov.f64 %fd50, 0d3F91DF02A31E6CB7;
.loc 2 288 10
fma.rn.f64 %fd51, %fd49, %fd42, %fd50;
mov.f64 %fd52, 0d3F847D18B0EEC6CC;
.loc 2 288 10
fma.rn.f64 %fd53, %fd51, %fd42, %fd52;
mov.f64 %fd54, 0d3F8D0AF961BA53B0;
.loc 2 288 10
fma.rn.f64 %fd55, %fd53, %fd42, %fd54;
mov.f64 %fd56, 0d3F91BF7734CF1C48;
.loc 2 288 10
fma.rn.f64 %fd57, %fd55, %fd42, %fd56;
mov.f64 %fd58, 0d3F96E91483144EF7;
.loc 2 288 10
fma.rn.f64 %fd59, %fd57, %fd42, %fd58;
mov.f64 %fd60, 0d3F9F1C6E0A4F9F81;
.loc 2 288 10
fma.rn.f64 %fd61, %fd59, %fd42, %fd60;
mov.f64 %fd62, 0d3FA6DB6DC27FA92B;
.loc 2 288 10
fma.rn.f64 %fd63, %fd61, %fd42, %fd62;
mov.f64 %fd64, 0d3FB333333320F91B;
.loc 2 288 10
fma.rn.f64 %fd65, %fd63, %fd42, %fd64;
mov.f64 %fd66, 0d3FC5555555555F4D;
.loc 2 288 10
fma.rn.f64 %fd67, %fd65, %fd42, %fd66;
// %fd69 = |input| + (poly * |input|^2) * |input|; then OR in the input's sign bit.
mul.f64 %fd68, %fd67, %fd42;
fma.rn.f64 %fd69, %fd68, %fd1, %fd1;
{
.reg .b32 %temp;
mov.b64 {%r28, %temp}, %fd69;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r29}, %fd69;
}
and.b32 %r30, %r8, -2147483648;
or.b32 %r31, %r29, %r30;
mov.b64 %fd70, {%r28, %r31};
// Store the result to out[x * param_3 + y].
BB2_8:
.loc 1 19 42
add.s32 %r32, %r7, %r6;
mul.wide.s32 %rd7, %r32, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 19 42
st.global.f64 [%rd8], %fd70;
.loc 1 19 22
// next y = y + nctaid.y * ntid.y; repeat while it is < param_0.
add.s32 %r9, %r3, %r7;
.loc 1 19 1
setp.lt.s32 %p5, %r9, %r11;
mov.u32 %r36, %r9;
@%p5 bra BB2_4;
BB2_9:
.loc 1 19 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r33, %nctaid.x;
mad.lo.s32 %r35, %r33, %r15, %r35;
.loc 1 19 1
setp.lt.s32 %p6, %r35, %r12;
@%p6 bra BB2_2;
BB2_10:
.loc 1 19 2
ret;
}
// Kernel entry point map_asinh_double.
// For every index pair (x, y) with x < param_1 and y < param_0 it loads one f64
// from the buffer at param_4, evaluates the inlined math routine tagged
// ".loc 2 303" (presumably asinh(), going by the kernel name - confirm against
// the .cu source) and stores the f64 result into the buffer at param_2.
//   param_0 (u32): exclusive upper bound of the y index
//   param_1 (u32): exclusive upper bound of the x index
//   param_2 (u64): destination base address (passed through cvta.to.global)
//   param_3 (u32): multiplier applied to x when forming the destination element index
//   param_4 (u64): source base address (passed through cvta.to.global)
//   param_5 (u32): multiplier applied to x when forming the source element index
// Element index = x * multiplier + y; byte offset = element index * 8.
// x starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// y starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// NOTE(review): element indices are formed in 32-bit signed arithmetic before
// being widened to 64 bits.
.visible .entry map_asinh_double(
.param .u32 map_asinh_double_param_0,
.param .u32 map_asinh_double_param_1,
.param .u64 map_asinh_double_param_2,
.param .u32 map_asinh_double_param_3,
.param .u64 map_asinh_double_param_4,
.param .u32 map_asinh_double_param_5
)
{
.reg .pred %p<25>;
.reg .f32 %f<9>;
.reg .s32 %r<84>;
.reg .s64 %rd<9>;
.reg .f64 %fd<144>;
ld.param.u32 %r30, [map_asinh_double_param_0];
ld.param.u32 %r31, [map_asinh_double_param_1];
ld.param.u64 %rd2, [map_asinh_double_param_2];
ld.param.u32 %r32, [map_asinh_double_param_3];
ld.param.u64 %rd3, [map_asinh_double_param_4];
ld.param.u32 %r33, [map_asinh_double_param_5];
// %rd1 = source base as a global address.
cvta.to.global.u64 %rd1, %rd3;
.loc 1 20 1
// %r74 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r34, %ntid.x;
mov.u32 %r35, %ctaid.x;
mov.u32 %r36, %tid.x;
mad.lo.s32 %r74, %r34, %r35, %r36;
.loc 1 20 1
setp.ge.s32 %p1, %r74, %r31;
@%p1 bra BB3_34;
.loc 1 20 1
mov.u32 %r37, %ntid.y;
.loc 1 20 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r38, %nctaid.y;
mul.lo.s32 %r2, %r38, %r37;
// %rd6 = destination base as a global address.
cvta.to.global.u64 %rd6, %rd2;
// Outer loop over x: the starting y is recomputed for each x.
BB3_2:
.loc 1 20 1
mov.u32 %r39, %ctaid.y;
mov.u32 %r41, %tid.y;
mad.lo.s32 %r75, %r37, %r39, %r41;
.loc 1 20 1
setp.ge.s32 %p2, %r75, %r30;
@%p2 bra BB3_33;
.loc 1 20 1
// Per-x base element indices: %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r74, %r33;
.loc 1 20 42
mul.lo.s32 %r5, %r74, %r32;
// Inner loop over y: load in[x * param_5 + y] into %fd1.
BB3_4:
.loc 1 20 1
add.s32 %r46, %r75, %r4;
mul.wide.s32 %rd4, %r46, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 1 20 1
ld.global.f64 %fd1, [%rd5];
.loc 2 303 10
// %fd2 = |input|, %r80 = its high 32 bits.
abs.f64 %fd2, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r80}, %fd2;
}
// High word > 0x5FEFFFFF (1609564159), i.e. |input| >= 2^512: use the
// large-argument path at BB3_19.
setp.gt.s32 %p3, %r80, 1609564159;
@%p3 bra BB3_19;
// %fd3 = |input| + input^2 / (sqrt(input^2 + 1) + 1)
mul.f64 %fd21, %fd2, %fd2;
add.f64 %fd22, %fd21, 0d3FF0000000000000;
sqrt.rn.f64 %fd23, %fd22;
add.f64 %fd24, %fd23, 0d3FF0000000000000;
div.rn.f64 %fd25, %fd21, %fd24;
add.f64 %fd3, %fd2, %fd25;
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd3;
}
// Choose by the high word of %fd3: unsigned < 0x3FE55555 (1071994197), or
// signed < 0xBFD99999 (-1076258407), selects the small-argument path BB3_18.
setp.lt.u32 %p4, %r47, 1071994197;
setp.lt.s32 %p5, %r47, -1076258407;
or.pred %p6, %p4, %p5;
@%p6 bra BB3_18;
// Otherwise evaluate an inlined logarithm of %fd4 = %fd3 + 1.0.
// %r76 / %r77 = high / low words of %fd4.
add.f64 %fd4, %fd3, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r76}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd4;
}
// Positive finite arguments go to the main path at BB3_12.
setp.gt.f64 %p7, %fd4, 0d0000000000000000;
setp.lt.f64 %p8, %fd4, 0d7FF0000000000000;
and.pred %p9, %p7, %p8;
@%p9 bra BB3_12;
// Special cases: NaN -> arg + arg (BB3_11); zero -> -infinity;
// +infinity -> itself, anything else -> NaN 0xFFF8000000000000 (BB3_10).
abs.f64 %fd26, %fd4;
setp.gtu.f64 %p10, %fd26, 0d7FF0000000000000;
@%p10 bra BB3_11;
setp.neu.f64 %p11, %fd4, 0d0000000000000000;
@%p11 bra BB3_10;
mov.f64 %fd143, 0dFFF0000000000000;
bra.uni BB3_32;
BB3_10:
.loc 2 303 10
setp.eq.f64 %p12, %fd4, 0d7FF0000000000000;
selp.f64 %fd143, %fd4, 0dFFF8000000000000, %p12;
bra.uni BB3_32;
BB3_11:
.loc 2 303 10
add.f64 %fd143, %fd4, %fd4;
bra.uni BB3_32;
BB3_12:
.loc 2 303 10
// High word below 0x00100000 (exponent field zero): rescale first at BB3_14.
setp.lt.u32 %p13, %r76, 1048576;
@%p13 bra BB3_14;
mov.u32 %r78, -1023;
bra.uni BB3_15;
BB3_14:
.loc 2 303 10
// Multiply by 0x4350000000000000 (2^54) and use exponent bias -1077 instead of -1023.
mul.f64 %fd28, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r76}, %fd28;
}
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd28;
}
mov.u32 %r78, -1077;
BB3_15:
.loc 2 303 10
// %r79 = (high word >> 20) + bias. %fd140 = argument with its exponent field
// replaced by 0x3FF (mask 0x800FFFFF, OR 0x3FF00000).
shr.s32 %r50, %r76, 20;
add.s32 %r79, %r78, %r50;
and.b32 %r51, %r76, -2146435073;
or.b32 %r52, %r51, 1072693248;
mov.b64 %fd140, {%r77, %r52};
// High word >= 0x3FF6A09F (1073127583, about sqrt(2)): lower the exponent field
// of %fd140 by one and add 1 to %r79.
setp.lt.u32 %p14, %r52, 1073127583;
@%p14 bra BB3_17;
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd140;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r54}, %fd140;
}
add.s32 %r55, %r54, -1048576;
mov.b64 %fd140, {%r53, %r55};
add.s32 %r79, %r79, 1;
BB3_17:
// %fd29 = %fd140 + 1.0; its reciprocal is seeded in single precision and then
// refined in f64 with FMAs into %fd35.
add.f64 %fd29, %fd140, 0d3FF0000000000000;
mov.f64 %fd31, 0d3FF0000000000000;
.loc 2 303 10
// inline asm
cvt.rn.f32.f64 %f1,%fd29;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd30,%f2;
// inline asm
neg.f64 %fd32, %fd29;
fma.rn.f64 %fd33, %fd32, %fd30, %fd31;
fma.rn.f64 %fd34, %fd33, %fd33, %fd33;
fma.rn.f64 %fd35, %fd34, %fd30, %fd30;
// %fd36 = %fd140 - 1.0; %fd38 = 2 * %fd36 * %fd35; %fd39 = %fd38^2.
add.f64 %fd36, %fd140, 0dBFF0000000000000;
mul.f64 %fd37, %fd36, %fd35;
fma.rn.f64 %fd38, %fd36, %fd35, %fd37;
mul.f64 %fd39, %fd38, %fd38;
// Horner evaluation of a fixed-coefficient polynomial in %fd39.
mov.f64 %fd40, 0d3ED0EE258B7A8B04;
mov.f64 %fd41, 0d3EB1380B3AE80F1E;
.loc 2 303 10
fma.rn.f64 %fd42, %fd41, %fd39, %fd40;
mov.f64 %fd43, 0d3EF3B2669F02676F;
.loc 2 303 10
fma.rn.f64 %fd44, %fd42, %fd39, %fd43;
mov.f64 %fd45, 0d3F1745CBA9AB0956;
.loc 2 303 10
fma.rn.f64 %fd46, %fd44, %fd39, %fd45;
mov.f64 %fd47, 0d3F3C71C72D1B5154;
.loc 2 303 10
fma.rn.f64 %fd48, %fd46, %fd39, %fd47;
mov.f64 %fd49, 0d3F624924923BE72D;
.loc 2 303 10
fma.rn.f64 %fd50, %fd48, %fd39, %fd49;
mov.f64 %fd51, 0d3F8999999999A3C4;
.loc 2 303 10
fma.rn.f64 %fd52, %fd50, %fd39, %fd51;
mov.f64 %fd53, 0d3FB5555555555554;
.loc 2 303 10
fma.rn.f64 %fd54, %fd52, %fd39, %fd53;
// Correction term for %fd38, then combination with the polynomial part.
sub.f64 %fd55, %fd36, %fd38;
add.f64 %fd56, %fd55, %fd55;
neg.f64 %fd57, %fd38;
fma.rn.f64 %fd58, %fd57, %fd36, %fd56;
mul.f64 %fd59, %fd35, %fd58;
mul.f64 %fd60, %fd54, %fd39;
fma.rn.f64 %fd61, %fd60, %fd38, %fd59;
// Add the exponent contribution: %r79 times 0x3FE62E42FEFA39EF (ln 2), plus
// %r79 times the much smaller constant 0x3C7ABC9E3B39803F (presumably the
// low-order part of ln 2).
cvt.rn.f64.s32 %fd62, %r79;
mov.f64 %fd63, 0d3FE62E42FEFA39EF;
.loc 2 303 10
fma.rn.f64 %fd64, %fd62, %fd63, %fd38;
neg.s32 %r56, %r79;
cvt.rn.f64.s32 %fd65, %r56;
fma.rn.f64 %fd66, %fd65, %fd63, %fd64;
sub.f64 %fd67, %fd66, %fd38;
sub.f64 %fd68, %fd61, %fd67;
mov.f64 %fd69, 0d3C7ABC9E3B39803F;
.loc 2 303 10
fma.rn.f64 %fd70, %fd62, %fd69, %fd68;
add.f64 %fd143, %fd64, %fd70;
bra.uni BB3_32;
// Small-argument path. With t = %fd3: %fd74 = -t * (t / (t + 2)),
// %fd75 = t + %fd74, polynomial in %fd75^2,
// result = poly * %fd75^2 * %fd75 + %fd74 + t.
BB3_18:
.loc 2 303 10
add.f64 %fd71, %fd3, 0d4000000000000000;
div.rn.f64 %fd72, %fd3, %fd71;
neg.f64 %fd73, %fd3;
mul.f64 %fd74, %fd72, %fd73;
add.f64 %fd75, %fd3, %fd74;
mul.f64 %fd76, %fd75, %fd75;
mov.f64 %fd77, 0d3ED087FFCEB2DC44;
mov.f64 %fd78, 0d3EB372FB2FBE14B5;
.loc 2 303 10
fma.rn.f64 %fd79, %fd78, %fd76, %fd77;
mov.f64 %fd80, 0d3EF3B9FF890F468C;
.loc 2 303 10
fma.rn.f64 %fd81, %fd79, %fd76, %fd80;
mov.f64 %fd82, 0d3F17457EFD51BAF8;
.loc 2 303 10
fma.rn.f64 %fd83, %fd81, %fd76, %fd82;
mov.f64 %fd84, 0d3F3C71C8DE3CE825;
.loc 2 303 10
fma.rn.f64 %fd85, %fd83, %fd76, %fd84;
mov.f64 %fd86, 0d3F6249248FA4661F;
.loc 2 303 10
fma.rn.f64 %fd87, %fd85, %fd76, %fd86;
mov.f64 %fd88, 0d3F899999999D70C4;
.loc 2 303 10
fma.rn.f64 %fd89, %fd87, %fd76, %fd88;
mov.f64 %fd90, 0d3FB5555555555462;
.loc 2 303 10
fma.rn.f64 %fd91, %fd89, %fd76, %fd90;
mul.f64 %fd92, %fd91, %fd76;
fma.rn.f64 %fd93, %fd92, %fd75, %fd74;
add.f64 %fd143, %fd93, %fd3;
bra.uni BB3_32;
// Large-argument path: the same inlined logarithm sequence applied to |input|
// (%fd2); BB3_31 then adds ln 2 to the result.
// %r80 already holds the high word of |input|; %r81 = its low word.
BB3_19:
.loc 2 303 10
{
.reg .b32 %temp;
mov.b64 {%r81, %temp}, %fd2;
}
// Positive finite values go to the main path at BB3_25.
setp.lt.f64 %p15, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p16, %fd2, 0d0000000000000000;
and.pred %p17, %p16, %p15;
@%p17 bra BB3_25;
// Special cases: NaN -> value + value (BB3_24); zero -> -infinity;
// +infinity -> itself, anything else -> NaN (BB3_23).
abs.f64 %fd94, %fd2;
setp.gtu.f64 %p18, %fd94, 0d7FF0000000000000;
@%p18 bra BB3_24;
setp.neu.f64 %p19, %fd2, 0d0000000000000000;
@%p19 bra BB3_23;
mov.f64 %fd142, 0dFFF0000000000000;
bra.uni BB3_31;
BB3_23:
.loc 2 303 10
setp.eq.f64 %p20, %fd2, 0d7FF0000000000000;
selp.f64 %fd142, %fd2, 0dFFF8000000000000, %p20;
bra.uni BB3_31;
BB3_24:
.loc 2 303 10
add.f64 %fd142, %fd2, %fd2;
bra.uni BB3_31;
BB3_25:
.loc 2 303 10
// High word below 0x00100000 (exponent field zero): rescale first at BB3_27.
setp.lt.u32 %p21, %r80, 1048576;
@%p21 bra BB3_27;
mov.u32 %r82, -1023;
bra.uni BB3_28;
BB3_27:
.loc 2 303 10
// Multiply by 2^54 and use exponent bias -1077 instead of -1023.
mul.f64 %fd96, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r80}, %fd96;
}
{
.reg .b32 %temp;
mov.b64 {%r81, %temp}, %fd96;
}
mov.u32 %r82, -1077;
BB3_28:
.loc 2 303 10
// %r83 = (high word >> 20) + bias; %fd141 = value with exponent field set to 0x3FF.
shr.s32 %r59, %r80, 20;
add.s32 %r83, %r82, %r59;
and.b32 %r60, %r80, -2146435073;
or.b32 %r61, %r60, 1072693248;
mov.b64 %fd141, {%r81, %r61};
// High word >= 0x3FF6A09F (about sqrt(2)): lower the exponent field of %fd141
// by one and add 1 to %r83.
setp.lt.u32 %p22, %r61, 1073127583;
@%p22 bra BB3_30;
{
.reg .b32 %temp;
mov.b64 {%r62, %temp}, %fd141;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r63}, %fd141;
}
add.s32 %r64, %r63, -1048576;
mov.b64 %fd141, {%r62, %r64};
add.s32 %r83, %r83, 1;
BB3_30:
// Reciprocal of (%fd141 + 1.0): single-precision seed refined in f64 into %fd103.
add.f64 %fd97, %fd141, 0d3FF0000000000000;
mov.f64 %fd99, 0d3FF0000000000000;
.loc 2 303 10
// inline asm
cvt.rn.f32.f64 %f5,%fd97;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd98,%f6;
// inline asm
neg.f64 %fd100, %fd97;
fma.rn.f64 %fd101, %fd100, %fd98, %fd99;
fma.rn.f64 %fd102, %fd101, %fd101, %fd101;
fma.rn.f64 %fd103, %fd102, %fd98, %fd98;
// %fd104 = %fd141 - 1.0; %fd106 = 2 * %fd104 * %fd103; %fd107 = %fd106^2.
add.f64 %fd104, %fd141, 0dBFF0000000000000;
mul.f64 %fd105, %fd104, %fd103;
fma.rn.f64 %fd106, %fd104, %fd103, %fd105;
mul.f64 %fd107, %fd106, %fd106;
// Horner evaluation of a fixed-coefficient polynomial in %fd107.
mov.f64 %fd108, 0d3ED0EE258B7A8B04;
mov.f64 %fd109, 0d3EB1380B3AE80F1E;
.loc 2 303 10
fma.rn.f64 %fd110, %fd109, %fd107, %fd108;
mov.f64 %fd111, 0d3EF3B2669F02676F;
.loc 2 303 10
fma.rn.f64 %fd112, %fd110, %fd107, %fd111;
mov.f64 %fd113, 0d3F1745CBA9AB0956;
.loc 2 303 10
fma.rn.f64 %fd114, %fd112, %fd107, %fd113;
mov.f64 %fd115, 0d3F3C71C72D1B5154;
.loc 2 303 10
fma.rn.f64 %fd116, %fd114, %fd107, %fd115;
mov.f64 %fd117, 0d3F624924923BE72D;
.loc 2 303 10
fma.rn.f64 %fd118, %fd116, %fd107, %fd117;
mov.f64 %fd119, 0d3F8999999999A3C4;
.loc 2 303 10
fma.rn.f64 %fd120, %fd118, %fd107, %fd119;
mov.f64 %fd121, 0d3FB5555555555554;
.loc 2 303 10
fma.rn.f64 %fd122, %fd120, %fd107, %fd121;
// Correction term for %fd106, then combination with the polynomial part.
sub.f64 %fd123, %fd104, %fd106;
add.f64 %fd124, %fd123, %fd123;
neg.f64 %fd125, %fd106;
fma.rn.f64 %fd126, %fd125, %fd104, %fd124;
mul.f64 %fd127, %fd103, %fd126;
mul.f64 %fd128, %fd122, %fd107;
fma.rn.f64 %fd129, %fd128, %fd106, %fd127;
// Add the exponent contribution (%r83 times ln 2, plus the small second constant).
cvt.rn.f64.s32 %fd130, %r83;
mov.f64 %fd131, 0d3FE62E42FEFA39EF;
.loc 2 303 10
fma.rn.f64 %fd132, %fd130, %fd131, %fd106;
neg.s32 %r65, %r83;
cvt.rn.f64.s32 %fd133, %r65;
fma.rn.f64 %fd134, %fd133, %fd131, %fd132;
sub.f64 %fd135, %fd134, %fd106;
sub.f64 %fd136, %fd129, %fd135;
mov.f64 %fd137, 0d3C7ABC9E3B39803F;
.loc 2 303 10
fma.rn.f64 %fd138, %fd130, %fd137, %fd136;
add.f64 %fd142, %fd132, %fd138;
// Large-argument result = logarithm result + 0x3FE62E42FEFA39EF (ln 2).
BB3_31:
add.f64 %fd143, %fd142, 0d3FE62E42FEFA39EF;
// All paths join here: OR the sign bit of the original input (%fd1) into the
// high word of the result, giving %fd139.
BB3_32:
{
.reg .b32 %temp;
mov.b64 {%temp, %r66}, %fd1;
}
and.b32 %r67, %r66, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%temp, %r68}, %fd143;
}
or.b32 %r69, %r68, %r67;
{
.reg .b32 %temp;
mov.b64 {%r70, %temp}, %fd143;
}
mov.b64 %fd139, {%r70, %r69};
.loc 1 20 42
// Store the result to out[x * param_3 + y].
add.s32 %r71, %r75, %r5;
mul.wide.s32 %rd7, %r71, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 20 42
st.global.f64 [%rd8], %fd139;
.loc 1 20 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r75, %r2, %r75;
.loc 1 20 1
setp.lt.s32 %p23, %r75, %r30;
@%p23 bra BB3_4;
BB3_33:
.loc 1 20 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r72, %nctaid.x;
mad.lo.s32 %r74, %r72, %r34, %r74;
.loc 1 20 1
setp.lt.s32 %p24, %r74, %r31;
@%p24 bra BB3_2;
BB3_34:
.loc 1 20 2
ret;
}
// map_atan_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], evaluates the math routine
// inlined from .file 2 line 283 (presumably atan, going by the kernel name), and
// stores the result to dst[y + x*param_3].
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
// All index arithmetic is 32-bit signed; only the final byte offset is widened to 64 bits.
.visible .entry map_atan_double(
.param .u32 map_atan_double_param_0,
.param .u32 map_atan_double_param_1,
.param .u64 map_atan_double_param_2,
.param .u32 map_atan_double_param_3,
.param .u64 map_atan_double_param_4,
.param .u32 map_atan_double_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<5>;
.reg .s32 %r<32>;
.reg .s64 %rd<9>;
.reg .f64 %fd<57>;
ld.param.u32 %r10, [map_atan_double_param_0];
ld.param.u32 %r11, [map_atan_double_param_1];
ld.param.u64 %rd3, [map_atan_double_param_2];
ld.param.u32 %r12, [map_atan_double_param_3];
ld.param.u64 %rd4, [map_atan_double_param_4];
ld.param.u32 %r13, [map_atan_double_param_5];
// %rd1 = destination pointer, %rd2 = source pointer (both converted to global addresses).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 21 1
// %r30 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r30, %r14, %r15, %r16;
.loc 1 21 1
// Threads whose starting x index is already out of range exit immediately.
setp.ge.s32 %p1, %r30, %r11;
@%p1 bra BB4_8;
.loc 1 21 1
// %r2 = ntid.y * ctaid.y + tid.y : starting y index (loop-invariant, reused per x).
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 1 21 22
// %r3 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// ---- outer loop over the x index (%r30) ----
BB4_2:
.loc 1 21 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB4_7;
.loc 1 21 1
// Per-x base offsets: %r5 for the source, %r6 for the destination.
mul.lo.s32 %r5, %r30, %r13;
.loc 1 21 42
mul.lo.s32 %r6, %r30, %r12;
mov.u32 %r31, %r2;
// ---- inner loop over the y index (%r31 / %r7) ----
BB4_4:
.loc 1 21 1
mov.u32 %r7, %r31;
// Source address = %rd2 + 8 * (y + x*param_5).
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 21 1
ld.global.f64 %fd1, [%rd6];
.loc 2 283 10
// Work on |x|. setp.leu is true for |x| <= 1.0 and also for NaN (unordered),
// in which case |x| itself is used as the polynomial argument.
abs.f64 %fd2, %fd1;
setp.leu.f64 %p3, %fd2, 0d3FF0000000000000;
mov.f64 %fd56, %fd2;
@%p3 bra BB4_6;
// |x| > 1: use 1/|x| as the argument instead. Start from a single-precision
// approximate reciprocal and refine it in double precision with FMAs.
// inline asm
cvt.rn.f32.f64 %f1,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd6,%f2;
// inline asm
neg.f64 %fd7, %fd2;
mov.f64 %fd8, 0d3FF0000000000000;
.loc 2 283 10
// e = 1 - |x|*r ; e' = e*e + e ; refined reciprocal = e'*r + r.
fma.rn.f64 %fd9, %fd7, %fd6, %fd8;
fma.rn.f64 %fd10, %fd9, %fd9, %fd9;
fma.rn.f64 %fd11, %fd10, %fd6, %fd6;
// |x| == +inf: force the argument to 0.0 rather than using the refined value.
setp.eq.f64 %p4, %fd2, 0d7FF0000000000000;
selp.f64 %fd3, 0d0000000000000000, %fd11, %p4;
mov.f64 %fd56, %fd3;
BB4_6:
.loc 2 283 10
// %fd4 = argument t, %fd12 = t*t. The FMA chain below is a Horner evaluation
// of a polynomial in t*t with literal double coefficients.
mov.f64 %fd4, %fd56;
mul.f64 %fd12, %fd4, %fd4;
mov.f64 %fd13, 0d3F2D3B63DBB65B49;
mov.f64 %fd14, 0dBEF53E1D2A25FF7E;
.loc 2 283 10
fma.rn.f64 %fd15, %fd14, %fd12, %fd13;
mov.f64 %fd16, 0dBF5312788DDE082E;
.loc 2 283 10
fma.rn.f64 %fd17, %fd15, %fd12, %fd16;
mov.f64 %fd18, 0d3F6F9690C8249315;
.loc 2 283 10
fma.rn.f64 %fd19, %fd17, %fd12, %fd18;
mov.f64 %fd20, 0dBF82CF5AABC7CF0D;
.loc 2 283 10
fma.rn.f64 %fd21, %fd19, %fd12, %fd20;
mov.f64 %fd22, 0d3F9162B0B2A3BFDE;
.loc 2 283 10
fma.rn.f64 %fd23, %fd21, %fd12, %fd22;
mov.f64 %fd24, 0dBF9A7256FEB6FC6B;
.loc 2 283 10
fma.rn.f64 %fd25, %fd23, %fd12, %fd24;
mov.f64 %fd26, 0d3FA171560CE4A489;
.loc 2 283 10
fma.rn.f64 %fd27, %fd25, %fd12, %fd26;
mov.f64 %fd28, 0dBFA4F44D841450E4;
.loc 2 283 10
fma.rn.f64 %fd29, %fd27, %fd12, %fd28;
mov.f64 %fd30, 0d3FA7EE3D3F36BB95;
.loc 2 283 10
fma.rn.f64 %fd31, %fd29, %fd12, %fd30;
mov.f64 %fd32, 0dBFAAD32AE04A9FD1;
.loc 2 283 10
fma.rn.f64 %fd33, %fd31, %fd12, %fd32;
mov.f64 %fd34, 0d3FAE17813D66954F;
.loc 2 283 10
fma.rn.f64 %fd35, %fd33, %fd12, %fd34;
mov.f64 %fd36, 0dBFB11089CA9A5BCD;
.loc 2 283 10
fma.rn.f64 %fd37, %fd35, %fd12, %fd36;
mov.f64 %fd38, 0d3FB3B12B2DB51738;
.loc 2 283 10
fma.rn.f64 %fd39, %fd37, %fd12, %fd38;
mov.f64 %fd40, 0dBFB745D022F8DC5C;
.loc 2 283 10
fma.rn.f64 %fd41, %fd39, %fd12, %fd40;
mov.f64 %fd42, 0d3FBC71C709DFE927;
.loc 2 283 10
fma.rn.f64 %fd43, %fd41, %fd12, %fd42;
mov.f64 %fd44, 0dBFC2492491FA1744;
.loc 2 283 10
fma.rn.f64 %fd45, %fd43, %fd12, %fd44;
mov.f64 %fd46, 0d3FC99999999840D2;
.loc 2 283 10
fma.rn.f64 %fd47, %fd45, %fd12, %fd46;
mov.f64 %fd48, 0dBFD555555555544C;
.loc 2 283 10
fma.rn.f64 %fd49, %fd47, %fd12, %fd48;
// %fd51 = t + t * (t*t * poly).
mul.f64 %fd50, %fd49, %fd12;
fma.rn.f64 %fd51, %fd50, %fd4, %fd4;
// 0d3FF921FB54442D18 is pi/2 in double precision. For |x| > 1 the reciprocal
// was used above, so the result is reflected: pi/2 - value.
mov.f64 %fd52, 0d3FF921FB54442D18;
.loc 2 283 10
sub.f64 %fd53, %fd52, %fd51;
setp.gt.f64 %p5, %fd2, 0d3FF0000000000000;
.loc 2 283 10
selp.f64 %fd54, %fd53, %fd51, %p5;
// Transfer the sign of the input: OR the sign bit (mask -2147483648 = 0x80000000)
// of the input's high word into the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%r22, %temp}, %fd54;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd54;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r24}, %fd1;
}
and.b32 %r25, %r24, -2147483648;
or.b32 %r26, %r23, %r25;
mov.b64 %fd55, {%r22, %r26};
.loc 1 21 42
// Destination address = %rd1 + 8 * (y + x*param_3).
add.s32 %r27, %r7, %r6;
mul.wide.s32 %rd7, %r27, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 21 42
st.global.f64 [%rd8], %fd55;
.loc 1 21 22
// Advance y by nctaid.y * ntid.y and loop while it is below param_0.
add.s32 %r8, %r3, %r7;
.loc 1 21 1
setp.lt.s32 %p6, %r8, %r10;
mov.u32 %r31, %r8;
@%p6 bra BB4_4;
BB4_7:
.loc 1 21 22
// Advance x by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r28, %nctaid.x;
mad.lo.s32 %r30, %r28, %r14, %r30;
.loc 1 21 1
setp.lt.s32 %p7, %r30, %r11;
@%p7 bra BB4_2;
BB4_8:
.loc 1 21 2
ret;
}
// map_atanh_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], evaluates the math routine
// inlined from .file 2 line 308 (presumably atanh, going by the kernel name), and
// stores the result to dst[y + x*param_3].
// The visible arithmetic is: a = |x|, t = 2a / (1 - a), v = f(t), result = +/- 0.5 * v,
// where f is either a short series (BB5_17) or a logarithm of (t + 1) (BB5_11..BB5_16).
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
.visible .entry map_atanh_double(
.param .u32 map_atanh_double_param_0,
.param .u32 map_atanh_double_param_1,
.param .u64 map_atanh_double_param_2,
.param .u32 map_atanh_double_param_3,
.param .u64 map_atanh_double_param_4,
.param .u32 map_atanh_double_param_5
)
{
.reg .pred %p<17>;
.reg .f32 %f<5>;
.reg .s32 %r<57>;
.reg .s64 %rd<9>;
.reg .f64 %fd<89>;
ld.param.u32 %r20, [map_atanh_double_param_0];
ld.param.u32 %r21, [map_atanh_double_param_1];
ld.param.u64 %rd2, [map_atanh_double_param_2];
ld.param.u32 %r22, [map_atanh_double_param_3];
ld.param.u64 %rd3, [map_atanh_double_param_4];
ld.param.u32 %r23, [map_atanh_double_param_5];
// %rd1 = source pointer (global address).
cvta.to.global.u64 %rd1, %rd3;
.loc 1 22 1
// %r51 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r51, %r24, %r25, %r26;
.loc 1 22 1
setp.ge.s32 %p1, %r51, %r21;
@%p1 bra BB5_20;
.loc 1 22 1
mov.u32 %r27, %ntid.y;
.loc 1 22 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// %rd6 = destination pointer (global address).
cvta.to.global.u64 %rd6, %rd2;
// ---- outer loop over the x index (%r51) ----
BB5_2:
.loc 1 22 1
// %r52 = ntid.y * ctaid.y + tid.y : starting y index, recomputed for each x.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r52, %r27, %r29, %r31;
.loc 1 22 1
setp.ge.s32 %p2, %r52, %r20;
@%p2 bra BB5_19;
.loc 1 22 1
// Per-x base offsets: %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r51, %r23;
.loc 1 22 42
mul.lo.s32 %r5, %r51, %r22;
// ---- inner loop over the y index (%r52) ----
BB5_4:
.loc 1 22 1
// Source address = %rd1 + 8 * (y + x*param_5).
add.s32 %r36, %r52, %r4;
mul.wide.s32 %rd4, %r36, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 1 22 1
ld.global.f64 %fd1, [%rd5];
.loc 2 308 10
// a = |x| ; %fd2 = t = (a + a) / (1.0 - a).
abs.f64 %fd12, %fd1;
add.f64 %fd13, %fd12, %fd12;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 2 308 10
sub.f64 %fd15, %fd14, %fd12;
div.rn.f64 %fd2, %fd13, %fd15;
// Path selection on the high 32 bits of t: take the series path (BB5_17) when
// the high word is below 1071994197 as unsigned, or below -1076258407 as signed.
{
.reg .b32 %temp;
mov.b64 {%temp, %r37}, %fd2;
}
setp.lt.u32 %p3, %r37, 1071994197;
setp.lt.s32 %p4, %r37, -1076258407;
or.pred %p5, %p3, %p4;
@%p5 bra BB5_17;
// Logarithm path: %fd3 = t + 1.0; %r53/%r54 hold its high/low 32-bit words.
add.f64 %fd3, %fd2, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd3;
}
{
.reg .b32 %temp;
mov.b64 {%r54, %temp}, %fd3;
}
// Ordinary case: 0 < (t + 1) < +inf goes to BB5_11. Everything else is a special case.
setp.gt.f64 %p6, %fd3, 0d0000000000000000;
setp.lt.f64 %p7, %fd3, 0d7FF0000000000000;
and.pred %p8, %p6, %p7;
@%p8 bra BB5_11;
// NaN (unordered compare) -> BB5_10.
abs.f64 %fd16, %fd3;
setp.gtu.f64 %p9, %fd16, 0d7FF0000000000000;
@%p9 bra BB5_10;
// Non-zero (negative or +inf) -> BB5_9; exactly zero falls through to -inf.
setp.neu.f64 %p10, %fd3, 0d0000000000000000;
@%p10 bra BB5_9;
mov.f64 %fd88, 0dFFF0000000000000;
bra.uni BB5_18;
BB5_9:
.loc 2 308 10
// +inf is passed through; any other value reaching here yields the NaN
// bit pattern 0dFFF8000000000000.
setp.eq.f64 %p11, %fd3, 0d7FF0000000000000;
selp.f64 %fd88, %fd3, 0dFFF8000000000000, %p11;
bra.uni BB5_18;
BB5_10:
.loc 2 308 10
// NaN input: propagate via v + v.
add.f64 %fd88, %fd3, %fd3;
bra.uni BB5_18;
BB5_11:
.loc 2 308 10
// High word below 1048576 (0x00100000) means the exponent field is zero.
// Such values are rescaled in BB5_13; otherwise the exponent bias is -1023.
setp.lt.u32 %p12, %r53, 1048576;
@%p12 bra BB5_13;
mov.u32 %r55, -1023;
bra.uni BB5_14;
BB5_13:
.loc 2 308 10
// Multiply by 0d4350000000000000 (2^54) and compensate with bias -1077 (= -1023 - 54).
mul.f64 %fd18, %fd3, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd18;
}
{
.reg .b32 %temp;
mov.b64 {%r54, %temp}, %fd18;
}
mov.u32 %r55, -1077;
BB5_14:
.loc 2 308 10
// %r56 = unbiased exponent. %fd87 = mantissa with the exponent field replaced by
// 0x3FF (mask -2146435073 = 0x800FFFFF, OR 1072693248 = 0x3FF00000).
shr.s32 %r40, %r53, 20;
add.s32 %r56, %r55, %r40;
and.b32 %r41, %r53, -2146435073;
or.b32 %r42, %r41, 1072693248;
mov.b64 %fd87, {%r54, %r42};
// If the mantissa's high word is at or above 1073127583, decrement its exponent
// field by one (subtract 1048576) and increment the exponent to compensate.
setp.lt.u32 %p13, %r42, 1073127583;
@%p13 bra BB5_16;
{
.reg .b32 %temp;
mov.b64 {%r43, %temp}, %fd87;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd87;
}
add.s32 %r45, %r44, -1048576;
mov.b64 %fd87, {%r43, %r45};
add.s32 %r56, %r56, 1;
BB5_16:
// Reciprocal of (m + 1): single-precision approximation refined with FMAs.
add.f64 %fd19, %fd87, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f1,%fd19;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd20,%f2;
// inline asm
neg.f64 %fd22, %fd19;
fma.rn.f64 %fd23, %fd22, %fd20, %fd14;
fma.rn.f64 %fd24, %fd23, %fd23, %fd23;
fma.rn.f64 %fd25, %fd24, %fd20, %fd20;
// %fd28 = 2 * (m - 1) * recip ; %fd29 = its square, the polynomial variable.
add.f64 %fd26, %fd87, 0dBFF0000000000000;
mul.f64 %fd27, %fd26, %fd25;
fma.rn.f64 %fd28, %fd26, %fd25, %fd27;
mul.f64 %fd29, %fd28, %fd28;
// Horner evaluation of a polynomial in %fd29 with literal coefficients.
mov.f64 %fd30, 0d3ED0EE258B7A8B04;
mov.f64 %fd31, 0d3EB1380B3AE80F1E;
.loc 2 308 10
fma.rn.f64 %fd32, %fd31, %fd29, %fd30;
mov.f64 %fd33, 0d3EF3B2669F02676F;
.loc 2 308 10
fma.rn.f64 %fd34, %fd32, %fd29, %fd33;
mov.f64 %fd35, 0d3F1745CBA9AB0956;
.loc 2 308 10
fma.rn.f64 %fd36, %fd34, %fd29, %fd35;
mov.f64 %fd37, 0d3F3C71C72D1B5154;
.loc 2 308 10
fma.rn.f64 %fd38, %fd36, %fd29, %fd37;
mov.f64 %fd39, 0d3F624924923BE72D;
.loc 2 308 10
fma.rn.f64 %fd40, %fd38, %fd29, %fd39;
mov.f64 %fd41, 0d3F8999999999A3C4;
.loc 2 308 10
fma.rn.f64 %fd42, %fd40, %fd29, %fd41;
mov.f64 %fd43, 0d3FB5555555555554;
.loc 2 308 10
fma.rn.f64 %fd44, %fd42, %fd29, %fd43;
// Correction term for the rounding error of the quotient %fd28, combined with
// the polynomial contribution into %fd51.
sub.f64 %fd45, %fd26, %fd28;
add.f64 %fd46, %fd45, %fd45;
neg.f64 %fd47, %fd28;
fma.rn.f64 %fd48, %fd47, %fd26, %fd46;
mul.f64 %fd49, %fd25, %fd48;
mul.f64 %fd50, %fd44, %fd29;
fma.rn.f64 %fd51, %fd50, %fd28, %fd49;
// Add exponent * ln2. 0d3FE62E42FEFA39EF is ln(2) in double precision;
// 0d3C7ABC9E3B39803F is applied afterwards as a low-order correction to it.
cvt.rn.f64.s32 %fd52, %r56;
mov.f64 %fd53, 0d3FE62E42FEFA39EF;
.loc 2 308 10
fma.rn.f64 %fd54, %fd52, %fd53, %fd28;
neg.s32 %r46, %r56;
cvt.rn.f64.s32 %fd55, %r46;
fma.rn.f64 %fd56, %fd55, %fd53, %fd54;
sub.f64 %fd57, %fd56, %fd28;
sub.f64 %fd58, %fd51, %fd57;
mov.f64 %fd59, 0d3C7ABC9E3B39803F;
.loc 2 308 10
fma.rn.f64 %fd60, %fd52, %fd59, %fd58;
add.f64 %fd88, %fd54, %fd60;
bra.uni BB5_18;
BB5_17:
.loc 2 308 10
// Series path, computed directly from t without the +1:
// u = t - t * (t / (t + 2)) ; polynomial in u*u ; result = poly*u*u*u + (u - t) + t.
add.f64 %fd61, %fd2, 0d4000000000000000;
div.rn.f64 %fd62, %fd2, %fd61;
neg.f64 %fd63, %fd2;
mul.f64 %fd64, %fd62, %fd63;
add.f64 %fd65, %fd2, %fd64;
mul.f64 %fd66, %fd65, %fd65;
mov.f64 %fd67, 0d3ED087FFCEB2DC44;
mov.f64 %fd68, 0d3EB372FB2FBE14B5;
.loc 2 308 10
fma.rn.f64 %fd69, %fd68, %fd66, %fd67;
mov.f64 %fd70, 0d3EF3B9FF890F468C;
.loc 2 308 10
fma.rn.f64 %fd71, %fd69, %fd66, %fd70;
mov.f64 %fd72, 0d3F17457EFD51BAF8;
.loc 2 308 10
fma.rn.f64 %fd73, %fd71, %fd66, %fd72;
mov.f64 %fd74, 0d3F3C71C8DE3CE825;
.loc 2 308 10
fma.rn.f64 %fd75, %fd73, %fd66, %fd74;
mov.f64 %fd76, 0d3F6249248FA4661F;
.loc 2 308 10
fma.rn.f64 %fd77, %fd75, %fd66, %fd76;
mov.f64 %fd78, 0d3F899999999D70C4;
.loc 2 308 10
fma.rn.f64 %fd79, %fd77, %fd66, %fd78;
mov.f64 %fd80, 0d3FB5555555555462;
.loc 2 308 10
fma.rn.f64 %fd81, %fd79, %fd66, %fd80;
mul.f64 %fd82, %fd81, %fd66;
fma.rn.f64 %fd83, %fd82, %fd65, %fd64;
add.f64 %fd88, %fd83, %fd2;
BB5_18:
// Final scaling: result = 0.5 * v, negated when the input's high word is negative
// (i.e. its sign bit is set).
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd1;
}
setp.lt.s32 %p14, %r47, 0;
mul.f64 %fd84, %fd88, 0d3FE0000000000000;
neg.f64 %fd85, %fd84;
selp.f64 %fd86, %fd85, %fd84, %p14;
.loc 1 22 42
// Destination address = %rd6 + 8 * (y + x*param_3).
add.s32 %r48, %r52, %r5;
mul.wide.s32 %rd7, %r48, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 22 42
st.global.f64 [%rd8], %fd86;
.loc 1 22 22
// Advance y by nctaid.y * ntid.y and loop while it is below param_0.
add.s32 %r52, %r2, %r52;
.loc 1 22 1
setp.lt.s32 %p15, %r52, %r20;
@%p15 bra BB5_4;
BB5_19:
.loc 1 22 22
// Advance x by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r49, %nctaid.x;
mad.lo.s32 %r51, %r49, %r24, %r51;
.loc 1 22 1
setp.lt.s32 %p16, %r51, %r21;
@%p16 bra BB5_2;
BB5_20:
.loc 1 22 2
ret;
}
// map_cbrt_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], evaluates the math routine
// inlined from .file 2 line 318 (presumably cbrt, going by the kernel name), and
// stores the result to dst[y + x*param_3].
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
.visible .entry map_cbrt_double(
.param .u32 map_cbrt_double_param_0,
.param .u32 map_cbrt_double_param_1,
.param .u64 map_cbrt_double_param_2,
.param .u32 map_cbrt_double_param_3,
.param .u64 map_cbrt_double_param_4,
.param .u32 map_cbrt_double_param_5
)
{
.reg .pred %p<9>;
.reg .f32 %f<7>;
.reg .s32 %r<54>;
.reg .s64 %rd<9>;
.reg .f64 %fd<26>;
ld.param.u32 %r20, [map_cbrt_double_param_0];
ld.param.u32 %r21, [map_cbrt_double_param_1];
ld.param.u64 %rd3, [map_cbrt_double_param_2];
ld.param.u32 %r22, [map_cbrt_double_param_3];
ld.param.u64 %rd4, [map_cbrt_double_param_4];
ld.param.u32 %r23, [map_cbrt_double_param_5];
// %rd1 = destination pointer, %rd2 = source pointer (both converted to global addresses).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 23 1
// %r48 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r48, %r24, %r25, %r26;
.loc 1 23 1
setp.ge.s32 %p1, %r48, %r21;
@%p1 bra BB6_13;
.loc 1 23 1
// %r2 = ntid.y * ctaid.y + tid.y : starting y index (loop-invariant, reused per x).
mov.u32 %r27, %tid.y;
mov.u32 %r28, %ntid.y;
mov.u32 %r29, %ctaid.y;
mad.lo.s32 %r2, %r28, %r29, %r27;
.loc 1 23 22
// %r3 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r30, %nctaid.y;
mul.lo.s32 %r3, %r30, %r28;
// ---- outer loop over the x index (%r48) ----
BB6_2:
.loc 1 23 1
setp.ge.s32 %p2, %r2, %r20;
@%p2 bra BB6_12;
.loc 1 23 1
// Per-x base offsets: %r5 for the source, %r6 for the destination.
mul.lo.s32 %r5, %r48, %r23;
.loc 1 23 42
mul.lo.s32 %r6, %r48, %r22;
mov.u32 %r49, %r2;
// ---- inner loop over the y index (%r49 / %r7) ----
BB6_4:
.loc 1 23 1
mov.u32 %r7, %r49;
// Source address = %rd2 + 8 * (y + x*param_5).
add.s32 %r31, %r7, %r5;
mul.wide.s32 %rd5, %r31, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 23 1
ld.global.f64 %fd1, [%rd6];
.loc 2 318 10
// Special inputs bypass the computation: zero, and anything with |x| >= +inf
// or NaN (setp.geu is also true when unordered). They produce x + x at BB6_10.
setp.eq.f64 %p3, %fd1, 0d0000000000000000;
@%p3 bra BB6_10;
abs.f64 %fd2, %fd1;
setp.geu.f64 %p4, %fd2, 0d7FF0000000000000;
@%p4 bra BB6_10;
// %r50/%r51 = low/high words of |x|; %r52 = 11-bit biased exponent field.
{
.reg .b32 %temp;
mov.b64 {%r50, %temp}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd2;
}
shr.u32 %r32, %r51, 20;
and.b32 %r52, %r32, 2047;
// Exponent field of zero -> rescale in BB6_8; otherwise no exponent compensation (%r53 = 0).
setp.eq.s32 %p5, %r52, 0;
@%p5 bra BB6_8;
mov.u32 %r53, 0;
bra.uni BB6_9;
BB6_8:
.loc 2 318 10
// Multiply by 0d4350000000000000 (2^54) and remember to subtract 18 (= 54 / 3)
// from the result exponent later.
mul.f64 %fd6, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%r50, %temp}, %fd6;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd6;
}
shr.u32 %r35, %r51, 20;
and.b32 %r52, %r35, 2047;
mov.u32 %r53, 18;
BB6_9:
.loc 2 318 10
// %r37 = round-to-nearest-integer of (exponent - 1022) * 0f3EAAAAAB (about 1/3).
add.s32 %r36, %r52, -1022;
cvt.rn.f32.s32 %f1, %r36;
mul.f32 %f2, %f1, 0f3EAAAAAB;
cvt.rni.s32.f32 %r37, %f2;
// Remove 3 * %r37 from the exponent field (-3145728 = -3 << 20); %fd7 is the
// exponent-reduced argument.
mad.lo.s32 %r38, %r37, -3145728, %r51;
mov.b64 %fd7, {%r50, %r38};
// Single-precision seed: ex2(lg2(m) * 0fBEAAAAAB), with 0fBEAAAAAB about -1/3.
cvt.rn.f32.f64 %f3, %fd7;
lg2.approx.f32 %f4, %f3;
mul.f32 %f5, %f4, 0fBEAAAAAB;
ex2.approx.f32 %f6, %f5;
cvt.f64.f32 %fd8, %f6;
// Double-precision refinement of the seed %fd8 (one FMA-based correction step),
// then conversion to the final estimate %fd17 = m * s * s.
mul.f64 %fd9, %fd8, %fd8;
neg.f64 %fd10, %fd7;
mul.f64 %fd11, %fd8, %fd10;
mov.f64 %fd12, 0d3FF0000000000000;
.loc 2 318 10
fma.rn.f64 %fd13, %fd9, %fd11, %fd12;
mul.f64 %fd14, %fd8, 0d3FD5555555555555;
fma.rn.f64 %fd15, %fd13, %fd14, %fd8;
mul.f64 %fd16, %fd7, %fd15;
mul.f64 %fd17, %fd16, %fd15;
// One more correction: y = y - (y - m / y^2) / 3  (0dBFD5555555555555 is about -1/3).
mul.f64 %fd18, %fd17, %fd17;
div.rn.f64 %fd19, %fd7, %fd18;
sub.f64 %fd20, %fd17, %fd19;
mov.f64 %fd21, 0dBFD5555555555555;
.loc 2 318 10
fma.rn.f64 %fd22, %fd20, %fd21, %fd17;
// Re-apply the exponent: add (%r37 - %r53) << 20 to the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%r39, %temp}, %fd22;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd22;
}
sub.s32 %r41, %r37, %r53;
shl.b32 %r42, %r41, 20;
add.s32 %r43, %r40, %r42;
mov.b64 %fd23, {%r39, %r43};
// Negate the result when the input's high word is negative (sign bit set).
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd1;
}
setp.lt.s32 %p6, %r44, 0;
neg.f64 %fd24, %fd23;
selp.f64 %fd25, %fd24, %fd23, %p6;
bra.uni BB6_11;
BB6_10:
.loc 2 318 10
// Zero, infinity and NaN inputs: result is x + x.
add.f64 %fd25, %fd1, %fd1;
BB6_11:
.loc 1 23 42
// Destination address = %rd1 + 8 * (y + x*param_3).
add.s32 %r45, %r7, %r6;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 23 42
st.global.f64 [%rd8], %fd25;
.loc 1 23 22
// Advance y by nctaid.y * ntid.y and loop while it is below param_0.
add.s32 %r18, %r3, %r7;
.loc 1 23 1
setp.lt.s32 %p7, %r18, %r20;
mov.u32 %r49, %r18;
@%p7 bra BB6_4;
BB6_12:
.loc 1 23 22
// Advance x by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r46, %nctaid.x;
mad.lo.s32 %r48, %r46, %r24, %r48;
.loc 1 23 1
setp.lt.s32 %p8, %r48, %r21;
@%p8 bra BB6_2;
BB6_13:
.loc 1 23 2
ret;
}
// map_ceil_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], rounds the value to an
// integral double toward +infinity (cvt.rpi.f64.f64), and stores it to
// dst[y + x*param_3].
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
.visible .entry map_ceil_double(
.param .u32 map_ceil_double_param_0,
.param .u32 map_ceil_double_param_1,
.param .u64 map_ceil_double_param_2,
.param .u32 map_ceil_double_param_3,
.param .u64 map_ceil_double_param_4,
.param .u32 map_ceil_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_ceil_double_param_0];
ld.param.u32 %r13, [map_ceil_double_param_1];
ld.param.u64 %rd3, [map_ceil_double_param_2];
ld.param.u32 %r14, [map_ceil_double_param_3];
ld.param.u64 %rd4, [map_ceil_double_param_4];
ld.param.u32 %r15, [map_ceil_double_param_5];
// %rd1 = destination pointer, %rd2 = source pointer (both converted to global addresses).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 24 1
// %r25 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 24 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB7_6;
.loc 1 24 1
// %r3 = ntid.y * ctaid.y + tid.y : starting y index (loop-invariant, reused per x).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 24 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop, computed once up front here.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 24 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// ---- outer loop over the x index (%r25) ----
BB7_2:
.loc 1 24 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB7_5;
.loc 1 24 1
// Per-x base offsets: %r7 for the source, %r8 for the destination.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 24 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// ---- inner loop over the y index (%r26 / %r9) ----
BB7_4:
.loc 1 24 1
mov.u32 %r9, %r26;
// Source address = %rd2 + 8 * (y + x*param_5).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 24 1
ld.global.f64 %fd1, [%rd6];
.loc 3 2795 10
// Round to integral value toward +infinity (the ceiling operation).
cvt.rpi.f64.f64 %fd2, %fd1;
.loc 1 24 42
// Destination address = %rd1 + 8 * (y + x*param_3).
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 24 42
st.global.f64 [%rd8], %fd2;
.loc 1 24 22
// Advance y by %r5 and loop while it is below param_0.
add.s32 %r10, %r5, %r9;
.loc 1 24 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB7_4;
BB7_5:
.loc 1 24 22
// Advance x by %r4 and loop while it is below param_1.
add.s32 %r25, %r4, %r25;
.loc 1 24 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB7_2;
BB7_6:
.loc 1 24 2
ret;
}
// map_cos_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], evaluates the math routine
// inlined from .file 2 line 203 (presumably cos, going by the kernel name), and
// stores the result to dst[y + x*param_3].
// Uses 4 bytes of per-thread local memory (__local_depot8) to exchange the
// quadrant integer with __internal_trig_reduction_slowpathd, and reads polynomial
// coefficients from the constant table __cudart_sin_cos_coeffs.
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
.visible .entry map_cos_double(
.param .u32 map_cos_double_param_0,
.param .u32 map_cos_double_param_1,
.param .u64 map_cos_double_param_2,
.param .u32 map_cos_double_param_3,
.param .u64 map_cos_double_param_4,
.param .u32 map_cos_double_param_5
)
{
.local .align 4 .b8 __local_depot8[4];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<10>;
.reg .s32 %r<41>;
.reg .s64 %rd<16>;
.reg .f64 %fd<43>;
// %SPL = local-space address of the depot, %SP = the same location as a generic address.
mov.u64 %SPL, __local_depot8;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r14, [map_cos_double_param_0];
ld.param.u32 %r15, [map_cos_double_param_1];
ld.param.u64 %rd1, [map_cos_double_param_2];
ld.param.u32 %r16, [map_cos_double_param_3];
ld.param.u64 %rd2, [map_cos_double_param_4];
ld.param.u32 %r17, [map_cos_double_param_5];
.loc 1 25 1
// %r38 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r18, %ntid.x;
mov.u32 %r19, %ctaid.x;
mov.u32 %r20, %tid.x;
mad.lo.s32 %r38, %r18, %r19, %r20;
.loc 1 25 1
setp.ge.s32 %p1, %r38, %r15;
@%p1 bra BB8_14;
.loc 1 25 1
mov.u32 %r21, %ntid.y;
.loc 1 25 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r2, %r22, %r21;
// %rd3 = source pointer, %rd13 = destination pointer (global addresses).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd13, %rd1;
// ---- outer loop over the x index (%r38) ----
BB8_2:
.loc 1 25 1
// %r39 = ntid.y * ctaid.y + tid.y : starting y index, recomputed for each x.
mov.u32 %r23, %ctaid.y;
mov.u32 %r25, %tid.y;
mad.lo.s32 %r39, %r21, %r23, %r25;
.loc 1 25 1
setp.ge.s32 %p2, %r39, %r14;
@%p2 bra BB8_13;
.loc 1 25 1
// Per-x base offsets: %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r38, %r17;
.loc 1 25 42
mul.lo.s32 %r5, %r38, %r16;
// ---- inner loop over the y index (%r39) ----
BB8_4:
.loc 1 25 1
// Source address = %rd3 + 8 * (y + x*param_5).
add.s32 %r30, %r39, %r4;
mul.wide.s32 %rd4, %r30, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd40, [%rd5];
.loc 2 203 10
// Infinite input: replace x by x * 0.0 (which is NaN for +/-inf) before reduction.
abs.f64 %fd14, %fd40;
setp.neu.f64 %p3, %fd14, 0d7FF0000000000000;
@%p3 bra BB8_6;
mov.f64 %fd15, 0d0000000000000000;
.loc 2 203 10
mul.rn.f64 %fd40, %fd40, %fd15;
BB8_6:
// %rd6 = generic address of the local quadrant slot, %rd7 = its local-space address.
add.u64 %rd6, %SP, 0;
.loc 2 203 10
// Fast-path range reduction: q = nearest integer to x * 0d3FE45F306DC9C883
// (about 2/pi); q is also written to the local slot.
mul.f64 %fd16, %fd40, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r40, %fd16;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 203 10
st.local.u32 [%rd7], %r40;
// r = x - q * (pi/2), with pi/2 split across three constants of decreasing
// magnitude (0d3FF921FB54442D18 is pi/2 rounded to double) and applied by three FMAs.
cvt.rn.f64.s32 %fd17, %r40;
neg.f64 %fd18, %fd17;
mov.f64 %fd19, 0d3FF921FB54442D18;
.loc 2 203 10
fma.rn.f64 %fd20, %fd18, %fd19, %fd40;
mov.f64 %fd21, 0d3C91A62633145C00;
.loc 2 203 10
fma.rn.f64 %fd22, %fd18, %fd21, %fd20;
mov.f64 %fd23, 0d397B839A252049C0;
.loc 2 203 10
fma.rn.f64 %fd41, %fd18, %fd23, %fd22;
// The fast path is kept only when |x| <= 0d41E0000000000000 (2^31), or x is NaN
// (setp.leu is true when unordered). Larger inputs call the slow-path routine.
abs.f64 %fd24, %fd40;
setp.leu.f64 %p4, %fd24, 0d41E0000000000000;
@%p4 bra BB8_8;
// Call __internal_trig_reduction_slowpathd(x, &q): returns the reduced argument
// and is given the address of the local quadrant slot, which is re-read afterwards.
// Callseq Start 0
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd40;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 203 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd41, [retval0+0];
}
// Callseq End 0
ld.local.u32 %r40, [%rd7];
BB8_8:
// %r11 = q + 1. Its low bit selects which half of __cudart_sin_cos_coeffs is used
// (entry index 0 or 8, 8 bytes per entry) and which leading coefficient is used.
add.s32 %r11, %r40, 1;
shl.b32 %r31, %r11, 3;
and.b32 %r32, %r31, 8;
and.b32 %r33, %r11, 1;
setp.eq.b32 %p5, %r33, 1;
not.pred %p6, %p5;
selp.f64 %fd25, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p6;
mul.wide.u32 %rd10, %r32, 8;
mov.u64 %rd11, __cudart_sin_cos_coeffs;
add.s64 %rd12, %rd11, %rd10;
.loc 2 203 10
// Horner evaluation in r*r (%fd7) using six table coefficients at offsets 8..48.
ld.const.f64 %fd26, [%rd12+8];
mul.rn.f64 %fd7, %fd41, %fd41;
fma.rn.f64 %fd27, %fd25, %fd7, %fd26;
ld.const.f64 %fd28, [%rd12+16];
fma.rn.f64 %fd29, %fd27, %fd7, %fd28;
ld.const.f64 %fd30, [%rd12+24];
fma.rn.f64 %fd31, %fd29, %fd7, %fd30;
ld.const.f64 %fd32, [%rd12+32];
fma.rn.f64 %fd33, %fd31, %fd7, %fd32;
ld.const.f64 %fd34, [%rd12+40];
fma.rn.f64 %fd35, %fd33, %fd7, %fd34;
ld.const.f64 %fd36, [%rd12+48];
fma.rn.f64 %fd8, %fd35, %fd7, %fd36;
// (q + 1) even: result = poly * r + r.  (q + 1) odd: result = poly * r*r + 1.0.
fma.rn.f64 %fd42, %fd8, %fd41, %fd41;
@%p6 bra BB8_10;
mov.f64 %fd37, 0d3FF0000000000000;
.loc 2 203 10
fma.rn.f64 %fd42, %fd8, %fd7, %fd37;
BB8_10:
// Bit 1 of (q + 1) set: negate the result (computed as result * -1.0 + 0.0).
and.b32 %r34, %r11, 2;
setp.eq.s32 %p7, %r34, 0;
@%p7 bra BB8_12;
mov.f64 %fd38, 0d0000000000000000;
mov.f64 %fd39, 0dBFF0000000000000;
.loc 2 203 10
fma.rn.f64 %fd42, %fd42, %fd39, %fd38;
BB8_12:
.loc 1 25 42
// Destination address = %rd13 + 8 * (y + x*param_3).
add.s32 %r35, %r39, %r5;
mul.wide.s32 %rd14, %r35, 8;
add.s64 %rd15, %rd13, %rd14;
st.global.f64 [%rd15], %fd42;
.loc 1 25 22
// Advance y by nctaid.y * ntid.y and loop while it is below param_0.
add.s32 %r39, %r2, %r39;
.loc 1 25 1
setp.lt.s32 %p8, %r39, %r14;
@%p8 bra BB8_4;
BB8_13:
.loc 1 25 22
// Advance x by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r18, %r38;
.loc 1 25 1
setp.lt.s32 %p9, %r38, %r15;
@%p9 bra BB8_2;
BB8_14:
.loc 1 25 2
ret;
}
// map_cosh_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], evaluates the math routine
// inlined from .file 2 line 263 (presumably cosh, going by the kernel name), and
// stores the result to dst[y + x*param_3].
// The visible arithmetic computes s = exp(|x|) * 2^-2 and returns 2*s + 0.125/s.
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
.visible .entry map_cosh_double(
.param .u32 map_cosh_double_param_0,
.param .u32 map_cosh_double_param_1,
.param .u64 map_cosh_double_param_2,
.param .u32 map_cosh_double_param_3,
.param .u64 map_cosh_double_param_4,
.param .u32 map_cosh_double_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<5>;
.reg .s32 %r<42>;
.reg .s64 %rd<9>;
.reg .f64 %fd<50>;
ld.param.u32 %r15, [map_cosh_double_param_0];
ld.param.u32 %r16, [map_cosh_double_param_1];
ld.param.u64 %rd3, [map_cosh_double_param_2];
ld.param.u32 %r17, [map_cosh_double_param_3];
ld.param.u64 %rd4, [map_cosh_double_param_4];
ld.param.u32 %r18, [map_cosh_double_param_5];
// %rd1 = destination pointer, %rd2 = source pointer (both converted to global addresses).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 26 1
// %r39 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r39, %r19, %r20, %r21;
.loc 1 26 1
setp.ge.s32 %p1, %r39, %r16;
@%p1 bra BB9_12;
.loc 1 26 1
// %r2 = ntid.y * ctaid.y + tid.y : starting y index (loop-invariant, reused per x).
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 1 26 22
// %r3 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// ---- outer loop over the x index (%r39) ----
BB9_2:
.loc 1 26 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB9_11;
.loc 1 26 1
// Per-x base offsets: %r5 for the source, %r6 for the destination.
mul.lo.s32 %r5, %r39, %r18;
.loc 1 26 42
mul.lo.s32 %r6, %r39, %r17;
mov.u32 %r40, %r2;
// ---- inner loop over the y index (%r40 / %r7) ----
BB9_4:
.loc 1 26 1
mov.u32 %r7, %r40;
// Source address = %rd2 + 8 * (y + x*param_5).
add.s32 %r26, %r7, %r5;
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 26 1
ld.global.f64 %fd1, [%rd6];
.loc 2 263 10
// Work on |x|. The normal path is taken when the high word of |x| is below
// 1082536911 (unsigned compare); larger magnitudes, infinity and NaN fall through.
abs.f64 %fd2, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd2;
}
setp.lt.u32 %p3, %r27, 1082536911;
@%p3 bra BB9_6;
// Out-of-range path: +inf when |x| > 0 compares true, otherwise (NaN) the input
// itself; the value is then doubled (inf stays inf, NaN propagates).
setp.gt.f64 %p4, %fd2, 0d0000000000000000;
selp.f64 %fd9, 0d7FF0000000000000, %fd1, %p4;
add.f64 %fd49, %fd9, %fd9;
bra.uni BB9_10;
BB9_6:
.loc 2 263 10
// n = round-to-nearest(|x| * 0d3FF71547652B82FE), the constant being about log2(e);
// %r8 holds n as an integer.
mul.f64 %fd10, %fd2, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd11, %fd10;
cvt.rzi.s32.f64 %r8, %fd11;
// r = |x| - n * ln2, with ln2 applied as a high part (0dBFE62E42FEFA39EF = -ln 2)
// followed by a low-order correction (0dBC7ABC9E3B39803F).
mov.f64 %fd12, 0dBFE62E42FEFA39EF;
.loc 2 263 10
fma.rn.f64 %fd13, %fd11, %fd12, %fd2;
mov.f64 %fd14, 0dBC7ABC9E3B39803F;
.loc 2 263 10
fma.rn.f64 %fd15, %fd11, %fd14, %fd13;
// Horner evaluation of a polynomial in r (%fd15) with literal coefficients; the
// last two steps use the coefficient 1.0.
mov.f64 %fd16, 0d3E928A27E30F5561;
mov.f64 %fd17, 0d3E5AE6449C0686C0;
.loc 2 263 10
fma.rn.f64 %fd18, %fd17, %fd15, %fd16;
mov.f64 %fd19, 0d3EC71DE8E6486D6B;
.loc 2 263 10
fma.rn.f64 %fd20, %fd18, %fd15, %fd19;
mov.f64 %fd21, 0d3EFA019A6B2464C5;
.loc 2 263 10
fma.rn.f64 %fd22, %fd20, %fd15, %fd21;
mov.f64 %fd23, 0d3F2A01A0171064A5;
.loc 2 263 10
fma.rn.f64 %fd24, %fd22, %fd15, %fd23;
mov.f64 %fd25, 0d3F56C16C17F29C8D;
.loc 2 263 10
fma.rn.f64 %fd26, %fd24, %fd15, %fd25;
mov.f64 %fd27, 0d3F8111111111A24E;
.loc 2 263 10
fma.rn.f64 %fd28, %fd26, %fd15, %fd27;
mov.f64 %fd29, 0d3FA555555555211D;
.loc 2 263 10
fma.rn.f64 %fd30, %fd28, %fd15, %fd29;
mov.f64 %fd31, 0d3FC5555555555530;
.loc 2 263 10
fma.rn.f64 %fd32, %fd30, %fd15, %fd31;
mov.f64 %fd33, 0d3FE0000000000005;
.loc 2 263 10
fma.rn.f64 %fd34, %fd32, %fd15, %fd33;
mov.f64 %fd35, 0d3FF0000000000000;
.loc 2 263 10
fma.rn.f64 %fd36, %fd34, %fd15, %fd35;
fma.rn.f64 %fd48, %fd36, %fd15, %fd35;
// Scale by 2^(n - 2). When |n - 2| < 1023 the factor is built directly as an
// exponent bit pattern (BB9_8); otherwise it is split into two factors that are
// applied one after the other.
add.s32 %r9, %r8, -2;
abs.s32 %r28, %r9;
setp.lt.s32 %p5, %r28, 1023;
@%p5 bra BB9_8;
// Split scaling: first factor has high word %r31, second (applied at BB9_9) %r41.
add.s32 %r29, %r8, 2044;
shl.b32 %r30, %r29, 19;
and.b32 %r31, %r30, -1048576;
shl.b32 %r32, %r29, 20;
sub.s32 %r41, %r32, %r31;
mov.u32 %r33, 0;
.loc 2 263 10
mov.b64 %fd37, {%r33, %r31};
mul.f64 %fd48, %fd48, %fd37;
bra.uni BB9_9;
BB9_8:
.loc 2 263 10
// Single factor: high word = ((n - 2) << 20) + 1072693248 (0x3FF00000).
shl.b32 %r34, %r9, 20;
add.s32 %r41, %r34, 1072693248;
BB9_9:
mov.u32 %r35, 0;
.loc 2 263 10
// %fd38 = s, the scaled exponential.
mov.b64 %fd40, {%r35, %r41};
mul.f64 %fd38, %fd48, %fd40;
// Reciprocal of s: single-precision approximation refined with FMAs.
// inline asm
cvt.rn.f32.f64 %f1,%fd38;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd39,%f2;
// inline asm
neg.f64 %fd41, %fd38;
fma.rn.f64 %fd43, %fd41, %fd39, %fd35;
fma.rn.f64 %fd44, %fd43, %fd43, %fd43;
fma.rn.f64 %fd45, %fd44, %fd39, %fd39;
// result = 2.0 * s + 0.125 * (1 / s).
mul.f64 %fd46, %fd45, 0d3FC0000000000000;
mov.f64 %fd47, 0d4000000000000000;
.loc 2 263 10
fma.rn.f64 %fd49, %fd47, %fd38, %fd46;
BB9_10:
.loc 1 26 42
// Destination address = %rd1 + 8 * (y + x*param_3).
add.s32 %r36, %r7, %r6;
mul.wide.s32 %rd7, %r36, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 26 42
st.global.f64 [%rd8], %fd49;
.loc 1 26 22
// Advance y by nctaid.y * ntid.y and loop while it is below param_0.
add.s32 %r13, %r3, %r7;
.loc 1 26 1
setp.lt.s32 %p6, %r13, %r15;
mov.u32 %r40, %r13;
@%p6 bra BB9_4;
BB9_11:
.loc 1 26 22
// Advance x by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r37, %nctaid.x;
mad.lo.s32 %r39, %r37, %r19, %r39;
.loc 1 26 1
setp.lt.s32 %p7, %r39, %r16;
@%p7 bra BB9_2;
BB9_12:
.loc 1 26 2
ret;
}
// map_cospi_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y-derived (inner loop) index
//   param_1 (u32): exclusive upper bound for the x-derived (outer loop) index
//   param_2 (u64): destination base address; f64 elements are stored through it
//   param_3 (u32): multiplier applied to the x index when forming the destination offset
//   param_4 (u64): source base address; f64 elements are loaded through it
//   param_5 (u32): multiplier applied to the x index when forming the source offset
// For every (x, y) pair it loads src[y + x*param_5], evaluates the math routine
// inlined from .file 2 line 213 (presumably cospi, i.e. cos(pi*x), going by the
// kernel name), and stores the result to dst[y + x*param_3].
// Reads polynomial coefficients from the constant table __cudart_sin_cos_coeffs.
// NOTE(review): params 0/1 look like row/column counts and params 3/5 like leading
// dimensions -- confirm against the originating .cu source.
.visible .entry map_cospi_double(
.param .u32 map_cospi_double_param_0,
.param .u32 map_cospi_double_param_1,
.param .u64 map_cospi_double_param_2,
.param .u32 map_cospi_double_param_3,
.param .u64 map_cospi_double_param_4,
.param .u32 map_cospi_double_param_5
)
{
.reg .pred %p<9>;
.reg .s32 %r<43>;
.reg .s64 %rd<13>;
.reg .f64 %fd<37>;
ld.param.u32 %r11, [map_cospi_double_param_0];
ld.param.u32 %r12, [map_cospi_double_param_1];
ld.param.u64 %rd2, [map_cospi_double_param_2];
ld.param.u32 %r13, [map_cospi_double_param_3];
ld.param.u64 %rd3, [map_cospi_double_param_4];
ld.param.u32 %r14, [map_cospi_double_param_5];
// %rd1 = source pointer (global address).
cvta.to.global.u64 %rd1, %rd3;
.loc 1 27 1
// %r41 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r41, %r15, %r16, %r17;
.loc 1 27 1
setp.ge.s32 %p1, %r41, %r12;
@%p1 bra BB10_12;
.loc 1 27 1
mov.u32 %r18, %ntid.y;
.loc 1 27 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r19, %nctaid.y;
mul.lo.s32 %r2, %r19, %r18;
// %rd10 = destination pointer (global address).
cvta.to.global.u64 %rd10, %rd2;
// ---- outer loop over the x index (%r41) ----
BB10_2:
.loc 1 27 1
// %r42 = ntid.y * ctaid.y + tid.y : starting y index, recomputed for each x.
mov.u32 %r20, %ctaid.y;
mov.u32 %r22, %tid.y;
mad.lo.s32 %r42, %r18, %r20, %r22;
.loc 1 27 1
setp.ge.s32 %p2, %r42, %r11;
@%p2 bra BB10_11;
.loc 1 27 1
// Per-x base offsets: %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r41, %r14;
.loc 1 27 42
mul.lo.s32 %r5, %r41, %r13;
// ---- inner loop over the y index (%r42) ----
BB10_4:
.loc 1 27 1
// Source address = %rd1 + 8 * (y + x*param_5).
add.s32 %r27, %r42, %r4;
mul.wide.s32 %rd4, %r27, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 1 27 1
ld.global.f64 %fd35, [%rd5];
.loc 2 213 10
// Magnitude test on the high word with the sign bit shifted out: when it is at
// or above the threshold -2038431743 (0x86800001, unsigned compare), x is
// replaced by x * 0.0. This covers very large magnitudes, infinity and NaN.
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd35;
}
shl.b32 %r29, %r28, 1;
setp.lt.u32 %p3, %r29, -2038431743;
@%p3 bra BB10_6;
mov.f64 %fd11, 0d0000000000000000;
.loc 2 213 10
mul.rn.f64 %fd35, %fd35, %fd11;
BB10_6:
// Add 1048576 (1 << 20) to the high word, i.e. bump the exponent field by one,
// then round to the nearest integer: %fd13 = rint(2x) for ordinary inputs.
{
.reg .b32 %temp;
mov.b64 {%temp, %r30}, %fd35;
}
add.s32 %r31, %r30, 1048576;
{
.reg .b32 %temp;
mov.b64 {%r32, %temp}, %fd35;
}
mov.b64 %fd12, {%r32, %r31};
cvt.rni.f64.f64 %fd13, %fd12;
// %r33 = low 32 bits of that integer (quadrant index q).
cvt.rzi.s64.f64 %rd6, %fd13;
cvt.u32.u64 %r33, %rd6;
// Reduced argument: t = x - 0.5 * rint(2x).
neg.f64 %fd14, %fd13;
mov.f64 %fd15, 0d3FE0000000000000;
.loc 2 213 10
fma.rn.f64 %fd16, %fd14, %fd15, %fd35;
// %fd19 = t * pi, with pi applied as a high part (0d400921FB54442D18) plus a
// low-order correction (0d3CA1A62633145C07).
mul.f64 %fd17, %fd16, 0d3CA1A62633145C07;
mov.f64 %fd18, 0d400921FB54442D18;
.loc 2 213 10
fma.rn.f64 %fd19, %fd16, %fd18, %fd17;
// %r8 = q + 1. Its low bit selects which half of __cudart_sin_cos_coeffs is used
// (entry index 0 or 8, 8 bytes per entry) and which leading coefficient is used.
add.s32 %r8, %r33, 1;
shl.b32 %r34, %r8, 3;
and.b32 %r35, %r34, 8;
mul.rn.f64 %fd4, %fd19, %fd19;
and.b32 %r36, %r8, 1;
setp.eq.b32 %p4, %r36, 1;
not.pred %p5, %p4;
selp.f64 %fd20, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p5;
mul.wide.u32 %rd7, %r35, 8;
mov.u64 %rd8, __cudart_sin_cos_coeffs;
add.s64 %rd9, %rd8, %rd7;
.loc 2 213 10
// Horner evaluation in the squared argument (%fd4) using six table coefficients
// at offsets 8..48.
ld.const.f64 %fd21, [%rd9+8];
fma.rn.f64 %fd22, %fd20, %fd4, %fd21;
ld.const.f64 %fd23, [%rd9+16];
fma.rn.f64 %fd24, %fd22, %fd4, %fd23;
ld.const.f64 %fd25, [%rd9+24];
fma.rn.f64 %fd26, %fd24, %fd4, %fd25;
ld.const.f64 %fd27, [%rd9+32];
fma.rn.f64 %fd28, %fd26, %fd4, %fd27;
ld.const.f64 %fd29, [%rd9+40];
fma.rn.f64 %fd30, %fd28, %fd4, %fd29;
ld.const.f64 %fd31, [%rd9+48];
fma.rn.f64 %fd5, %fd30, %fd4, %fd31;
// (q + 1) even: result = poly * a + a.  (q + 1) odd: result = poly * a*a + 1.0.
fma.rn.f64 %fd36, %fd5, %fd19, %fd19;
@%p5 bra BB10_8;
mov.f64 %fd32, 0d3FF0000000000000;
.loc 2 213 10
fma.rn.f64 %fd36, %fd5, %fd4, %fd32;
BB10_8:
// Bit 1 of (q + 1) set: negate the result (computed as result * -1.0 + 0.0).
and.b32 %r37, %r8, 2;
setp.eq.s32 %p6, %r37, 0;
@%p6 bra BB10_10;
mov.f64 %fd33, 0d0000000000000000;
mov.f64 %fd34, 0dBFF0000000000000;
.loc 2 213 10
fma.rn.f64 %fd36, %fd36, %fd34, %fd33;
BB10_10:
.loc 1 27 42
// Destination address = %rd10 + 8 * (y + x*param_3).
add.s32 %r38, %r42, %r5;
mul.wide.s32 %rd11, %r38, 8;
add.s64 %rd12, %rd10, %rd11;
.loc 1 27 42
st.global.f64 [%rd12], %fd36;
.loc 1 27 22
// Advance y by nctaid.y * ntid.y and loop while it is below param_0.
add.s32 %r42, %r2, %r42;
.loc 1 27 1
setp.lt.s32 %p7, %r42, %r11;
@%p7 bra BB10_4;
BB10_11:
.loc 1 27 22
// Advance x by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r39, %nctaid.x;
mad.lo.s32 %r41, %r39, %r15, %r41;
.loc 1 27 1
setp.lt.s32 %p8, %r41, %r12;
@%p8 bra BB10_2;
BB10_12:
.loc 1 27 2
ret;
}
.visible .entry map_erfc_double(
.param .u32 map_erfc_double_param_0,
.param .u32 map_erfc_double_param_1,
.param .u64 map_erfc_double_param_2,
.param .u32 map_erfc_double_param_3,
.param .u64 map_erfc_double_param_4,
.param .u32 map_erfc_double_param_5
)
{
.reg .pred %p<9>;
.reg .f32 %f<9>;
.reg .s32 %r<47>;
.reg .s64 %rd<9>;
.reg .f64 %fd<127>;
ld.param.u32 %r13, [map_erfc_double_param_0];
ld.param.u32 %r14, [map_erfc_double_param_1];
ld.param.u64 %rd1, [map_erfc_double_param_2];
ld.param.u32 %r15, [map_erfc_double_param_3];
ld.param.u64 %rd2, [map_erfc_double_param_4];
ld.param.u32 %r16, [map_erfc_double_param_5];
.loc 1 28 1
mov.u32 %r17, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r44, %r17, %r18, %r19;
.loc 1 28 1
setp.ge.s32 %p1, %r44, %r14;
@%p1 bra BB11_11;
.loc 1 28 1
mov.u32 %r20, %ntid.y;
.loc 1 28 22
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r2, %r21, %r20;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
BB11_2:
.loc 1 28 1
mov.u32 %r22, %ctaid.y;
mov.u32 %r24, %tid.y;
mad.lo.s32 %r45, %r20, %r22, %r24;
.loc 1 28 1
setp.ge.s32 %p2, %r45, %r13;
@%p2 bra BB11_10;
BB11_3:
.loc 1 28 1
mul.lo.s32 %r43, %r44, %r16;
add.s32 %r29, %r45, %r43;
mul.wide.s32 %rd4, %r29, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 1 28 1
ld.global.f64 %fd1, [%rd5];
.loc 2 399 10
abs.f64 %fd2, %fd1;
setp.gtu.f64 %p3, %fd2, 0d7FF0000000000000;
@%p3 bra BB11_8;
abs.f64 %fd124, %fd1;
add.f64 %fd15, %fd124, 0dC010000000000000;
mov.f64 %fd16, 0dC010000000000000;
.loc 2 399 10
add.f64 %fd11, %fd124, 0d4010000000000000;
// inline asm
cvt.rn.f32.f64 %f1,%fd11;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd12,%f2;
// inline asm
neg.f64 %fd17, %fd11;
mov.f64 %fd18, 0d3FF0000000000000;
.loc 2 399 10
fma.rn.f64 %fd19, %fd17, %fd12, %fd18;
fma.rn.f64 %fd20, %fd19, %fd19, %fd19;
fma.rn.f64 %fd21, %fd20, %fd12, %fd12;
mul.f64 %fd22, %fd15, %fd21;
add.rn.f64 %fd23, %fd22, %fd18;
fma.rn.f64 %fd24, %fd16, %fd23, %fd124;
neg.f64 %fd25, %fd22;
fma.rn.f64 %fd26, %fd25, %fd124, %fd24;
fma.rn.f64 %fd27, %fd21, %fd26, %fd22;
mov.f64 %fd28, 0dBE44E1C6FD03D328;
mov.f64 %fd29, 0dBDF8774AD4E0BFD7;
.loc 2 399 10
fma.rn.f64 %fd30, %fd29, %fd27, %fd28;
mov.f64 %fd31, 0dBE4330149F7A56B6;
.loc 2 399 10
fma.rn.f64 %fd32, %fd30, %fd27, %fd31;
mov.f64 %fd33, 0d3E7BEDDED8376273;
.loc 2 399 10
fma.rn.f64 %fd34, %fd32, %fd27, %fd33;
mov.f64 %fd35, 0d3E6F9254C3ABF22B;
.loc 2 399 10
fma.rn.f64 %fd36, %fd34, %fd27, %fd35;
mov.f64 %fd37, 0dBEAB9068C2148CF0;
.loc 2 399 10
fma.rn.f64 %fd38, %fd36, %fd27, %fd37;
mov.f64 %fd39, 0d3E94C6454DB34009;
.loc 2 399 10
fma.rn.f64 %fd40, %fd38, %fd27, %fd39;
mov.f64 %fd41, 0d3ED7F1C378F2311D;
.loc 2 399 10
fma.rn.f64 %fd42, %fd40, %fd27, %fd41;
mov.f64 %fd43, 0dBEE78E051C6D5C58;
.loc 2 399 10
fma.rn.f64 %fd44, %fd42, %fd27, %fd43;
mov.f64 %fd45, 0dBEF995B4EAD14A90;
.loc 2 399 10
fma.rn.f64 %fd46, %fd44, %fd27, %fd45;
mov.f64 %fd47, 0d3F23BE27CF0A29B2;
.loc 2 399 10
fma.rn.f64 %fd48, %fd46, %fd27, %fd47;
mov.f64 %fd49, 0dBF2A1DEF3E81672E;
.loc 2 399 10
fma.rn.f64 %fd50, %fd48, %fd27, %fd49;
mov.f64 %fd51, 0dBF48D4ABE68C1713;
.loc 2 399 10
fma.rn.f64 %fd52, %fd50, %fd27, %fd51;
mov.f64 %fd53, 0d3F749C67210DD6B4;
.loc 2 399 10
fma.rn.f64 %fd54, %fd52, %fd27, %fd53;
mov.f64 %fd55, 0dBF9096238568E357;
.loc 2 399 10
fma.rn.f64 %fd56, %fd54, %fd27, %fd55;
mov.f64 %fd57, 0d3FA3079EDF8C2DC9;
.loc 2 399 10
fma.rn.f64 %fd58, %fd56, %fd27, %fd57;
mov.f64 %fd59, 0dBFB0FB06DFF601FC;
.loc 2 399 10
fma.rn.f64 %fd60, %fd58, %fd27, %fd59;
mov.f64 %fd61, 0d3FB7FEE004DFBCDC;
.loc 2 399 10
fma.rn.f64 %fd62, %fd60, %fd27, %fd61;
mov.f64 %fd63, 0dBFB9DDB23C3DB8C6;
.loc 2 399 10
fma.rn.f64 %fd64, %fd62, %fd27, %fd63;
mov.f64 %fd65, 0d3FB16ECEFCFA5FDA;
.loc 2 399 10
fma.rn.f64 %fd66, %fd64, %fd27, %fd65;
mov.f64 %fd67, 0d3F8F7F5DF66FB6D6;
.loc 2 399 10
fma.rn.f64 %fd68, %fd66, %fd27, %fd67;
mov.f64 %fd69, 0dBFC1DF1AD154A29D;
.loc 2 399 10
fma.rn.f64 %fd70, %fd68, %fd27, %fd69;
mov.f64 %fd71, 0d3FF3BA5916E9FD7F;
.loc 2 399 10
fma.rn.f64 %fd72, %fd70, %fd27, %fd71;
mov.f64 %fd73, 0d4000000000000000;
.loc 2 399 10
fma.rn.f64 %fd13, %fd73, %fd124, %fd18;
// inline asm
cvt.rn.f32.f64 %f5,%fd13;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd14,%f6;
// inline asm
neg.f64 %fd74, %fd13;
fma.rn.f64 %fd75, %fd74, %fd14, %fd18;
fma.rn.f64 %fd76, %fd75, %fd75, %fd75;
fma.rn.f64 %fd77, %fd76, %fd14, %fd14;
mul.f64 %fd78, %fd72, %fd77;
mul.f64 %fd79, %fd78, 0dC000000000000000;
fma.rn.f64 %fd80, %fd124, %fd79, %fd72;
neg.f64 %fd81, %fd78;
add.rn.f64 %fd82, %fd80, %fd81;
fma.rn.f64 %fd3, %fd82, %fd77, %fd78;
neg.f64 %fd83, %fd124;
mul.f64 %fd4, %fd124, %fd83;
mul.f64 %fd84, %fd4, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd85, %fd84;
cvt.rzi.s32.f64 %r7, %fd85;
mov.f64 %fd86, 0dBFE62E42FEFA39EF;
.loc 2 399 10
fma.rn.f64 %fd87, %fd85, %fd86, %fd4;
mov.f64 %fd88, 0dBC7ABC9E3B39803F;
.loc 2 399 10
fma.rn.f64 %fd89, %fd85, %fd88, %fd87;
mov.f64 %fd90, 0d3E928A27E30F5561;
mov.f64 %fd91, 0d3E5AE6449C0686C0;
.loc 2 399 10
fma.rn.f64 %fd92, %fd91, %fd89, %fd90;
mov.f64 %fd93, 0d3EC71DE8E6486D6B;
.loc 2 399 10
fma.rn.f64 %fd94, %fd92, %fd89, %fd93;
mov.f64 %fd95, 0d3EFA019A6B2464C5;
.loc 2 399 10
fma.rn.f64 %fd96, %fd94, %fd89, %fd95;
mov.f64 %fd97, 0d3F2A01A0171064A5;
.loc 2 399 10
fma.rn.f64 %fd98, %fd96, %fd89, %fd97;
mov.f64 %fd99, 0d3F56C16C17F29C8D;
.loc 2 399 10
fma.rn.f64 %fd100, %fd98, %fd89, %fd99;
mov.f64 %fd101, 0d3F8111111111A24E;
.loc 2 399 10
fma.rn.f64 %fd102, %fd100, %fd89, %fd101;
mov.f64 %fd103, 0d3FA555555555211D;
.loc 2 399 10
fma.rn.f64 %fd104, %fd102, %fd89, %fd103;
mov.f64 %fd105, 0d3FC5555555555530;
.loc 2 399 10
fma.rn.f64 %fd106, %fd104, %fd89, %fd105;
mov.f64 %fd107, 0d3FE0000000000005;
.loc 2 399 10
fma.rn.f64 %fd108, %fd106, %fd89, %fd107;
fma.rn.f64 %fd109, %fd108, %fd89, %fd18;
fma.rn.f64 %fd125, %fd109, %fd89, %fd18;
abs.s32 %r30, %r7;
setp.lt.s32 %p4, %r30, 1023;
@%p4 bra BB11_6;
add.s32 %r31, %r7, 2046;
shl.b32 %r32, %r31, 19;
and.b32 %r33, %r32, -1048576;
shl.b32 %r34, %r31, 20;
sub.s32 %r46, %r34, %r33;
mov.u32 %r35, 0;
.loc 2 399 10
mov.b64 %fd110, {%r35, %r33};
mul.f64 %fd125, %fd125, %fd110;
bra.uni BB11_7;
BB11_6:
.loc 2 399 10
shl.b32 %r36, %r7, 20;
add.s32 %r46, %r36, 1072693248;
BB11_7:
abs.f64 %fd123, %fd1;
mov.f64 %fd122, 0d4000000000000000;
.loc 2 399 10
neg.f64 %fd121, %fd123;
mov.u32 %r37, 0;
.loc 2 399 10
mov.b64 %fd111, {%r37, %r46};
mul.f64 %fd112, %fd125, %fd111;
neg.f64 %fd114, %fd4;
fma.rn.f64 %fd115, %fd121, %fd123, %fd114;
fma.rn.f64 %fd116, %fd112, %fd115, %fd112;
mul.f64 %fd117, %fd3, %fd116;
{
.reg .b32 %temp;
mov.b64 {%temp, %r38}, %fd123;
}
setp.gt.s32 %p5, %r38, 1077624832;
selp.f64 %fd118, 0d0000000000000000, %fd117, %p5;
{
.reg .b32 %temp;
mov.b64 {%temp, %r39}, %fd1;
}
setp.lt.s32 %p6, %r39, 0;
sub.f64 %fd120, %fd122, %fd118;
selp.f64 %fd126, %fd120, %fd118, %p6;
bra.uni BB11_9;
BB11_8:
.loc 2 399 10
add.f64 %fd126, %fd1, %fd1;
BB11_9:
.loc 1 28 42
mad.lo.s32 %r40, %r44, %r15, %r45;
mul.wide.s32 %rd7, %r40, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 28 42
st.global.f64 [%rd8], %fd126;
.loc 1 28 22
add.s32 %r45, %r2, %r45;
.loc 1 28 1
setp.lt.s32 %p7, %r45, %r13;
@%p7 bra BB11_3;
BB11_10:
.loc 1 28 22
mov.u32 %r41, %nctaid.x;
mad.lo.s32 %r44, %r41, %r17, %r44;
.loc 1 28 1
setp.lt.s32 %p8, %r44, %r14;
@%p8 bra BB11_2;
BB11_11:
.loc 1 28 2
ret;
}
// Kernel map_erfcinv_double: element-wise double-precision map over a 2-D index space.
// Parameters, as they are used in the body below:
//   param_0 (u32): exclusive upper bound for the y index (built from tid.y/ctaid.y/ntid.y)
//   param_1 (u32): exclusive upper bound for the x index (built from tid.x/ctaid.x/ntid.x)
//   param_2 (u64): destination base pointer (converted to global, written with st.global.f64)
//   param_3 (u32): destination stride in elements, multiplied by the x index
//   param_4 (u64): source base pointer (converted to global, read with ld.global.f64)
//   param_5 (u32): source stride in elements, multiplied by the x index
// Element offset is (x * stride + y), scaled by 8 bytes per f64.
// The math body is tagged ".loc 2 389", i.e. inlined from file 2 of the .file table
// (math_functions_dbl_ptx3.h); presumably the erfcinv() implementation, going by the kernel name.
.visible .entry map_erfcinv_double(
.param .u32 map_erfcinv_double_param_0,
.param .u32 map_erfcinv_double_param_1,
.param .u64 map_erfcinv_double_param_2,
.param .u32 map_erfcinv_double_param_3,
.param .u64 map_erfcinv_double_param_4,
.param .u32 map_erfcinv_double_param_5
)
{
.reg .pred %p<19>;
.reg .f32 %f<17>;
.reg .s32 %r<69>;
.reg .s64 %rd<9>;
.reg .f64 %fd<261>;
ld.param.u32 %r20, [map_erfcinv_double_param_0];
ld.param.u32 %r21, [map_erfcinv_double_param_1];
ld.param.u64 %rd2, [map_erfcinv_double_param_2];
ld.param.u32 %r22, [map_erfcinv_double_param_3];
ld.param.u64 %rd3, [map_erfcinv_double_param_4];
ld.param.u32 %r23, [map_erfcinv_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
.loc 1 29 1
// %r63 = initial x index = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r63, %r24, %r25, %r26;
.loc 1 29 1
// Nothing to do for this thread if x is already out of range.
setp.ge.s32 %p1, %r63, %r21;
@%p1 bra BB12_23;
.loc 1 29 1
mov.u32 %r27, %ntid.y;
.loc 1 29 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
cvta.to.global.u64 %rd6, %rd2;
// Outer loop head (over x): the y index is recomputed from scratch for every x.
BB12_2:
.loc 1 29 1
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r64, %r27, %r29, %r31;
.loc 1 29 1
setp.ge.s32 %p2, %r64, %r20;
@%p2 bra BB12_22;
// Inner loop head (over y): load one source element into %fd1.
BB12_3:
.loc 1 29 1
mul.lo.s32 %r62, %r63, %r23;
add.s32 %r36, %r64, %r62;
mul.wide.s32 %rd4, %r36, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 1 29 1
ld.global.f64 %fd1, [%rd5];
.loc 2 389 10
// %fd2 = -x, %fd3 = 2.0 - x (0d4000000000000000 is 2.0).
neg.f64 %fd2, %fd1;
mov.f64 %fd18, 0d4000000000000000;
.loc 2 389 10
add.rn.f64 %fd3, %fd18, %fd2;
// Inputs inside the closed interval bounded by these two constants take the
// central path at BB12_20; everything else (including NaN) falls through.
setp.le.f64 %p3, %fd1, 0d3FFFFC0B65AA4E0E;
setp.ge.f64 %p4, %fd1, 0d3F4FA4D2AD8F904D;
and.pred %p5, %p4, %p3;
@%p5 bra BB12_20;
// Tail handling: %p6 records x > 1.0 and is reused at BB12_19 to negate the result.
// %fd4 = (x > 1.0) ? 2.0 - x : x.
setp.gt.f64 %p6, %fd1, 0d3FF0000000000000;
selp.f64 %fd4, %fd3, %fd1, %p6;
// Very small (or unordered) %fd4 goes to the careful path at BB12_6.
setp.ltu.f64 %p7, %fd4, 0d2B2BFF2EE48E0530;
// %r65 = high 32 bits of %fd4, %r66 = low 32 bits of %fd4.
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd4;
}
@%p7 bra BB12_6;
// Split %fd4 into an integer exponent term (%fd23) and a rebiased mantissa (%fd24)
// by bit manipulation of the high word; presumably the first step of a log evaluation.
shr.u32 %r37, %r65, 20;
and.b32 %r38, %r37, 2046;
add.s32 %r39, %r38, -1022;
cvt.rn.f64.s32 %fd23, %r39;
and.b32 %r40, %r65, -2145386497;
add.s32 %r41, %r40, 1071644672;
mov.b64 %fd24, {%r66, %r41};
// %fd25 = m - 1.0, %fd19 = m + 1.0
add.f64 %fd25, %fd24, 0dBFF0000000000000;
add.f64 %fd19, %fd24, 0d3FF0000000000000;
mov.f64 %fd26, 0d3FF0000000000000;
.loc 2 389 10
// Single-precision approximate reciprocal of (m + 1), widened to f64 and then
// refined with FMAs into %fd30.
// inline asm
cvt.rn.f32.f64 %f1,%fd19;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd20,%f2;
// inline asm
neg.f64 %fd27, %fd19;
fma.rn.f64 %fd28, %fd27, %fd20, %fd26;
fma.rn.f64 %fd29, %fd28, %fd28, %fd28;
fma.rn.f64 %fd30, %fd29, %fd20, %fd20;
// %fd36 = corrected quotient (m - 1) / (m + 1); %fd37 = its square.
mul.f64 %fd31, %fd25, %fd30;
mov.f64 %fd32, 0dC000000000000000;
.loc 2 389 10
fma.rn.f64 %fd33, %fd32, %fd31, %fd25;
neg.f64 %fd34, %fd31;
fma.rn.f64 %fd35, %fd34, %fd25, %fd33;
fma.rn.f64 %fd36, %fd35, %fd30, %fd31;
mul.f64 %fd37, %fd36, %fd36;
// Horner evaluation of a polynomial in %fd37.
mov.f64 %fd38, 0d3FA55CF59CDC5D89;
mov.f64 %fd39, 0d3FB5C5C218C775C9;
.loc 2 389 10
fma.rn.f64 %fd40, %fd39, %fd37, %fd38;
mov.f64 %fd41, 0d3FAEFD18CF6EBB9C;
.loc 2 389 10
fma.rn.f64 %fd42, %fd40, %fd37, %fd41;
mov.f64 %fd43, 0d3FB10682EDCB8D1B;
.loc 2 389 10
fma.rn.f64 %fd44, %fd42, %fd37, %fd43;
mov.f64 %fd45, 0d3FB3B1DD3AC7FC96;
.loc 2 389 10
fma.rn.f64 %fd46, %fd44, %fd37, %fd45;
mov.f64 %fd47, 0d3FB745CB459B54A6;
.loc 2 389 10
fma.rn.f64 %fd48, %fd46, %fd37, %fd47;
mov.f64 %fd49, 0d3FBC71C741A0669F;
.loc 2 389 10
fma.rn.f64 %fd50, %fd48, %fd37, %fd49;
mov.f64 %fd51, 0d3FC249249209112E;
.loc 2 389 10
fma.rn.f64 %fd52, %fd50, %fd37, %fd51;
mov.f64 %fd53, 0d3FC99999999A06C1;
.loc 2 389 10
fma.rn.f64 %fd54, %fd52, %fd37, %fd53;
mov.f64 %fd55, 0d3FD5555555555535;
.loc 2 389 10
fma.rn.f64 %fd56, %fd54, %fd37, %fd55;
mul.f64 %fd57, %fd56, %fd37;
fma.rn.f64 %fd58, %fd57, %fd36, %fd36;
add.f64 %fd59, %fd58, %fd58;
// Combine with exponent * 0d3FE62E42FEFA39EF (approximately ln 2), then negate into %fd21.
mov.f64 %fd60, 0d3FE62E42FEFA39EF;
.loc 2 389 10
fma.rn.f64 %fd61, %fd23, %fd60, %fd59;
neg.f64 %fd21, %fd61;
// Single-precision approximate reciprocal square root of %fd21, widened to f64
// and refined with FMAs into %fd69.
// inline asm
cvt.rn.f32.f64 %f5, %fd21;
// inline asm
// inline asm
rsqrt.approx.ftz.f32 %f6, %f5;
// inline asm
// inline asm
cvt.f64.f32 %fd22, %f6;
// inline asm
mul.rn.f64 %fd62, %fd22, %fd22;
neg.f64 %fd63, %fd62;
fma.rn.f64 %fd64, %fd21, %fd63, %fd26;
mov.f64 %fd65, 0d3FE0000000000000;
mov.f64 %fd66, 0d3FD8000000000000;
.loc 2 389 10
fma.rn.f64 %fd67, %fd66, %fd64, %fd65;
mul.rn.f64 %fd68, %fd64, %fd22;
fma.rn.f64 %fd69, %fd67, %fd68, %fd22;
// Numerator polynomial in %fd69 -> %fd84.
mov.f64 %fd70, 0d4000A0E7333839AA;
mov.f64 %fd71, 0d3FEBE9222591AFAB;
.loc 2 389 10
fma.rn.f64 %fd72, %fd71, %fd69, %fd70;
mov.f64 %fd73, 0d4008768CF7E57D5C;
.loc 2 389 10
fma.rn.f64 %fd74, %fd72, %fd69, %fd73;
mov.f64 %fd75, 0d400B77E7E28DA583;
.loc 2 389 10
fma.rn.f64 %fd76, %fd74, %fd69, %fd75;
mov.f64 %fd77, 0d3FF34F26A4F99CF9;
.loc 2 389 10
fma.rn.f64 %fd78, %fd76, %fd69, %fd77;
mov.f64 %fd79, 0d3FC1F674ADB019ED;
.loc 2 389 10
fma.rn.f64 %fd80, %fd78, %fd69, %fd79;
mov.f64 %fd81, 0d3F75DDAE9506431D;
.loc 2 389 10
fma.rn.f64 %fd82, %fd80, %fd69, %fd81;
mov.f64 %fd83, 0d3F0ADA49AA32489C;
.loc 2 389 10
fma.rn.f64 %fd84, %fd82, %fd69, %fd83;
// Denominator polynomial in %fd69 -> %fd99, then multiplied by %fd69 once more.
add.f64 %fd85, %fd69, 0d4001E90FF51C2197;
mov.f64 %fd86, 0d40111EA3A7CF3820;
.loc 2 389 10
fma.rn.f64 %fd87, %fd85, %fd69, %fd86;
mov.f64 %fd88, 0d4011A0E4A4749594;
.loc 2 389 10
fma.rn.f64 %fd89, %fd87, %fd69, %fd88;
mov.f64 %fd90, 0d400D4E977D38C14D;
.loc 2 389 10
fma.rn.f64 %fd91, %fd89, %fd69, %fd90;
mov.f64 %fd92, 0d3FF37FD567EC0D5F;
.loc 2 389 10
fma.rn.f64 %fd93, %fd91, %fd69, %fd92;
mov.f64 %fd94, 0d3FC1FB9D7F676033;
.loc 2 389 10
fma.rn.f64 %fd95, %fd93, %fd69, %fd94;
mov.f64 %fd96, 0d3F75DDCDF98946E4;
.loc 2 389 10
fma.rn.f64 %fd97, %fd95, %fd69, %fd96;
mov.f64 %fd98, 0d3F0ADA42D79D8DBB;
.loc 2 389 10
fma.rn.f64 %fd99, %fd97, %fd69, %fd98;
mul.f64 %fd100, %fd99, %fd69;
// Unsigned tail result = numerator / denominator.
div.rn.f64 %fd259, %fd84, %fd100;
bra.uni BB12_19;
// Careful path for tiny, zero, infinite or NaN %fd4.
BB12_6:
.loc 2 389 10
// Finite and strictly positive values are handled at BB12_12.
setp.gt.f64 %p8, %fd4, 0d0000000000000000;
setp.lt.f64 %p9, %fd4, 0d7FF0000000000000;
and.pred %p10, %p8, %p9;
@%p10 bra BB12_12;
// NaN (|v| compares unordered/greater than +inf) goes to BB12_11.
abs.f64 %fd101, %fd4;
setp.gtu.f64 %p11, %fd101, 0d7FF0000000000000;
@%p11 bra BB12_11;
setp.neu.f64 %p12, %fd4, 0d0000000000000000;
@%p12 bra BB12_10;
// %fd4 == 0: intermediate value is -infinity.
mov.f64 %fd258, 0dFFF0000000000000;
bra.uni BB12_18;
BB12_10:
.loc 2 389 10
// +infinity passes through unchanged; any other remaining value yields NaN.
setp.eq.f64 %p13, %fd4, 0d7FF0000000000000;
selp.f64 %fd258, %fd4, 0dFFF8000000000000, %p13;
bra.uni BB12_18;
BB12_11:
.loc 2 389 10
// v + v on a NaN input propagates it.
add.f64 %fd258, %fd4, %fd4;
bra.uni BB12_18;
BB12_12:
.loc 2 389 10
// High word below 0x00100000 means the exponent field is zero (subnormal input).
setp.lt.u32 %p14, %r65, 1048576;
@%p14 bra BB12_14;
mov.u32 %r67, -1023;
bra.uni BB12_15;
BB12_14:
.loc 2 389 10
// Scale the subnormal by 0d4350000000000000 (2^54) and re-read its words;
// the exponent bias is adjusted by the same 54 (-1077 instead of -1023).
mul.f64 %fd103, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd103;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd103;
}
mov.u32 %r67, -1077;
BB12_15:
.loc 2 389 10
// %r68 = unbiased exponent; %fd257 = mantissa with the exponent field forced to 0x3FF.
shr.s32 %r44, %r65, 20;
add.s32 %r68, %r67, %r44;
and.b32 %r45, %r65, -2146435073;
or.b32 %r46, %r45, 1072693248;
mov.b64 %fd257, {%r66, %r46};
// If the mantissa's high word is at or above the threshold, halve the mantissa
// (subtract one from the exponent field) and bump the exponent.
setp.lt.u32 %p15, %r46, 1073127583;
@%p15 bra BB12_17;
{
.reg .b32 %temp;
mov.b64 {%r47, %temp}, %fd257;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r48}, %fd257;
}
add.s32 %r49, %r48, -1048576;
mov.b64 %fd257, {%r47, %r49};
add.s32 %r68, %r68, 1;
BB12_17:
add.f64 %fd104, %fd257, 0d3FF0000000000000;
mov.f64 %fd106, 0d3FF0000000000000;
.loc 2 389 10
// Approximate reciprocal of (m + 1) in f32, refined in f64 into %fd110.
// inline asm
cvt.rn.f32.f64 %f9,%fd104;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd105,%f10;
// inline asm
neg.f64 %fd107, %fd104;
fma.rn.f64 %fd108, %fd107, %fd105, %fd106;
fma.rn.f64 %fd109, %fd108, %fd108, %fd108;
fma.rn.f64 %fd110, %fd109, %fd105, %fd105;
// %fd113 = 2 * (m - 1) / (m + 1) (product plus itself via FMA); %fd114 = its square.
add.f64 %fd111, %fd257, 0dBFF0000000000000;
mul.f64 %fd112, %fd111, %fd110;
fma.rn.f64 %fd113, %fd111, %fd110, %fd112;
mul.f64 %fd114, %fd113, %fd113;
// Horner evaluation of a polynomial in %fd114.
mov.f64 %fd115, 0d3ED0EE258B7A8B04;
mov.f64 %fd116, 0d3EB1380B3AE80F1E;
.loc 2 389 10
fma.rn.f64 %fd117, %fd116, %fd114, %fd115;
mov.f64 %fd118, 0d3EF3B2669F02676F;
.loc 2 389 10
fma.rn.f64 %fd119, %fd117, %fd114, %fd118;
mov.f64 %fd120, 0d3F1745CBA9AB0956;
.loc 2 389 10
fma.rn.f64 %fd121, %fd119, %fd114, %fd120;
mov.f64 %fd122, 0d3F3C71C72D1B5154;
.loc 2 389 10
fma.rn.f64 %fd123, %fd121, %fd114, %fd122;
mov.f64 %fd124, 0d3F624924923BE72D;
.loc 2 389 10
fma.rn.f64 %fd125, %fd123, %fd114, %fd124;
mov.f64 %fd126, 0d3F8999999999A3C4;
.loc 2 389 10
fma.rn.f64 %fd127, %fd125, %fd114, %fd126;
mov.f64 %fd128, 0d3FB5555555555554;
.loc 2 389 10
fma.rn.f64 %fd129, %fd127, %fd114, %fd128;
// Error-compensation terms for the quotient, then recombination with
// exponent * ln2 (split into a high constant and the small constant 0d3C7ABC9E3B39803F).
sub.f64 %fd130, %fd111, %fd113;
add.f64 %fd131, %fd130, %fd130;
neg.f64 %fd132, %fd113;
fma.rn.f64 %fd133, %fd132, %fd111, %fd131;
mul.f64 %fd134, %fd110, %fd133;
mul.f64 %fd135, %fd129, %fd114;
fma.rn.f64 %fd136, %fd135, %fd113, %fd134;
cvt.rn.f64.s32 %fd137, %r68;
mov.f64 %fd138, 0d3FE62E42FEFA39EF;
.loc 2 389 10
fma.rn.f64 %fd139, %fd137, %fd138, %fd113;
neg.s32 %r50, %r68;
cvt.rn.f64.s32 %fd140, %r50;
fma.rn.f64 %fd141, %fd140, %fd138, %fd139;
sub.f64 %fd142, %fd141, %fd113;
sub.f64 %fd143, %fd136, %fd142;
mov.f64 %fd144, 0d3C7ABC9E3B39803F;
.loc 2 389 10
fma.rn.f64 %fd145, %fd137, %fd144, %fd143;
add.f64 %fd258, %fd139, %fd145;
// Join point of the careful path: %fd258 holds the intermediate value
// (presumably log of %fd4, or the special value chosen above).
BB12_18:
// %fd147 = approximate 1/sqrt(-%fd258); rational approximation in %fd147 follows.
neg.f64 %fd146, %fd258;
rsqrt.approx.f64 %fd147, %fd146;
mov.f64 %fd148, 0d3FFA2013964E259C;
mov.f64 %fd149, 0d3FE8E2101C71B0BF;
.loc 2 389 10
fma.rn.f64 %fd150, %fd149, %fd147, %fd148;
mov.f64 %fd151, 0d3FDABFE90921BE68;
.loc 2 389 10
fma.rn.f64 %fd152, %fd150, %fd147, %fd151;
mov.f64 %fd153, 0d3F97E41314DE00D4;
.loc 2 389 10
fma.rn.f64 %fd154, %fd152, %fd147, %fd153;
mov.f64 %fd155, 0d3F311BD487102E94;
.loc 2 389 10
fma.rn.f64 %fd156, %fd154, %fd147, %fd155;
add.f64 %fd157, %fd147, 0d3FF59895C30BAA54;
mov.f64 %fd158, 0d3FFAE8E5956A143F;
.loc 2 389 10
fma.rn.f64 %fd159, %fd157, %fd147, %fd158;
mov.f64 %fd160, 0d3FDACCE85FF7383D;
.loc 2 389 10
fma.rn.f64 %fd161, %fd159, %fd147, %fd160;
mov.f64 %fd162, 0d3F97E43B6CAC34FE;
.loc 2 389 10
fma.rn.f64 %fd163, %fd161, %fd147, %fd162;
mov.f64 %fd164, 0d3F311BD08289EB12;
.loc 2 389 10
fma.rn.f64 %fd165, %fd163, %fd147, %fd164;
mul.f64 %fd166, %fd165, %fd147;
div.rn.f64 %fd259, %fd156, %fd166;
// Tail paths join here: negate the result when the original input was > 1.0 (%p6).
BB12_19:
neg.f64 %fd167, %fd259;
selp.f64 %fd260, %fd167, %fd259, %p6;
bra.uni BB12_21;
// Central path: works on t = (2 - x) * x.
BB12_20:
.loc 2 389 10
mul.rn.f64 %fd170, %fd3, %fd1;
// Split t into high word %r51 and low word %r52, then into exponent and mantissa
// exactly as in the fast tail path above.
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd170;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd170;
}
shr.u32 %r53, %r51, 20;
and.b32 %r54, %r53, 2046;
add.s32 %r55, %r54, -1022;
cvt.rn.f64.s32 %fd171, %r55;
and.b32 %r56, %r51, -2145386497;
add.s32 %r57, %r56, 1071644672;
mov.b64 %fd172, {%r52, %r57};
add.f64 %fd173, %fd172, 0dBFF0000000000000;
add.f64 %fd168, %fd172, 0d3FF0000000000000;
mov.f64 %fd174, 0d3FF0000000000000;
.loc 2 389 10
// Approximate reciprocal of (m + 1) in f32, refined in f64 into %fd178.
// inline asm
cvt.rn.f32.f64 %f13,%fd168;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f14,%f13;
// inline asm
// inline asm
cvt.f64.f32 %fd169,%f14;
// inline asm
neg.f64 %fd175, %fd168;
fma.rn.f64 %fd176, %fd175, %fd169, %fd174;
fma.rn.f64 %fd177, %fd176, %fd176, %fd176;
fma.rn.f64 %fd178, %fd177, %fd169, %fd169;
mul.f64 %fd179, %fd173, %fd178;
mov.f64 %fd180, 0dC000000000000000;
.loc 2 389 10
fma.rn.f64 %fd181, %fd180, %fd179, %fd173;
neg.f64 %fd182, %fd179;
fma.rn.f64 %fd183, %fd182, %fd173, %fd181;
fma.rn.f64 %fd184, %fd183, %fd178, %fd179;
mul.f64 %fd185, %fd184, %fd184;
// Same polynomial coefficients as the fast tail path, evaluated in %fd185.
mov.f64 %fd186, 0d3FA55CF59CDC5D89;
mov.f64 %fd187, 0d3FB5C5C218C775C9;
.loc 2 389 10
fma.rn.f64 %fd188, %fd187, %fd185, %fd186;
mov.f64 %fd189, 0d3FAEFD18CF6EBB9C;
.loc 2 389 10
fma.rn.f64 %fd190, %fd188, %fd185, %fd189;
mov.f64 %fd191, 0d3FB10682EDCB8D1B;
.loc 2 389 10
fma.rn.f64 %fd192, %fd190, %fd185, %fd191;
mov.f64 %fd193, 0d3FB3B1DD3AC7FC96;
.loc 2 389 10
fma.rn.f64 %fd194, %fd192, %fd185, %fd193;
mov.f64 %fd195, 0d3FB745CB459B54A6;
.loc 2 389 10
fma.rn.f64 %fd196, %fd194, %fd185, %fd195;
mov.f64 %fd197, 0d3FBC71C741A0669F;
.loc 2 389 10
fma.rn.f64 %fd198, %fd196, %fd185, %fd197;
mov.f64 %fd199, 0d3FC249249209112E;
.loc 2 389 10
fma.rn.f64 %fd200, %fd198, %fd185, %fd199;
mov.f64 %fd201, 0d3FC99999999A06C1;
.loc 2 389 10
fma.rn.f64 %fd202, %fd200, %fd185, %fd201;
mov.f64 %fd203, 0d3FD5555555555535;
.loc 2 389 10
fma.rn.f64 %fd204, %fd202, %fd185, %fd203;
mul.f64 %fd205, %fd204, %fd185;
fma.rn.f64 %fd206, %fd205, %fd184, %fd184;
add.f64 %fd207, %fd206, %fd206;
mov.f64 %fd208, 0d3FE62E42FEFA39EF;
.loc 2 389 10
fma.rn.f64 %fd209, %fd171, %fd208, %fd207;
// %fd211 = -3.125 - %fd209 (0dC009000000000000 is -3.125); polynomial argument.
mov.f64 %fd210, 0dC009000000000000;
.loc 2 389 10
sub.f64 %fd211, %fd210, %fd209;
// Horner evaluation of a long polynomial in %fd211 -> %fd256.
mov.f64 %fd212, 0dBC08DDF93324D327;
mov.f64 %fd213, 0dBBB135D2E746E627;
.loc 2 389 10
fma.rn.f64 %fd214, %fd213, %fd211, %fd212;
mov.f64 %fd215, 0d3C37B83EEF0B7C9F;
.loc 2 389 10
fma.rn.f64 %fd216, %fd214, %fd211, %fd215;
mov.f64 %fd217, 0d3C69BA72CD589B91;
.loc 2 389 10
fma.rn.f64 %fd218, %fd216, %fd211, %fd217;
mov.f64 %fd219, 0dBCA33689090A6B96;
.loc 2 389 10
fma.rn.f64 %fd220, %fd218, %fd211, %fd219;
mov.f64 %fd221, 0d3C782E11898132E0;
.loc 2 389 10
fma.rn.f64 %fd222, %fd220, %fd211, %fd221;
mov.f64 %fd223, 0d3CFDE4ACFD9E26BA;
.loc 2 389 10
fma.rn.f64 %fd224, %fd222, %fd211, %fd223;
mov.f64 %fd225, 0dBD26D33EED66C487;
.loc 2 389 10
fma.rn.f64 %fd226, %fd224, %fd211, %fd225;
mov.f64 %fd227, 0dBD36F2167040D8E2;
.loc 2 389 10
fma.rn.f64 %fd228, %fd226, %fd211, %fd227;
mov.f64 %fd229, 0d3D872A22C2D77E20;
.loc 2 389 10
fma.rn.f64 %fd230, %fd228, %fd211, %fd229;
mov.f64 %fd231, 0dBDAC8859C4E5C0AF;
.loc 2 389 10
fma.rn.f64 %fd232, %fd230, %fd211, %fd231;
mov.f64 %fd233, 0dBDCDC583D118A561;
.loc 2 389 10
fma.rn.f64 %fd234, %fd232, %fd211, %fd233;
mov.f64 %fd235, 0d3E120F47CCF46B3C;
.loc 2 389 10
fma.rn.f64 %fd236, %fd234, %fd211, %fd235;
mov.f64 %fd237, 0dBE31A9E38DC84D60;
.loc 2 389 10
fma.rn.f64 %fd238, %fd236, %fd211, %fd237;
mov.f64 %fd239, 0dBE5F36CD6D3D46A9;
.loc 2 389 10
fma.rn.f64 %fd240, %fd238, %fd211, %fd239;
mov.f64 %fd241, 0d3E9C6B4F5D03B787;
.loc 2 389 10
fma.rn.f64 %fd242, %fd240, %fd211, %fd241;
mov.f64 %fd243, 0dBEB6E8A5434AE8A2;
.loc 2 389 10
fma.rn.f64 %fd244, %fd242, %fd211, %fd243;
mov.f64 %fd245, 0dBEED1D1F7B8736F6;
.loc 2 389 10
fma.rn.f64 %fd246, %fd244, %fd211, %fd245;
mov.f64 %fd247, 0d3F2879C2A212F024;
.loc 2 389 10
fma.rn.f64 %fd248, %fd246, %fd211, %fd247;
mov.f64 %fd249, 0dBF4845769484FCA8;
.loc 2 389 10
fma.rn.f64 %fd250, %fd248, %fd211, %fd249;
mov.f64 %fd251, 0dBF78B6C33114F909;
.loc 2 389 10
fma.rn.f64 %fd252, %fd250, %fd211, %fd251;
mov.f64 %fd253, 0d3FCEBD80D9B13E28;
.loc 2 389 10
fma.rn.f64 %fd254, %fd252, %fd211, %fd253;
mov.f64 %fd255, 0d3FFA755E7C99AE86;
.loc 2 389 10
fma.rn.f64 %fd256, %fd254, %fd211, %fd255;
// Result = poly * (-x) + poly, i.e. poly * (1 - x).
fma.rn.f64 %fd260, %fd256, %fd2, %fd256;
// Store %fd260 to destination[x * param_3 + y] and advance y by nctaid.y * ntid.y.
BB12_21:
.loc 1 29 42
mul.lo.s32 %r61, %r63, %r22;
add.s32 %r58, %r64, %r61;
mul.wide.s32 %rd7, %r58, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 29 42
st.global.f64 [%rd8], %fd260;
.loc 1 29 22
add.s32 %r64, %r2, %r64;
.loc 1 29 1
setp.lt.s32 %p17, %r64, %r20;
@%p17 bra BB12_3;
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
BB12_22:
.loc 1 29 22
mov.u32 %r59, %nctaid.x;
mad.lo.s32 %r63, %r59, %r24, %r63;
.loc 1 29 1
setp.lt.s32 %p18, %r63, %r21;
@%p18 bra BB12_2;
BB12_23:
.loc 1 29 2
ret;
}
// Kernel map_erfcx_double: element-wise double-precision map over a 2-D index space.
// Parameters, as they are used in the body below:
//   param_0 (u32): exclusive upper bound for the y index (built from tid.y/ctaid.y/ntid.y)
//   param_1 (u32): exclusive upper bound for the x index (built from tid.x/ctaid.x/ntid.x)
//   param_2 (u64): destination base pointer (converted to global, written with st.global.f64)
//   param_3 (u32): destination stride in elements, multiplied by the x index
//   param_4 (u64): source base pointer (converted to global, read with ld.global.f64)
//   param_5 (u32): source stride in elements, multiplied by the x index
// Element offset is (x * stride + y), scaled by 8 bytes per f64.
// The math body is tagged ".loc 2 404", i.e. inlined from file 2 of the .file table
// (math_functions_dbl_ptx3.h); presumably the erfcx() implementation, going by the kernel name.
.visible .entry map_erfcx_double(
.param .u32 map_erfcx_double_param_0,
.param .u32 map_erfcx_double_param_1,
.param .u64 map_erfcx_double_param_2,
.param .u32 map_erfcx_double_param_3,
.param .u64 map_erfcx_double_param_4,
.param .u32 map_erfcx_double_param_5
)
{
.reg .pred %p<14>;
.reg .f32 %f<9>;
.reg .s32 %r<48>;
.reg .s64 %rd<9>;
.reg .f64 %fd<141>;
ld.param.u32 %r15, [map_erfcx_double_param_0];
ld.param.u32 %r16, [map_erfcx_double_param_1];
ld.param.u64 %rd2, [map_erfcx_double_param_2];
ld.param.u32 %r17, [map_erfcx_double_param_3];
ld.param.u64 %rd3, [map_erfcx_double_param_4];
ld.param.u32 %r18, [map_erfcx_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
.loc 1 30 1
// %r45 = initial x index = ntid.x * ctaid.x + tid.x
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r45, %r19, %r20, %r21;
.loc 1 30 1
// Nothing to do for this thread if x is already out of range.
setp.ge.s32 %p1, %r45, %r16;
@%p1 bra BB13_17;
.loc 1 30 1
mov.u32 %r22, %ntid.y;
.loc 1 30 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r2, %r23, %r22;
cvta.to.global.u64 %rd6, %rd2;
// Outer loop head (over x): the y index is recomputed from scratch for every x.
BB13_2:
.loc 1 30 1
mov.u32 %r24, %ctaid.y;
mov.u32 %r26, %tid.y;
mad.lo.s32 %r46, %r22, %r24, %r26;
.loc 1 30 1
setp.ge.s32 %p2, %r46, %r15;
@%p2 bra BB13_16;
.loc 1 30 1
// Per-x base offsets: %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r45, %r18;
.loc 1 30 42
mul.lo.s32 %r5, %r45, %r17;
// Inner loop head (over y): load one source element into %fd1.
BB13_4:
.loc 1 30 1
add.s32 %r31, %r46, %r4;
mul.wide.s32 %rd4, %r31, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 1 30 1
ld.global.f64 %fd1, [%rd5];
.loc 2 404 10
// %fd2 = |x|; %r32 = its high 32 bits. A high word below 1077936128 (0x40400000,
// the high word of 32.0) selects the general path at BB13_6.
abs.f64 %fd2, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r32}, %fd2;
}
setp.lt.u32 %p3, %r32, 1077936128;
@%p3 bra BB13_6;
// Large-|x| path: polynomial in 1/|x|^2, scaled by (1/|x|) * 0d3FE20DD750429B6D
// (approximately 1/sqrt(pi)).
rcp.rn.f64 %fd16, %fd2;
mov.f64 %fd17, 0d3FF0000000000000;
.loc 2 404 10
mul.f64 %fd18, %fd16, %fd16;
mov.f64 %fd19, 0d401A400000000000;
mov.f64 %fd20, 0dC03D880000000000;
.loc 2 404 10
fma.rn.f64 %fd21, %fd20, %fd18, %fd19;
mov.f64 %fd22, 0dBFFE000000000000;
.loc 2 404 10
fma.rn.f64 %fd23, %fd21, %fd18, %fd22;
mov.f64 %fd24, 0d3FE8000000000000;
.loc 2 404 10
fma.rn.f64 %fd25, %fd23, %fd18, %fd24;
mov.f64 %fd26, 0dBFE0000000000000;
.loc 2 404 10
fma.rn.f64 %fd27, %fd25, %fd18, %fd26;
fma.rn.f64 %fd28, %fd27, %fd18, %fd17;
mul.f64 %fd29, %fd16, 0d3FE20DD750429B6D;
mul.f64 %fd140, %fd28, %fd29;
bra.uni BB13_7;
// General path: substitute t = (|x| - 4) / (|x| + 4), evaluate a polynomial in t,
// then divide by (2|x| + 1).
BB13_6:
.loc 2 404 10
add.f64 %fd34, %fd2, 0dC010000000000000;
mov.f64 %fd35, 0dC010000000000000;
.loc 2 404 10
add.f64 %fd30, %fd2, 0d4010000000000000;
// Approximate reciprocal of (|x| + 4) in f32, refined in f64 into %fd40.
// inline asm
cvt.rn.f32.f64 %f1,%fd30;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd31,%f2;
// inline asm
neg.f64 %fd36, %fd30;
mov.f64 %fd37, 0d3FF0000000000000;
.loc 2 404 10
fma.rn.f64 %fd38, %fd36, %fd31, %fd37;
fma.rn.f64 %fd39, %fd38, %fd38, %fd38;
fma.rn.f64 %fd40, %fd39, %fd31, %fd31;
// %fd41 = first estimate of t; %fd46 = t after one residual correction step.
mul.f64 %fd41, %fd34, %fd40;
add.rn.f64 %fd42, %fd41, %fd37;
fma.rn.f64 %fd43, %fd35, %fd42, %fd2;
neg.f64 %fd44, %fd41;
fma.rn.f64 %fd45, %fd44, %fd2, %fd43;
fma.rn.f64 %fd46, %fd40, %fd45, %fd41;
// Horner evaluation of a polynomial in %fd46 -> %fd91.
mov.f64 %fd47, 0dBE44E1C6FD03D328;
mov.f64 %fd48, 0dBDF8774AD4E0BFD7;
.loc 2 404 10
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
mov.f64 %fd50, 0dBE4330149F7A56B6;
.loc 2 404 10
fma.rn.f64 %fd51, %fd49, %fd46, %fd50;
mov.f64 %fd52, 0d3E7BEDDED8376273;
.loc 2 404 10
fma.rn.f64 %fd53, %fd51, %fd46, %fd52;
mov.f64 %fd54, 0d3E6F9254C3ABF22B;
.loc 2 404 10
fma.rn.f64 %fd55, %fd53, %fd46, %fd54;
mov.f64 %fd56, 0dBEAB9068C2148CF0;
.loc 2 404 10
fma.rn.f64 %fd57, %fd55, %fd46, %fd56;
mov.f64 %fd58, 0d3E94C6454DB34009;
.loc 2 404 10
fma.rn.f64 %fd59, %fd57, %fd46, %fd58;
mov.f64 %fd60, 0d3ED7F1C378F2311D;
.loc 2 404 10
fma.rn.f64 %fd61, %fd59, %fd46, %fd60;
mov.f64 %fd62, 0dBEE78E051C6D5C58;
.loc 2 404 10
fma.rn.f64 %fd63, %fd61, %fd46, %fd62;
mov.f64 %fd64, 0dBEF995B4EAD14A90;
.loc 2 404 10
fma.rn.f64 %fd65, %fd63, %fd46, %fd64;
mov.f64 %fd66, 0d3F23BE27CF0A29B2;
.loc 2 404 10
fma.rn.f64 %fd67, %fd65, %fd46, %fd66;
mov.f64 %fd68, 0dBF2A1DEF3E81672E;
.loc 2 404 10
fma.rn.f64 %fd69, %fd67, %fd46, %fd68;
mov.f64 %fd70, 0dBF48D4ABE68C1713;
.loc 2 404 10
fma.rn.f64 %fd71, %fd69, %fd46, %fd70;
mov.f64 %fd72, 0d3F749C67210DD6B4;
.loc 2 404 10
fma.rn.f64 %fd73, %fd71, %fd46, %fd72;
mov.f64 %fd74, 0dBF9096238568E357;
.loc 2 404 10
fma.rn.f64 %fd75, %fd73, %fd46, %fd74;
mov.f64 %fd76, 0d3FA3079EDF8C2DC9;
.loc 2 404 10
fma.rn.f64 %fd77, %fd75, %fd46, %fd76;
mov.f64 %fd78, 0dBFB0FB06DFF601FC;
.loc 2 404 10
fma.rn.f64 %fd79, %fd77, %fd46, %fd78;
mov.f64 %fd80, 0d3FB7FEE004DFBCDC;
.loc 2 404 10
fma.rn.f64 %fd81, %fd79, %fd46, %fd80;
mov.f64 %fd82, 0dBFB9DDB23C3DB8C6;
.loc 2 404 10
fma.rn.f64 %fd83, %fd81, %fd46, %fd82;
mov.f64 %fd84, 0d3FB16ECEFCFA5FDA;
.loc 2 404 10
fma.rn.f64 %fd85, %fd83, %fd46, %fd84;
mov.f64 %fd86, 0d3F8F7F5DF66FB6D6;
.loc 2 404 10
fma.rn.f64 %fd87, %fd85, %fd46, %fd86;
mov.f64 %fd88, 0dBFC1DF1AD154A29D;
.loc 2 404 10
fma.rn.f64 %fd89, %fd87, %fd46, %fd88;
mov.f64 %fd90, 0d3FF3BA5916E9FD7F;
.loc 2 404 10
fma.rn.f64 %fd91, %fd89, %fd46, %fd90;
// %fd32 = 2|x| + 1; approximate reciprocal in f32, refined in f64 into %fd96.
mov.f64 %fd92, 0d4000000000000000;
.loc 2 404 10
fma.rn.f64 %fd32, %fd92, %fd2, %fd37;
// inline asm
cvt.rn.f32.f64 %f5,%fd32;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd33,%f6;
// inline asm
neg.f64 %fd93, %fd32;
fma.rn.f64 %fd94, %fd93, %fd33, %fd37;
fma.rn.f64 %fd95, %fd94, %fd94, %fd94;
fma.rn.f64 %fd96, %fd95, %fd33, %fd33;
// %fd140 = poly / (2|x| + 1), with one residual correction step.
mul.f64 %fd97, %fd91, %fd96;
mul.f64 %fd98, %fd97, 0dC000000000000000;
fma.rn.f64 %fd99, %fd2, %fd98, %fd91;
neg.f64 %fd100, %fd97;
add.rn.f64 %fd101, %fd99, %fd100;
fma.rn.f64 %fd140, %fd101, %fd96, %fd97;
// %fd140 now holds the value computed from |x|. If the sign bit of x is clear
// (high word > -1 as a signed int) it is stored as is.
BB13_7:
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd1;
}
setp.gt.s32 %p4, %r33, -1;
@%p4 bra BB13_15;
// Sign bit set: %fd6 = |x|^2 and %fd7 = fma(|x|, |x|, -%fd6), the rounding error of that square.
mul.f64 %fd6, %fd2, %fd2;
neg.f64 %fd102, %fd6;
fma.rn.f64 %fd7, %fd2, %fd2, %fd102;
// %r8 = high word of %fd6; range check deciding whether the exponential
// below can be evaluated normally (BB13_10) or needs special handling.
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd6;
}
setp.lt.u32 %p5, %r8, 1082535491;
setp.lt.s32 %p6, %r8, -1064875759;
or.pred %p7, %p5, %p6;
@%p7 bra BB13_10;
// Out-of-range argument: 0 if the sign bit is set, +infinity otherwise,
// or v + v when the argument is NaN.
setp.lt.s32 %p8, %r8, 0;
selp.f64 %fd103, 0d0000000000000000, 0d7FF0000000000000, %p8;
abs.f64 %fd104, %fd6;
setp.gtu.f64 %p9, %fd104, 0d7FF0000000000000;
add.f64 %fd105, %fd6, %fd6;
selp.f64 %fd139, %fd105, %fd103, %p9;
bra.uni BB13_14;
// Exponential-style evaluation of %fd6: n = round(%fd6 * 0d3FF71547652B82FE)
// (approximately log2(e)), reduced argument r = %fd6 - n * ln2 (two-constant split).
BB13_10:
.loc 2 404 10
mul.f64 %fd106, %fd6, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd107, %fd106;
cvt.rzi.s32.f64 %r9, %fd107;
mov.f64 %fd108, 0dBFE62E42FEFA39EF;
.loc 2 404 10
fma.rn.f64 %fd109, %fd107, %fd108, %fd6;
mov.f64 %fd110, 0dBC7ABC9E3B39803F;
.loc 2 404 10
fma.rn.f64 %fd111, %fd107, %fd110, %fd109;
// Horner evaluation of a polynomial in the reduced argument %fd111 -> %fd138.
mov.f64 %fd112, 0d3E928A27E30F5561;
mov.f64 %fd113, 0d3E5AE6449C0686C0;
.loc 2 404 10
fma.rn.f64 %fd114, %fd113, %fd111, %fd112;
mov.f64 %fd115, 0d3EC71DE8E6486D6B;
.loc 2 404 10
fma.rn.f64 %fd116, %fd114, %fd111, %fd115;
mov.f64 %fd117, 0d3EFA019A6B2464C5;
.loc 2 404 10
fma.rn.f64 %fd118, %fd116, %fd111, %fd117;
mov.f64 %fd119, 0d3F2A01A0171064A5;
.loc 2 404 10
fma.rn.f64 %fd120, %fd118, %fd111, %fd119;
mov.f64 %fd121, 0d3F56C16C17F29C8D;
.loc 2 404 10
fma.rn.f64 %fd122, %fd120, %fd111, %fd121;
mov.f64 %fd123, 0d3F8111111111A24E;
.loc 2 404 10
fma.rn.f64 %fd124, %fd122, %fd111, %fd123;
mov.f64 %fd125, 0d3FA555555555211D;
.loc 2 404 10
fma.rn.f64 %fd126, %fd124, %fd111, %fd125;
mov.f64 %fd127, 0d3FC5555555555530;
.loc 2 404 10
fma.rn.f64 %fd128, %fd126, %fd111, %fd127;
mov.f64 %fd129, 0d3FE0000000000005;
.loc 2 404 10
fma.rn.f64 %fd130, %fd128, %fd111, %fd129;
mov.f64 %fd131, 0d3FF0000000000000;
.loc 2 404 10
fma.rn.f64 %fd132, %fd130, %fd111, %fd131;
fma.rn.f64 %fd138, %fd132, %fd111, %fd131;
// Scale by 2^n. When |n| < 1023 a single power-of-two factor is built directly
// (BB13_12); otherwise the scaling is split across two multiplications.
abs.s32 %r34, %r9;
setp.lt.s32 %p10, %r34, 1023;
@%p10 bra BB13_12;
add.s32 %r35, %r9, 2046;
shl.b32 %r36, %r35, 19;
and.b32 %r37, %r36, -1048576;
shl.b32 %r38, %r35, 20;
sub.s32 %r47, %r38, %r37;
mov.u32 %r39, 0;
.loc 2 404 10
mov.b64 %fd133, {%r39, %r37};
mul.f64 %fd138, %fd138, %fd133;
bra.uni BB13_13;
BB13_12:
.loc 2 404 10
// High word of the scale factor = (n << 20) + 0x3FF00000.
shl.b32 %r40, %r9, 20;
add.s32 %r47, %r40, 1072693248;
BB13_13:
mov.u32 %r41, 0;
.loc 2 404 10
mov.b64 %fd134, {%r41, %r47};
mul.f64 %fd139, %fd138, %fd134;
// Combine for the sign-bit-set case: e2 = 2 * %fd139, result = e2 * %fd7 + e2 - %fd140,
// except that an e2 of +infinity is returned directly.
BB13_14:
add.f64 %fd135, %fd139, %fd139;
fma.rn.f64 %fd136, %fd135, %fd7, %fd135;
sub.f64 %fd137, %fd136, %fd140;
setp.eq.f64 %p11, %fd135, 0d7FF0000000000000;
selp.f64 %fd140, %fd135, %fd137, %p11;
// Store %fd140 to destination[x * param_3 + y] and advance y by nctaid.y * ntid.y.
BB13_15:
.loc 1 30 42
add.s32 %r42, %r46, %r5;
mul.wide.s32 %rd7, %r42, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 30 42
st.global.f64 [%rd8], %fd140;
.loc 1 30 22
add.s32 %r46, %r2, %r46;
.loc 1 30 1
setp.lt.s32 %p12, %r46, %r15;
@%p12 bra BB13_4;
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
BB13_16:
.loc 1 30 22
mov.u32 %r43, %nctaid.x;
mad.lo.s32 %r45, %r43, %r19, %r45;
.loc 1 30 1
setp.lt.s32 %p13, %r45, %r16;
@%p13 bra BB13_2;
BB13_17:
.loc 1 30 2
ret;
}
// Kernel map_erf_double: element-wise double-precision map over a 2-D index space.
// Parameters, as they are used in the body below:
//   param_0 (u32): exclusive upper bound for the y index (built from tid.y/ctaid.y/ntid.y)
//   param_1 (u32): exclusive upper bound for the x index (built from tid.x/ctaid.x/ntid.x)
//   param_2 (u64): destination base pointer (converted to global, written with st.global.f64)
//   param_3 (u32): destination stride in elements, multiplied by the x index
//   param_4 (u64): source base pointer (converted to global, read with ld.global.f64)
//   param_5 (u32): source stride in elements, multiplied by the x index
// Element offset is (x * stride + y), scaled by 8 bytes per f64.
// The math body is tagged ".loc 2 374", i.e. inlined from file 2 of the .file table
// (math_functions_dbl_ptx3.h); presumably the erf() implementation, going by the kernel name.
.visible .entry map_erf_double(
.param .u32 map_erf_double_param_0,
.param .u32 map_erf_double_param_1,
.param .u64 map_erf_double_param_2,
.param .u32 map_erf_double_param_3,
.param .u64 map_erf_double_param_4,
.param .u32 map_erf_double_param_5
)
{
.reg .pred %p<7>;
.reg .s32 %r<38>;
.reg .s64 %rd<9>;
.reg .f64 %fd<105>;
ld.param.u32 %r10, [map_erf_double_param_0];
ld.param.u32 %r11, [map_erf_double_param_1];
ld.param.u64 %rd3, [map_erf_double_param_2];
ld.param.u32 %r12, [map_erf_double_param_3];
ld.param.u64 %rd4, [map_erf_double_param_4];
ld.param.u32 %r13, [map_erf_double_param_5];
// %rd1 = destination (param_2), %rd2 = source (param_4), both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 31 1
// %r36 = initial x index = ntid.x * ctaid.x + tid.x
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r36, %r14, %r15, %r16;
.loc 1 31 1
// Nothing to do for this thread if x is already out of range.
setp.ge.s32 %p1, %r36, %r11;
@%p1 bra BB14_9;
.loc 1 31 1
// %r2 = initial y index = ntid.y * ctaid.y + tid.y (computed once, reused for every x).
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 1 31 22
// %r3 = y step = nctaid.y * ntid.y
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop head (over x).
BB14_2:
.loc 1 31 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB14_8;
.loc 1 31 1
// Per-x base offsets: %r5 for the source, %r6 for the destination.
mul.lo.s32 %r5, %r36, %r13;
.loc 1 31 42
mul.lo.s32 %r6, %r36, %r12;
mov.u32 %r37, %r2;
// Inner loop head (over y): %r7 is the current y index; load one element into %fd1.
BB14_4:
.loc 1 31 1
mov.u32 %r7, %r37;
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 31 1
ld.global.f64 %fd1, [%rd6];
.loc 2 374 10
// %fd2 = |x|. |x| < 1.0 (or NaN, since the compare is unordered) takes the
// small-argument path at BB14_6.
abs.f64 %fd2, %fd1;
setp.ltu.f64 %p3, %fd2, 0d3FF0000000000000;
@%p3 bra BB14_6;
// |x| >= 1.0: Horner evaluation of a polynomial in |x| -> %fd50.
mov.f64 %fd6, 0dBCF1384CE38C616A;
mov.f64 %fd7, 0d3C8B9C2B870030E8;
.loc 2 374 10
fma.rn.f64 %fd8, %fd7, %fd2, %fd6;
mov.f64 %fd9, 0d3D4458AE9746C2FD;
.loc 2 374 10
fma.rn.f64 %fd10, %fd8, %fd2, %fd9;
mov.f64 %fd11, 0dBD8E4A44D4F1AB56;
.loc 2 374 10
fma.rn.f64 %fd12, %fd10, %fd2, %fd11;
mov.f64 %fd13, 0d3DCFDF15265C58EE;
.loc 2 374 10
fma.rn.f64 %fd14, %fd12, %fd2, %fd13;
mov.f64 %fd15, 0dBE0933832F358D51;
.loc 2 374 10
fma.rn.f64 %fd16, %fd14, %fd2, %fd15;
mov.f64 %fd17, 0d3E3F136D3F719446;
.loc 2 374 10
fma.rn.f64 %fd18, %fd16, %fd2, %fd17;
mov.f64 %fd19, 0dBE6E94C2FE151B3B;
.loc 2 374 10
fma.rn.f64 %fd20, %fd18, %fd2, %fd19;
mov.f64 %fd21, 0d3E985A70310EE0A8;
.loc 2 374 10
fma.rn.f64 %fd22, %fd20, %fd2, %fd21;
mov.f64 %fd23, 0dBEBF944DA1520B74;
.loc 2 374 10
fma.rn.f64 %fd24, %fd22, %fd2, %fd23;
mov.f64 %fd25, 0d3EE09F503825C543;
.loc 2 374 10
fma.rn.f64 %fd26, %fd24, %fd2, %fd25;
mov.f64 %fd27, 0dBEFBEEFE9F949E59;
.loc 2 374 10
fma.rn.f64 %fd28, %fd26, %fd2, %fd27;
mov.f64 %fd29, 0d3F11D785C6E28857;
.loc 2 374 10
fma.rn.f64 %fd30, %fd28, %fd2, %fd29;
mov.f64 %fd31, 0dBF1D866B223048C7;
.loc 2 374 10
fma.rn.f64 %fd32, %fd30, %fd2, %fd31;
mov.f64 %fd33, 0d3EF258F0847E8908;
.loc 2 374 10
fma.rn.f64 %fd34, %fd32, %fd2, %fd33;
mov.f64 %fd35, 0d3F429CFC58DBB776;
.loc 2 374 10
fma.rn.f64 %fd36, %fd34, %fd2, %fd35;
mov.f64 %fd37, 0dBF5BE16D3F71F3C5;
.loc 2 374 10
fma.rn.f64 %fd38, %fd36, %fd2, %fd37;
mov.f64 %fd39, 0d3F2E8BDA60326B1A;
.loc 2 374 10
fma.rn.f64 %fd40, %fd38, %fd2, %fd39;
mov.f64 %fd41, 0d3F938FB20B0988A6;
.loc 2 374 10
fma.rn.f64 %fd42, %fd40, %fd2, %fd41;
mov.f64 %fd43, 0dBFBA4E3A80F64E33;
.loc 2 374 10
fma.rn.f64 %fd44, %fd42, %fd2, %fd43;
mov.f64 %fd45, 0dBFE45F3E88093928;
.loc 2 374 10
fma.rn.f64 %fd46, %fd44, %fd2, %fd45;
mov.f64 %fd47, 0dBFF20DD599CAEEA0;
.loc 2 374 10
fma.rn.f64 %fd48, %fd46, %fd2, %fd47;
mov.f64 %fd49, 0dBE883BE1E31CE133;
.loc 2 374 10
fma.rn.f64 %fd50, %fd48, %fd2, %fd49;
// Exponential-style evaluation of %fd50: n = round(%fd50 * 0d3FF71547652B82FE)
// (approximately log2(e)), reduced argument r = %fd50 - n * ln2 (single constant here).
mul.f64 %fd51, %fd50, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd52, %fd51;
cvt.rzi.s32.f64 %r22, %fd52;
mov.f64 %fd53, 0dBFE62E42FEFA39EF;
.loc 2 374 10
fma.rn.f64 %fd54, %fd52, %fd53, %fd50;
// Horner evaluation of a polynomial in the reduced argument %fd54 -> %fd76.
mov.f64 %fd55, 0d3E928A27E30F5561;
mov.f64 %fd56, 0d3E5AE6449C0686C0;
.loc 2 374 10
fma.rn.f64 %fd57, %fd56, %fd54, %fd55;
mov.f64 %fd58, 0d3EC71DE8E6486D6B;
.loc 2 374 10
fma.rn.f64 %fd59, %fd57, %fd54, %fd58;
mov.f64 %fd60, 0d3EFA019A6B2464C5;
.loc 2 374 10
fma.rn.f64 %fd61, %fd59, %fd54, %fd60;
mov.f64 %fd62, 0d3F2A01A0171064A5;
.loc 2 374 10
fma.rn.f64 %fd63, %fd61, %fd54, %fd62;
mov.f64 %fd64, 0d3F56C16C17F29C8D;
.loc 2 374 10
fma.rn.f64 %fd65, %fd63, %fd54, %fd64;
mov.f64 %fd66, 0d3F8111111111A24E;
.loc 2 374 10
fma.rn.f64 %fd67, %fd65, %fd54, %fd66;
mov.f64 %fd68, 0d3FA555555555211D;
.loc 2 374 10
fma.rn.f64 %fd69, %fd67, %fd54, %fd68;
mov.f64 %fd70, 0d3FC5555555555530;
.loc 2 374 10
fma.rn.f64 %fd71, %fd69, %fd54, %fd70;
mov.f64 %fd72, 0d3FE0000000000005;
.loc 2 374 10
fma.rn.f64 %fd73, %fd71, %fd54, %fd72;
mov.f64 %fd74, 0d3FF0000000000000;
.loc 2 374 10
fma.rn.f64 %fd75, %fd73, %fd54, %fd74;
fma.rn.f64 %fd76, %fd75, %fd54, %fd74;
// Scale by 2^n by adding (n << 20) to the high word of %fd76 -> %fd77.
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd76;
}
shl.b32 %r24, %r22, 20;
add.s32 %r25, %r23, %r24;
{
.reg .b32 %temp;
mov.b64 {%r26, %temp}, %fd76;
}
mov.b64 %fd77, {%r26, %r25};
// Magnitude of the result = 1.0 - %fd77, clamped to exactly 1.0 once the
// high word of |x| exceeds 1075294207.
sub.f64 %fd78, %fd74, %fd77;
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd2;
}
setp.gt.u32 %p4, %r27, 1075294207;
selp.f64 %fd79, 0d3FF0000000000000, %fd78, %p4;
// Copy the sign bit of the original input x onto the magnitude.
{
.reg .b32 %temp;
mov.b64 {%r28, %temp}, %fd79;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r29}, %fd79;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r30}, %fd1;
}
and.b32 %r31, %r30, -2147483648;
or.b32 %r32, %r29, %r31;
mov.b64 %fd104, {%r28, %r32};
bra.uni BB14_7;
// Small-argument path: result = x * P(x^2), with the leading coefficient
// 0d3FF20DD750429B6D (approximately 2/sqrt(pi)).
BB14_6:
.loc 2 374 10
mul.f64 %fd80, %fd1, %fd1;
mov.f64 %fd81, 0d3E4D5F4BB7A316F6;
mov.f64 %fd82, 0dBE0A83AA3B08FBC2;
.loc 2 374 10
fma.rn.f64 %fd83, %fd82, %fd80, %fd81;
mov.f64 %fd84, 0dBE85BDCE301B3CDF;
.loc 2 374 10
fma.rn.f64 %fd85, %fd83, %fd80, %fd84;
mov.f64 %fd86, 0d3EBB978FADB81BC9;
.loc 2 374 10
fma.rn.f64 %fd87, %fd85, %fd80, %fd86;
mov.f64 %fd88, 0dBEEF4C99D6AE5FB8;
.loc 2 374 10
fma.rn.f64 %fd89, %fd87, %fd80, %fd88;
mov.f64 %fd90, 0d3F1F9A2AF549012E;
.loc 2 374 10
fma.rn.f64 %fd91, %fd89, %fd80, %fd90;
mov.f64 %fd92, 0dBF4C02DAFC636A47;
.loc 2 374 10
fma.rn.f64 %fd93, %fd91, %fd80, %fd92;
mov.f64 %fd94, 0d3F7565BCCF619AC0;
.loc 2 374 10
fma.rn.f64 %fd95, %fd93, %fd80, %fd94;
mov.f64 %fd96, 0dBF9B82CE311E321A;
.loc 2 374 10
fma.rn.f64 %fd97, %fd95, %fd80, %fd96;
mov.f64 %fd98, 0d3FBCE2F21A04075C;
.loc 2 374 10
fma.rn.f64 %fd99, %fd97, %fd80, %fd98;
mov.f64 %fd100, 0dBFD812746B0379B4;
.loc 2 374 10
fma.rn.f64 %fd101, %fd99, %fd80, %fd100;
mov.f64 %fd102, 0d3FF20DD750429B6D;
.loc 2 374 10
fma.rn.f64 %fd103, %fd101, %fd80, %fd102;
mul.f64 %fd104, %fd103, %fd1;
// Store %fd104 to destination[x * param_3 + y] and advance y by nctaid.y * ntid.y.
BB14_7:
.loc 1 31 42
add.s32 %r33, %r7, %r6;
mul.wide.s32 %rd7, %r33, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 31 42
st.global.f64 [%rd8], %fd104;
.loc 1 31 22
add.s32 %r8, %r3, %r7;
.loc 1 31 1
setp.lt.s32 %p5, %r8, %r10;
mov.u32 %r37, %r8;
@%p5 bra BB14_4;
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
BB14_8:
.loc 1 31 22
mov.u32 %r34, %nctaid.x;
mad.lo.s32 %r36, %r34, %r14, %r36;
.loc 1 31 1
setp.lt.s32 %p6, %r36, %r11;
@%p6 bra BB14_2;
BB14_9:
.loc 1 31 2
ret;
}
// Kernel entry map_erfinv_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, evaluates the inlined math routine tagged ".loc 2 384"
// (presumably erfinv, going by the kernel name), and stores the f64 result
// into the buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file. The bounds are loaded as u32 but compared
// with signed setp instructions.
.visible .entry map_erfinv_double(
.param .u32 map_erfinv_double_param_0,
.param .u32 map_erfinv_double_param_1,
.param .u64 map_erfinv_double_param_2,
.param .u32 map_erfinv_double_param_3,
.param .u64 map_erfinv_double_param_4,
.param .u32 map_erfinv_double_param_5
)
{
.reg .pred %p<10>;
.reg .f32 %f<5>;
.reg .s32 %r<34>;
.reg .s64 %rd<9>;
.reg .f64 %fd<175>;
ld.param.u32 %r10, [map_erfinv_double_param_0];
ld.param.u32 %r11, [map_erfinv_double_param_1];
ld.param.u64 %rd3, [map_erfinv_double_param_2];
ld.param.u32 %r12, [map_erfinv_double_param_3];
ld.param.u64 %rd4, [map_erfinv_double_param_4];
ld.param.u32 %r13, [map_erfinv_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 32 1
// Outer index %r32 = ntid.x * ctaid.x + tid.x.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r32, %r14, %r15, %r16;
.loc 1 32 1
// Nothing to do for this thread if the outer index is already past param_1.
setp.ge.s32 %p1, %r32, %r11;
@%p1 bra BB15_16;
.loc 1 32 1
// Inner start index %r2 = ntid.y * ctaid.y + tid.y.
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 1 32 22
// Inner step %r3 = nctaid.y * ntid.y.
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop head.
BB15_2:
.loc 1 32 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB15_15;
.loc 1 32 1
// %r5 = outer * param_5 (source offset), %r6 = outer * param_3 (destination offset).
mul.lo.s32 %r5, %r32, %r13;
.loc 1 32 42
mul.lo.s32 %r6, %r32, %r12;
mov.u32 %r33, %r2;
// Inner loop head: %r7 is the current inner index.
BB15_4:
.loc 1 32 1
mov.u32 %r7, %r33;
add.s32 %r21, %r7, %r5;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 32 1
// %fd1 = input value x.
ld.global.f64 %fd1, [%rd6];
.loc 2 384 10
// %fd2 = |x|. |x| < 1.0 takes the main evaluation path at BB15_8.
abs.f64 %fd2, %fd1;
setp.lt.f64 %p3, %fd2, 0d3FF0000000000000;
@%p3 bra BB15_8;
// "gtu" is true when greater OR unordered; nothing exceeds +inf, so this selects NaN input.
setp.gtu.f64 %p4, %fd2, 0d7FF0000000000000;
@%p4 bra BB15_7;
// |x| == 1.0 yields x * +inf (a signed infinity); any other |x| >= 1 yields the NaN pattern.
setp.eq.f64 %p5, %fd2, 0d3FF0000000000000;
mul.f64 %fd14, %fd1, 0d7FF0000000000000;
selp.f64 %fd174, %fd14, 0dFFF8000000000000, %p5;
bra.uni BB15_14;
BB15_7:
.loc 2 384 10
// NaN input: result is x + x.
add.f64 %fd174, %fd1, %fd1;
bra.uni BB15_14;
BB15_8:
.loc 2 384 10
// %fd19 = fma(x, -x, 1.0) = 1 - x*x.
neg.f64 %fd17, %fd1;
mov.f64 %fd18, 0d3FF0000000000000;
.loc 2 384 10
fma.rn.f64 %fd19, %fd1, %fd17, %fd18;
// Split %fd19 into its high (%r22) and low (%r23) 32-bit words.
{
.reg .b32 %temp;
mov.b64 {%temp, %r22}, %fd19;
}
{
.reg .b32 %temp;
mov.b64 {%r23, %temp}, %fd19;
}
// Exponent/mantissa split done with integer ops on the high word:
// %fd20 is an integer derived from the exponent field, %fd21 is the value with
// its exponent field rewritten.
// NOTE(review): this and the steps up to %fd5 look like an inlined log(1 - x*x) -- confirm.
shr.u32 %r24, %r22, 20;
and.b32 %r25, %r24, 2046;
add.s32 %r26, %r25, -1022;
cvt.rn.f64.s32 %fd20, %r26;
and.b32 %r27, %r22, -2145386497;
add.s32 %r28, %r27, 1071644672;
mov.b64 %fd21, {%r23, %r28};
// %fd22 = m - 1, %fd15 = m + 1, where m = %fd21.
add.f64 %fd22, %fd21, 0dBFF0000000000000;
add.f64 %fd15, %fd21, 0d3FF0000000000000;
// Single-precision approximate reciprocal of (m + 1), widened back to f64 in %fd16.
// inline asm
cvt.rn.f32.f64 %f1,%fd15;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd16,%f2;
// inline asm
// fma-based refinement of the approximate reciprocal; %fd27 = (m - 1) * refined 1/(m + 1).
neg.f64 %fd23, %fd15;
fma.rn.f64 %fd24, %fd23, %fd16, %fd18;
fma.rn.f64 %fd25, %fd24, %fd24, %fd24;
fma.rn.f64 %fd26, %fd25, %fd16, %fd16;
mul.f64 %fd27, %fd22, %fd26;
mov.f64 %fd28, 0dC000000000000000;
.loc 2 384 10
// Correction of the quotient using its fma-computed residual; refined quotient in %fd32.
fma.rn.f64 %fd29, %fd28, %fd27, %fd22;
neg.f64 %fd30, %fd27;
fma.rn.f64 %fd31, %fd30, %fd22, %fd29;
fma.rn.f64 %fd32, %fd31, %fd26, %fd27;
// %fd33 = q*q (q = %fd32); Horner-form fma chain in q*q follows.
mul.f64 %fd33, %fd32, %fd32;
mov.f64 %fd34, 0d3FA55CF59CDC5D89;
mov.f64 %fd35, 0d3FB5C5C218C775C9;
.loc 2 384 10
fma.rn.f64 %fd36, %fd35, %fd33, %fd34;
mov.f64 %fd37, 0d3FAEFD18CF6EBB9C;
.loc 2 384 10
fma.rn.f64 %fd38, %fd36, %fd33, %fd37;
mov.f64 %fd39, 0d3FB10682EDCB8D1B;
.loc 2 384 10
fma.rn.f64 %fd40, %fd38, %fd33, %fd39;
mov.f64 %fd41, 0d3FB3B1DD3AC7FC96;
.loc 2 384 10
fma.rn.f64 %fd42, %fd40, %fd33, %fd41;
mov.f64 %fd43, 0d3FB745CB459B54A6;
.loc 2 384 10
fma.rn.f64 %fd44, %fd42, %fd33, %fd43;
mov.f64 %fd45, 0d3FBC71C741A0669F;
.loc 2 384 10
fma.rn.f64 %fd46, %fd44, %fd33, %fd45;
mov.f64 %fd47, 0d3FC249249209112E;
.loc 2 384 10
fma.rn.f64 %fd48, %fd46, %fd33, %fd47;
mov.f64 %fd49, 0d3FC99999999A06C1;
.loc 2 384 10
fma.rn.f64 %fd50, %fd48, %fd33, %fd49;
mov.f64 %fd51, 0d3FD5555555555535;
.loc 2 384 10
fma.rn.f64 %fd52, %fd50, %fd33, %fd51;
// %fd54 = q + q * (q*q * poly); %fd55 = 2 * %fd54.
mul.f64 %fd53, %fd52, %fd33;
fma.rn.f64 %fd54, %fd53, %fd32, %fd32;
add.f64 %fd55, %fd54, %fd54;
// 0x3FE62E42FEFA39EF is the double nearest ln 2.
mov.f64 %fd56, 0d3FE62E42FEFA39EF;
.loc 2 384 10
// %fd5 = %fd20 * ln2 + %fd55 (call it t); %fd6 = -t.
fma.rn.f64 %fd5, %fd20, %fd56, %fd55;
neg.f64 %fd6, %fd5;
// t > -6.25 selects the polynomial at BB15_12.
setp.gt.f64 %p6, %fd5, 0dC019000000000000;
@%p6 bra BB15_12;
// Otherwise work with s = sqrt(-t); s < 4.0 selects BB15_11.
sqrt.rn.f64 %fd7, %fd6;
setp.lt.f64 %p7, %fd7, 0d4010000000000000;
@%p7 bra BB15_11;
// s >= 4.0: Horner-form fma chain in (s - 5.0); result in %fd173.
add.f64 %fd57, %fd7, 0dC014000000000000;
mov.f64 %fd58, 0dBDF18FEEC0E38727;
mov.f64 %fd59, 0dBDBDCEC3A7785389;
.loc 2 384 10
fma.rn.f64 %fd60, %fd59, %fd57, %fd58;
mov.f64 %fd61, 0d3E19E6BF2DDA45E3;
.loc 2 384 10
fma.rn.f64 %fd62, %fd60, %fd57, %fd61;
mov.f64 %fd63, 0dBE30468FB24E2F5F;
.loc 2 384 10
fma.rn.f64 %fd64, %fd62, %fd57, %fd63;
mov.f64 %fd65, 0d3E405AC6A8FBA182;
.loc 2 384 10
fma.rn.f64 %fd66, %fd64, %fd57, %fd65;
mov.f64 %fd67, 0dBE50102E495FB9C0;
.loc 2 384 10
fma.rn.f64 %fd68, %fd66, %fd57, %fd67;
mov.f64 %fd69, 0d3E5F4C20E1334AF8;
.loc 2 384 10
fma.rn.f64 %fd70, %fd68, %fd57, %fd69;
mov.f64 %fd71, 0dBE722D220FDF9C3E;
.loc 2 384 10
fma.rn.f64 %fd72, %fd70, %fd57, %fd71;
mov.f64 %fd73, 0d3E8EBC8BB824CB54;
.loc 2 384 10
fma.rn.f64 %fd74, %fd72, %fd57, %fd73;
mov.f64 %fd75, 0dBEB0A8D40EA372CC;
.loc 2 384 10
fma.rn.f64 %fd76, %fd74, %fd57, %fd75;
mov.f64 %fd77, 0d3ED2FBD29D093D2B;
.loc 2 384 10
fma.rn.f64 %fd78, %fd76, %fd57, %fd77;
mov.f64 %fd79, 0dBEF4A3497E1E0FAC;
.loc 2 384 10
fma.rn.f64 %fd80, %fd78, %fd57, %fd79;
mov.f64 %fd81, 0d3F13EBF4EB00938F;
.loc 2 384 10
fma.rn.f64 %fd82, %fd80, %fd57, %fd81;
mov.f64 %fd83, 0dBF2C2F36A8FC5D53;
.loc 2 384 10
fma.rn.f64 %fd84, %fd82, %fd57, %fd83;
mov.f64 %fd85, 0dBF222EA5DF04047C;
.loc 2 384 10
fma.rn.f64 %fd86, %fd84, %fd57, %fd85;
mov.f64 %fd87, 0d3FF02A30D1FBA0DC;
.loc 2 384 10
fma.rn.f64 %fd88, %fd86, %fd57, %fd87;
mov.f64 %fd89, 0d4013664DDD1AD7FB;
.loc 2 384 10
fma.rn.f64 %fd173, %fd88, %fd57, %fd89;
bra.uni BB15_13;
BB15_11:
.loc 2 384 10
// s < 4.0: Horner-form fma chain in (s - 3.25); result in %fd173.
add.f64 %fd90, %fd7, 0dC00A000000000000;
mov.f64 %fd91, 0d3E785CBE52878635;
mov.f64 %fd92, 0d3E23040F87DBD932;
.loc 2 384 10
fma.rn.f64 %fd93, %fd92, %fd90, %fd91;
mov.f64 %fd94, 0dBE92777453DD3955;
.loc 2 384 10
fma.rn.f64 %fd95, %fd93, %fd90, %fd94;
mov.f64 %fd96, 0d3E5395ABCD554C6C;
.loc 2 384 10
fma.rn.f64 %fd97, %fd95, %fd90, %fd96;
mov.f64 %fd98, 0d3EB936388A3790AD;
.loc 2 384 10
fma.rn.f64 %fd99, %fd97, %fd90, %fd98;
mov.f64 %fd100, 0dBED0D5DB812B5083;
.loc 2 384 10
fma.rn.f64 %fd101, %fd99, %fd90, %fd100;
mov.f64 %fd102, 0d3EC8860CD5D652F6;
.loc 2 384 10
fma.rn.f64 %fd103, %fd101, %fd90, %fd102;
mov.f64 %fd104, 0d3EEA29A0CACDFB23;
.loc 2 384 10
fma.rn.f64 %fd105, %fd103, %fd90, %fd104;
mov.f64 %fd106, 0dBF08CEF1F80281F2;
.loc 2 384 10
fma.rn.f64 %fd107, %fd105, %fd90, %fd106;
mov.f64 %fd108, 0d3F11E684D0B9188A;
.loc 2 384 10
fma.rn.f64 %fd109, %fd107, %fd90, %fd108;
mov.f64 %fd110, 0d3EF932CD54C8A222;
.loc 2 384 10
fma.rn.f64 %fd111, %fd109, %fd90, %fd110;
mov.f64 %fd112, 0dBF37448A89EF8AA3;
.loc 2 384 10
fma.rn.f64 %fd113, %fd111, %fd90, %fd112;
mov.f64 %fd114, 0d3F4F3CC55AD40C25;
.loc 2 384 10
fma.rn.f64 %fd115, %fd113, %fd90, %fd114;
mov.f64 %fd116, 0dBF5BA924132F38B1;
.loc 2 384 10
fma.rn.f64 %fd117, %fd115, %fd90, %fd116;
mov.f64 %fd118, 0d3F6468EECA533CF8;
.loc 2 384 10
fma.rn.f64 %fd119, %fd117, %fd90, %fd118;
mov.f64 %fd120, 0dBF6EBADABB891BBD;
.loc 2 384 10
fma.rn.f64 %fd121, %fd119, %fd90, %fd120;
mov.f64 %fd122, 0d3F75FFCFE5B76AFC;
.loc 2 384 10
fma.rn.f64 %fd123, %fd121, %fd90, %fd122;
mov.f64 %fd124, 0d3FF0158A6D641D39;
.loc 2 384 10
fma.rn.f64 %fd125, %fd123, %fd90, %fd124;
mov.f64 %fd126, 0d4008ABCC380D5A48;
.loc 2 384 10
fma.rn.f64 %fd173, %fd125, %fd90, %fd126;
bra.uni BB15_13;
BB15_12:
// t > -6.25: Horner-form fma chain in w = -3.125 - t; result in %fd173.
mov.f64 %fd127, 0dC009000000000000;
.loc 2 384 10
sub.f64 %fd128, %fd127, %fd5;
mov.f64 %fd129, 0dBC08DDF93324D327;
mov.f64 %fd130, 0dBBB135D2E746E627;
.loc 2 384 10
fma.rn.f64 %fd131, %fd130, %fd128, %fd129;
mov.f64 %fd132, 0d3C37B83EEF0B7C9F;
.loc 2 384 10
fma.rn.f64 %fd133, %fd131, %fd128, %fd132;
mov.f64 %fd134, 0d3C69BA72CD589B91;
.loc 2 384 10
fma.rn.f64 %fd135, %fd133, %fd128, %fd134;
mov.f64 %fd136, 0dBCA33689090A6B96;
.loc 2 384 10
fma.rn.f64 %fd137, %fd135, %fd128, %fd136;
mov.f64 %fd138, 0d3C782E11898132E0;
.loc 2 384 10
fma.rn.f64 %fd139, %fd137, %fd128, %fd138;
mov.f64 %fd140, 0d3CFDE4ACFD9E26BA;
.loc 2 384 10
fma.rn.f64 %fd141, %fd139, %fd128, %fd140;
mov.f64 %fd142, 0dBD26D33EED66C487;
.loc 2 384 10
fma.rn.f64 %fd143, %fd141, %fd128, %fd142;
mov.f64 %fd144, 0dBD36F2167040D8E2;
.loc 2 384 10
fma.rn.f64 %fd145, %fd143, %fd128, %fd144;
mov.f64 %fd146, 0d3D872A22C2D77E20;
.loc 2 384 10
fma.rn.f64 %fd147, %fd145, %fd128, %fd146;
mov.f64 %fd148, 0dBDAC8859C4E5C0AF;
.loc 2 384 10
fma.rn.f64 %fd149, %fd147, %fd128, %fd148;
mov.f64 %fd150, 0dBDCDC583D118A561;
.loc 2 384 10
fma.rn.f64 %fd151, %fd149, %fd128, %fd150;
mov.f64 %fd152, 0d3E120F47CCF46B3C;
.loc 2 384 10
fma.rn.f64 %fd153, %fd151, %fd128, %fd152;
mov.f64 %fd154, 0dBE31A9E38DC84D60;
.loc 2 384 10
fma.rn.f64 %fd155, %fd153, %fd128, %fd154;
mov.f64 %fd156, 0dBE5F36CD6D3D46A9;
.loc 2 384 10
fma.rn.f64 %fd157, %fd155, %fd128, %fd156;
mov.f64 %fd158, 0d3E9C6B4F5D03B787;
.loc 2 384 10
fma.rn.f64 %fd159, %fd157, %fd128, %fd158;
mov.f64 %fd160, 0dBEB6E8A5434AE8A2;
.loc 2 384 10
fma.rn.f64 %fd161, %fd159, %fd128, %fd160;
mov.f64 %fd162, 0dBEED1D1F7B8736F6;
.loc 2 384 10
fma.rn.f64 %fd163, %fd161, %fd128, %fd162;
mov.f64 %fd164, 0d3F2879C2A212F024;
.loc 2 384 10
fma.rn.f64 %fd165, %fd163, %fd128, %fd164;
mov.f64 %fd166, 0dBF4845769484FCA8;
.loc 2 384 10
fma.rn.f64 %fd167, %fd165, %fd128, %fd166;
mov.f64 %fd168, 0dBF78B6C33114F909;
.loc 2 384 10
fma.rn.f64 %fd169, %fd167, %fd128, %fd168;
mov.f64 %fd170, 0d3FCEBD80D9B13E28;
.loc 2 384 10
fma.rn.f64 %fd171, %fd169, %fd128, %fd170;
mov.f64 %fd172, 0d3FFA755E7C99AE86;
.loc 2 384 10
fma.rn.f64 %fd173, %fd171, %fd128, %fd172;
// All three polynomial branches join here: result = polynomial value * x.
BB15_13:
mul.f64 %fd174, %fd173, %fd1;
// Store the result at destination index (inner + outer * param_3).
BB15_14:
.loc 1 32 42
add.s32 %r29, %r7, %r6;
mul.wide.s32 %rd7, %r29, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 32 42
st.global.f64 [%rd8], %fd174;
.loc 1 32 22
// Advance the inner index by %r3 and loop while it is below param_0.
add.s32 %r8, %r3, %r7;
.loc 1 32 1
setp.lt.s32 %p8, %r8, %r10;
mov.u32 %r33, %r8;
@%p8 bra BB15_4;
BB15_15:
.loc 1 32 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r30, %nctaid.x;
mad.lo.s32 %r32, %r30, %r14, %r32;
.loc 1 32 1
setp.lt.s32 %p9, %r32, %r11;
@%p9 bra BB15_2;
BB15_16:
.loc 1 32 2
ret;
}
// Kernel entry map_exp10_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, evaluates the inlined math routine tagged ".loc 2 253"
// (presumably exp10, going by the kernel name), and stores the f64 result
// into the buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file.
.visible .entry map_exp10_double(
.param .u32 map_exp10_double_param_0,
.param .u32 map_exp10_double_param_1,
.param .u64 map_exp10_double_param_2,
.param .u32 map_exp10_double_param_3,
.param .u64 map_exp10_double_param_4,
.param .u32 map_exp10_double_param_5
)
{
.reg .pred %p<11>;
.reg .s32 %r<41>;
.reg .s64 %rd<9>;
.reg .f64 %fd<45>;
ld.param.u32 %r15, [map_exp10_double_param_0];
ld.param.u32 %r16, [map_exp10_double_param_1];
ld.param.u64 %rd3, [map_exp10_double_param_2];
ld.param.u32 %r17, [map_exp10_double_param_3];
ld.param.u64 %rd4, [map_exp10_double_param_4];
ld.param.u32 %r18, [map_exp10_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 33 1
// Outer index %r38 = ntid.x * ctaid.x + tid.x.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r38, %r19, %r20, %r21;
.loc 1 33 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB16_12;
.loc 1 33 1
// Inner start index %r2 = ntid.y * ctaid.y + tid.y; inner step %r3 = nctaid.y * ntid.y.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 1 33 22
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop head.
BB16_2:
.loc 1 33 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB16_11;
.loc 1 33 1
// %r5 = outer * param_5 (source offset), %r6 = outer * param_3 (destination offset).
mul.lo.s32 %r5, %r38, %r18;
.loc 1 33 42
mul.lo.s32 %r6, %r38, %r17;
mov.u32 %r39, %r2;
// Inner loop head: %r7 is the current inner index.
BB16_4:
.loc 1 33 1
mov.u32 %r7, %r39;
add.s32 %r26, %r7, %r5;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 33 1
// %fd1 = input value x.
ld.global.f64 %fd1, [%rd6];
.loc 2 253 10
// %r8 = high 32 bits of x (sign, exponent, top of mantissa).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
// Range test on the high word: the unsigned compare passes for sign-clear x
// below a fixed bound, the signed compare passes for sign-set x above a fixed
// negative bound. Either one selects the normal evaluation at BB16_6.
setp.lt.u32 %p3, %r8, 1081295892;
setp.lt.s32 %p4, %r8, -1066124872;
or.pred %p5, %p3, %p4;
@%p5 bra BB16_6;
// Out of range: sign bit set -> 0.0, otherwise +inf; NaN input ("gtu" vs +inf) -> x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd8, 0d0000000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p7, %fd9, 0d7FF0000000000000;
add.f64 %fd10, %fd1, %fd1;
selp.f64 %fd44, %fd10, %fd8, %p7;
bra.uni BB16_10;
BB16_6:
.loc 2 253 10
// %fd12 = x * 0x400A934F0979A371 rounded to the nearest integer (n); %r9 = n as s32.
// NOTE(review): the multiplier appears to be log2(10) -- confirm.
mul.f64 %fd11, %fd1, 0d400A934F0979A371;
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s32.f64 %r9, %fd12;
// Two fma steps subtract n times a constant split into two parts from x,
// leaving the reduced argument in %fd16.
mov.f64 %fd13, 0dBFD34413509F79FF;
.loc 2 253 10
fma.rn.f64 %fd14, %fd12, %fd13, %fd1;
mov.f64 %fd15, 0d3C49DC1DA994FD21;
.loc 2 253 10
fma.rn.f64 %fd16, %fd12, %fd15, %fd14;
// %fd19 = reduced argument times a two-part constant (product plus fma).
// NOTE(review): the large part 0x40026BB1BBB55516 appears to be ln(10) -- confirm.
mul.f64 %fd17, %fd16, 0dBCAF48AD494EA3E9;
mov.f64 %fd18, 0d40026BB1BBB55516;
.loc 2 253 10
fma.rn.f64 %fd19, %fd16, %fd18, %fd17;
// Horner-form fma chain in %fd19; the last two steps add 1.0. Result in %fd43.
mov.f64 %fd20, 0d3E928A27E30F5561;
mov.f64 %fd21, 0d3E5AE6449C0686C0;
.loc 2 253 10
fma.rn.f64 %fd22, %fd21, %fd19, %fd20;
mov.f64 %fd23, 0d3EC71DE8E6486D6B;
.loc 2 253 10
fma.rn.f64 %fd24, %fd22, %fd19, %fd23;
mov.f64 %fd25, 0d3EFA019A6B2464C5;
.loc 2 253 10
fma.rn.f64 %fd26, %fd24, %fd19, %fd25;
mov.f64 %fd27, 0d3F2A01A0171064A5;
.loc 2 253 10
fma.rn.f64 %fd28, %fd26, %fd19, %fd27;
mov.f64 %fd29, 0d3F56C16C17F29C8D;
.loc 2 253 10
fma.rn.f64 %fd30, %fd28, %fd19, %fd29;
mov.f64 %fd31, 0d3F8111111111A24E;
.loc 2 253 10
fma.rn.f64 %fd32, %fd30, %fd19, %fd31;
mov.f64 %fd33, 0d3FA555555555211D;
.loc 2 253 10
fma.rn.f64 %fd34, %fd32, %fd19, %fd33;
mov.f64 %fd35, 0d3FC5555555555530;
.loc 2 253 10
fma.rn.f64 %fd36, %fd34, %fd19, %fd35;
mov.f64 %fd37, 0d3FE0000000000005;
.loc 2 253 10
fma.rn.f64 %fd38, %fd36, %fd19, %fd37;
mov.f64 %fd39, 0d3FF0000000000000;
.loc 2 253 10
fma.rn.f64 %fd40, %fd38, %fd19, %fd39;
fma.rn.f64 %fd43, %fd40, %fd19, %fd39;
// |n| < 1023 takes the single-multiply scaling at BB16_8.
abs.s32 %r27, %r9;
setp.lt.s32 %p8, %r27, 1023;
@%p8 bra BB16_8;
// |n| >= 1023: the scale is applied as two factors. The first factor has high
// word %r30 and is multiplied in here; %r40 holds the high word of the second.
// NOTE(review): presumably because 2^n alone would not fit the normal exponent range.
add.s32 %r28, %r9, 2046;
shl.b32 %r29, %r28, 19;
and.b32 %r30, %r29, -1048576;
shl.b32 %r31, %r28, 20;
sub.s32 %r40, %r31, %r30;
mov.u32 %r32, 0;
.loc 2 253 10
mov.b64 %fd41, {%r32, %r30};
mul.f64 %fd43, %fd43, %fd41;
bra.uni BB16_9;
BB16_8:
.loc 2 253 10
// High word of the scale factor: n shifted into the exponent field plus 0x3FF00000 (the high word of 1.0).
shl.b32 %r33, %r9, 20;
add.s32 %r40, %r33, 1072693248;
BB16_9:
// Build the scale double from {low = 0, high = %r40} and apply it.
mov.u32 %r34, 0;
.loc 2 253 10
mov.b64 %fd42, {%r34, %r40};
mul.f64 %fd44, %fd43, %fd42;
// Store the result at destination index (inner + outer * param_3).
BB16_10:
.loc 1 33 42
add.s32 %r35, %r7, %r6;
mul.wide.s32 %rd7, %r35, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 33 42
st.global.f64 [%rd8], %fd44;
.loc 1 33 22
// Advance the inner index by %r3 and loop while it is below param_0.
add.s32 %r13, %r3, %r7;
.loc 1 33 1
setp.lt.s32 %p9, %r13, %r15;
mov.u32 %r39, %r13;
@%p9 bra BB16_4;
BB16_11:
.loc 1 33 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r19, %r38;
.loc 1 33 1
setp.lt.s32 %p10, %r38, %r16;
@%p10 bra BB16_2;
BB16_12:
.loc 1 33 2
ret;
}
// Kernel entry map_exp2_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, evaluates the inlined math routine tagged ".loc 2 248"
// (presumably exp2, going by the kernel name), and stores the f64 result
// into the buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file.
.visible .entry map_exp2_double(
.param .u32 map_exp2_double_param_0,
.param .u32 map_exp2_double_param_1,
.param .u64 map_exp2_double_param_2,
.param .u32 map_exp2_double_param_3,
.param .u64 map_exp2_double_param_4,
.param .u32 map_exp2_double_param_5
)
{
.reg .pred %p<11>;
.reg .s32 %r<41>;
.reg .s64 %rd<9>;
.reg .f64 %fd<41>;
ld.param.u32 %r15, [map_exp2_double_param_0];
ld.param.u32 %r16, [map_exp2_double_param_1];
ld.param.u64 %rd3, [map_exp2_double_param_2];
ld.param.u32 %r17, [map_exp2_double_param_3];
ld.param.u64 %rd4, [map_exp2_double_param_4];
ld.param.u32 %r18, [map_exp2_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 34 1
// Outer index %r38 = ntid.x * ctaid.x + tid.x.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r38, %r19, %r20, %r21;
.loc 1 34 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB17_12;
.loc 1 34 1
// Inner start index %r2 = ntid.y * ctaid.y + tid.y; inner step %r3 = nctaid.y * ntid.y.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 1 34 22
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop head.
BB17_2:
.loc 1 34 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB17_11;
.loc 1 34 1
// %r5 = outer * param_5 (source offset), %r6 = outer * param_3 (destination offset).
mul.lo.s32 %r5, %r38, %r18;
.loc 1 34 42
mul.lo.s32 %r6, %r38, %r17;
mov.u32 %r39, %r2;
// Inner loop head: %r7 is the current inner index.
BB17_4:
.loc 1 34 1
mov.u32 %r7, %r39;
add.s32 %r26, %r7, %r5;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 34 1
// %fd1 = input value x.
ld.global.f64 %fd1, [%rd6];
.loc 2 248 10
// %r8 = high 32 bits of x (sign, exponent, top of mantissa).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
// Range test on the high word: the unsigned compare passes for sign-clear x
// below a fixed bound (1083179008 = 0x40900000, the high word of 1024.0), the
// signed compare passes for sign-set x above a fixed negative bound.
// Either one selects the normal evaluation at BB17_6.
setp.lt.u32 %p3, %r8, 1083179008;
setp.lt.s32 %p4, %r8, -1064252416;
or.pred %p5, %p3, %p4;
@%p5 bra BB17_6;
// Out of range: sign bit set -> 0.0, otherwise +inf; NaN input ("gtu" vs +inf) -> x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd8, 0d0000000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p7, %fd9, 0d7FF0000000000000;
add.f64 %fd10, %fd1, %fd1;
selp.f64 %fd40, %fd10, %fd8, %p7;
bra.uni BB17_10;
BB17_6:
.loc 2 248 10
// %fd11 = x rounded to the nearest integer (n); %fd12 = x - n; %r9 = n as s32.
cvt.rni.f64.f64 %fd11, %fd1;
sub.f64 %fd12, %fd1, %fd11;
cvt.rzi.s32.f64 %r9, %fd11;
// %fd15 = (x - n) times a two-part constant; the large part 0x3FE62E42FEFA39EF
// is the double nearest ln 2.
mul.f64 %fd13, %fd12, 0d3C7ABC9E3B39803F;
mov.f64 %fd14, 0d3FE62E42FEFA39EF;
.loc 2 248 10
fma.rn.f64 %fd15, %fd12, %fd14, %fd13;
// Horner-form fma chain in %fd15; the last two steps add 1.0. Result in %fd39.
mov.f64 %fd16, 0d3E928A27E30F5561;
mov.f64 %fd17, 0d3E5AE6449C0686C0;
.loc 2 248 10
fma.rn.f64 %fd18, %fd17, %fd15, %fd16;
mov.f64 %fd19, 0d3EC71DE8E6486D6B;
.loc 2 248 10
fma.rn.f64 %fd20, %fd18, %fd15, %fd19;
mov.f64 %fd21, 0d3EFA019A6B2464C5;
.loc 2 248 10
fma.rn.f64 %fd22, %fd20, %fd15, %fd21;
mov.f64 %fd23, 0d3F2A01A0171064A5;
.loc 2 248 10
fma.rn.f64 %fd24, %fd22, %fd15, %fd23;
mov.f64 %fd25, 0d3F56C16C17F29C8D;
.loc 2 248 10
fma.rn.f64 %fd26, %fd24, %fd15, %fd25;
mov.f64 %fd27, 0d3F8111111111A24E;
.loc 2 248 10
fma.rn.f64 %fd28, %fd26, %fd15, %fd27;
mov.f64 %fd29, 0d3FA555555555211D;
.loc 2 248 10
fma.rn.f64 %fd30, %fd28, %fd15, %fd29;
mov.f64 %fd31, 0d3FC5555555555530;
.loc 2 248 10
fma.rn.f64 %fd32, %fd30, %fd15, %fd31;
mov.f64 %fd33, 0d3FE0000000000005;
.loc 2 248 10
fma.rn.f64 %fd34, %fd32, %fd15, %fd33;
mov.f64 %fd35, 0d3FF0000000000000;
.loc 2 248 10
fma.rn.f64 %fd36, %fd34, %fd15, %fd35;
fma.rn.f64 %fd39, %fd36, %fd15, %fd35;
// |n| < 1023 takes the single-multiply scaling at BB17_8.
abs.s32 %r27, %r9;
setp.lt.s32 %p8, %r27, 1023;
@%p8 bra BB17_8;
// |n| >= 1023: the scale is applied as two factors. The first factor has high
// word %r30 and is multiplied in here; %r40 holds the high word of the second.
// NOTE(review): presumably because 2^n alone would not fit the normal exponent range.
add.s32 %r28, %r9, 2046;
shl.b32 %r29, %r28, 19;
and.b32 %r30, %r29, -1048576;
shl.b32 %r31, %r28, 20;
sub.s32 %r40, %r31, %r30;
mov.u32 %r32, 0;
.loc 2 248 10
mov.b64 %fd37, {%r32, %r30};
mul.f64 %fd39, %fd39, %fd37;
bra.uni BB17_9;
BB17_8:
.loc 2 248 10
// High word of the scale factor: n shifted into the exponent field plus 0x3FF00000 (the high word of 1.0).
shl.b32 %r33, %r9, 20;
add.s32 %r40, %r33, 1072693248;
BB17_9:
// Build the scale double from {low = 0, high = %r40} and apply it.
mov.u32 %r34, 0;
.loc 2 248 10
mov.b64 %fd38, {%r34, %r40};
mul.f64 %fd40, %fd39, %fd38;
// Store the result at destination index (inner + outer * param_3).
BB17_10:
.loc 1 34 42
add.s32 %r35, %r7, %r6;
mul.wide.s32 %rd7, %r35, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 34 42
st.global.f64 [%rd8], %fd40;
.loc 1 34 22
// Advance the inner index by %r3 and loop while it is below param_0.
add.s32 %r13, %r3, %r7;
.loc 1 34 1
setp.lt.s32 %p9, %r13, %r15;
mov.u32 %r39, %r13;
@%p9 bra BB17_4;
BB17_11:
.loc 1 34 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r19, %r38;
.loc 1 34 1
setp.lt.s32 %p10, %r38, %r16;
@%p10 bra BB17_2;
BB17_12:
.loc 1 34 2
ret;
}
// Kernel entry map_exp_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, evaluates the inlined math routine tagged ".loc 2 243"
// (presumably exp, going by the kernel name), and stores the f64 result
// into the buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file.
.visible .entry map_exp_double(
.param .u32 map_exp_double_param_0,
.param .u32 map_exp_double_param_1,
.param .u64 map_exp_double_param_2,
.param .u32 map_exp_double_param_3,
.param .u64 map_exp_double_param_4,
.param .u32 map_exp_double_param_5
)
{
.reg .pred %p<11>;
.reg .s32 %r<41>;
.reg .s64 %rd<9>;
.reg .f64 %fd<42>;
ld.param.u32 %r15, [map_exp_double_param_0];
ld.param.u32 %r16, [map_exp_double_param_1];
ld.param.u64 %rd3, [map_exp_double_param_2];
ld.param.u32 %r17, [map_exp_double_param_3];
ld.param.u64 %rd4, [map_exp_double_param_4];
ld.param.u32 %r18, [map_exp_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 35 1
// Outer index %r38 = ntid.x * ctaid.x + tid.x.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r38, %r19, %r20, %r21;
.loc 1 35 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB18_12;
.loc 1 35 1
// Inner start index %r2 = ntid.y * ctaid.y + tid.y; inner step %r3 = nctaid.y * ntid.y.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 1 35 22
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop head.
BB18_2:
.loc 1 35 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB18_11;
.loc 1 35 1
// %r5 = outer * param_5 (source offset), %r6 = outer * param_3 (destination offset).
mul.lo.s32 %r5, %r38, %r18;
.loc 1 35 42
mul.lo.s32 %r6, %r38, %r17;
mov.u32 %r39, %r2;
// Inner loop head: %r7 is the current inner index.
BB18_4:
.loc 1 35 1
mov.u32 %r7, %r39;
add.s32 %r26, %r7, %r5;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 35 1
// %fd1 = input value x.
ld.global.f64 %fd1, [%rd6];
.loc 2 243 10
// %r8 = high 32 bits of x (sign, exponent, top of mantissa).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
// Range test on the high word: the unsigned compare passes for sign-clear x
// below a fixed bound, the signed compare passes for sign-set x above a fixed
// negative bound. Either one selects the normal evaluation at BB18_6.
setp.lt.u32 %p3, %r8, 1082535491;
setp.lt.s32 %p4, %r8, -1064875759;
or.pred %p5, %p3, %p4;
@%p5 bra BB18_6;
// Out of range: sign bit set -> 0.0, otherwise +inf; NaN input ("gtu" vs +inf) -> x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd8, 0d0000000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p7, %fd9, 0d7FF0000000000000;
add.f64 %fd10, %fd1, %fd1;
selp.f64 %fd41, %fd10, %fd8, %p7;
bra.uni BB18_10;
BB18_6:
.loc 2 243 10
// %fd12 = x * 0x3FF71547652B82FE rounded to the nearest integer (n); %r9 = n as s32.
// NOTE(review): the multiplier appears to be log2(e) -- confirm.
mul.f64 %fd11, %fd1, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s32.f64 %r9, %fd12;
// Two fma steps subtract n * ln2 from x (0xBFE62E42FEFA39EF is -ln 2; the
// second constant is a small correction term), leaving the reduced argument in %fd16.
mov.f64 %fd13, 0dBFE62E42FEFA39EF;
.loc 2 243 10
fma.rn.f64 %fd14, %fd12, %fd13, %fd1;
mov.f64 %fd15, 0dBC7ABC9E3B39803F;
.loc 2 243 10
fma.rn.f64 %fd16, %fd12, %fd15, %fd14;
// Horner-form fma chain in %fd16; the last two steps add 1.0. Result in %fd40.
mov.f64 %fd17, 0d3E928A27E30F5561;
mov.f64 %fd18, 0d3E5AE6449C0686C0;
.loc 2 243 10
fma.rn.f64 %fd19, %fd18, %fd16, %fd17;
mov.f64 %fd20, 0d3EC71DE8E6486D6B;
.loc 2 243 10
fma.rn.f64 %fd21, %fd19, %fd16, %fd20;
mov.f64 %fd22, 0d3EFA019A6B2464C5;
.loc 2 243 10
fma.rn.f64 %fd23, %fd21, %fd16, %fd22;
mov.f64 %fd24, 0d3F2A01A0171064A5;
.loc 2 243 10
fma.rn.f64 %fd25, %fd23, %fd16, %fd24;
mov.f64 %fd26, 0d3F56C16C17F29C8D;
.loc 2 243 10
fma.rn.f64 %fd27, %fd25, %fd16, %fd26;
mov.f64 %fd28, 0d3F8111111111A24E;
.loc 2 243 10
fma.rn.f64 %fd29, %fd27, %fd16, %fd28;
mov.f64 %fd30, 0d3FA555555555211D;
.loc 2 243 10
fma.rn.f64 %fd31, %fd29, %fd16, %fd30;
mov.f64 %fd32, 0d3FC5555555555530;
.loc 2 243 10
fma.rn.f64 %fd33, %fd31, %fd16, %fd32;
mov.f64 %fd34, 0d3FE0000000000005;
.loc 2 243 10
fma.rn.f64 %fd35, %fd33, %fd16, %fd34;
mov.f64 %fd36, 0d3FF0000000000000;
.loc 2 243 10
fma.rn.f64 %fd37, %fd35, %fd16, %fd36;
fma.rn.f64 %fd40, %fd37, %fd16, %fd36;
// |n| < 1023 takes the single-multiply scaling at BB18_8.
abs.s32 %r27, %r9;
setp.lt.s32 %p8, %r27, 1023;
@%p8 bra BB18_8;
// |n| >= 1023: the scale is applied as two factors. The first factor has high
// word %r30 and is multiplied in here; %r40 holds the high word of the second.
// NOTE(review): presumably because 2^n alone would not fit the normal exponent range.
add.s32 %r28, %r9, 2046;
shl.b32 %r29, %r28, 19;
and.b32 %r30, %r29, -1048576;
shl.b32 %r31, %r28, 20;
sub.s32 %r40, %r31, %r30;
mov.u32 %r32, 0;
.loc 2 243 10
mov.b64 %fd38, {%r32, %r30};
mul.f64 %fd40, %fd40, %fd38;
bra.uni BB18_9;
BB18_8:
.loc 2 243 10
// High word of the scale factor: n shifted into the exponent field plus 0x3FF00000 (the high word of 1.0).
shl.b32 %r33, %r9, 20;
add.s32 %r40, %r33, 1072693248;
BB18_9:
// Build the scale double from {low = 0, high = %r40} and apply it.
mov.u32 %r34, 0;
.loc 2 243 10
mov.b64 %fd39, {%r34, %r40};
mul.f64 %fd41, %fd40, %fd39;
// Store the result at destination index (inner + outer * param_3).
BB18_10:
.loc 1 35 42
add.s32 %r35, %r7, %r6;
mul.wide.s32 %rd7, %r35, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 35 42
st.global.f64 [%rd8], %fd41;
.loc 1 35 22
// Advance the inner index by %r3 and loop while it is below param_0.
add.s32 %r13, %r3, %r7;
.loc 1 35 1
setp.lt.s32 %p9, %r13, %r15;
mov.u32 %r39, %r13;
@%p9 bra BB18_4;
BB18_11:
.loc 1 35 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r19, %r38;
.loc 1 35 1
setp.lt.s32 %p10, %r38, %r16;
@%p10 bra BB18_2;
BB18_12:
.loc 1 35 2
ret;
}
// Kernel entry map_expm1_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, evaluates the inlined math routine tagged ".loc 2 258"
// (presumably expm1, going by the kernel name), and stores the f64 result
// into the buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file.
.visible .entry map_expm1_double(
.param .u32 map_expm1_double_param_0,
.param .u32 map_expm1_double_param_1,
.param .u64 map_expm1_double_param_2,
.param .u32 map_expm1_double_param_3,
.param .u64 map_expm1_double_param_4,
.param .u32 map_expm1_double_param_5
)
{
.reg .pred %p<13>;
.reg .s32 %r<37>;
.reg .s64 %rd<9>;
.reg .f64 %fd<45>;
ld.param.u32 %r11, [map_expm1_double_param_0];
ld.param.u32 %r12, [map_expm1_double_param_1];
ld.param.u64 %rd3, [map_expm1_double_param_2];
ld.param.u32 %r13, [map_expm1_double_param_3];
ld.param.u64 %rd4, [map_expm1_double_param_4];
ld.param.u32 %r14, [map_expm1_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 36 1
// Outer index %r35 = ntid.x * ctaid.x + tid.x.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r35, %r15, %r16, %r17;
.loc 1 36 1
setp.ge.s32 %p1, %r35, %r12;
@%p1 bra BB19_9;
.loc 1 36 1
// Inner start index %r2 = ntid.y * ctaid.y + tid.y; inner step %r3 = nctaid.y * ntid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r2, %r19, %r20, %r18;
.loc 1 36 22
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r3, %r21, %r19;
// Outer loop head.
BB19_2:
.loc 1 36 1
setp.ge.s32 %p2, %r2, %r11;
@%p2 bra BB19_8;
.loc 1 36 1
// %r5 = outer * param_5 (source offset), %r6 = outer * param_3 (destination offset).
mul.lo.s32 %r5, %r35, %r14;
.loc 1 36 42
mul.lo.s32 %r6, %r35, %r13;
mov.u32 %r36, %r2;
// Inner loop head: %r7 is the current inner index.
BB19_4:
.loc 1 36 1
mov.u32 %r7, %r36;
add.s32 %r22, %r7, %r5;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r22, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 36 1
// %fd1 = input value x.
ld.global.f64 %fd1, [%rd6];
.loc 2 258 10
// %r8 = high 32 bits of x (sign, exponent, top of mantissa).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
// Range test on the high word: the unsigned compare passes for sign-clear x
// below a fixed bound, the signed compare passes for sign-set x above a fixed
// negative bound. Either one selects the normal evaluation at BB19_6.
setp.lt.u32 %p3, %r8, 1082535491;
setp.lt.s32 %p4, %r8, -1068859392;
or.pred %p5, %p3, %p4;
@%p5 bra BB19_6;
// Out of range: sign bit set -> -1.0, otherwise +inf; NaN input ("gtu" vs +inf) -> x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd5, 0dBFF0000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd6, %fd1;
setp.gtu.f64 %p7, %fd6, 0d7FF0000000000000;
add.f64 %fd7, %fd1, %fd1;
selp.f64 %fd44, %fd7, %fd5, %p7;
bra.uni BB19_7;
BB19_6:
.loc 2 258 10
// %fd9 = x * 0x3FF71547652B82FE rounded to the nearest integer (n); %r23 = n as s32.
// NOTE(review): the multiplier appears to be log2(e) -- confirm.
mul.f64 %fd8, %fd1, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd9, %fd8;
cvt.rzi.s32.f64 %r23, %fd9;
// Two fma steps subtract n * ln2 from x (0xBFE62E42FEFA39EF is -ln 2; the
// second constant is a small correction term), leaving the reduced argument in %fd13.
mov.f64 %fd10, 0dBFE62E42FEFA39EF;
.loc 2 258 10
fma.rn.f64 %fd11, %fd9, %fd10, %fd1;
mov.f64 %fd12, 0dBC7ABC9E3B39803F;
.loc 2 258 10
fma.rn.f64 %fd13, %fd9, %fd12, %fd11;
// %r24 = high word shifted left by one, which discards the sign bit.
// When it is below the fixed threshold (%p8), the reduction is bypassed:
// n is forced to 0 (%r25) and x itself is used as the argument (%fd14).
shl.b32 %r24, %r8, 1;
setp.lt.u32 %p8, %r24, 2142496327;
selp.b32 %r25, 0, %r23, %p8;
mov.u32 %r26, 0;
.loc 2 258 10
selp.f64 %fd14, %fd1, %fd13, %p8;
// Horner-form fma chain in r = %fd14, ending at the constant 0.5.
mov.f64 %fd15, 0d3E5AF86D8EBD13CD;
mov.f64 %fd16, 0d3E21F4076ACD15B6;
.loc 2 258 10
fma.rn.f64 %fd17, %fd16, %fd14, %fd15;
mov.f64 %fd18, 0d3E927E5092BA033D;
.loc 2 258 10
fma.rn.f64 %fd19, %fd17, %fd14, %fd18;
mov.f64 %fd20, 0d3EC71DDE6C5F9DA1;
.loc 2 258 10
fma.rn.f64 %fd21, %fd19, %fd14, %fd20;
mov.f64 %fd22, 0d3EFA01A018D034E6;
.loc 2 258 10
fma.rn.f64 %fd23, %fd21, %fd14, %fd22;
mov.f64 %fd24, 0d3F2A01A01B3B6940;
.loc 2 258 10
fma.rn.f64 %fd25, %fd23, %fd14, %fd24;
mov.f64 %fd26, 0d3F56C16C16C1B5DD;
.loc 2 258 10
fma.rn.f64 %fd27, %fd25, %fd14, %fd26;
mov.f64 %fd28, 0d3F8111111110F74D;
.loc 2 258 10
fma.rn.f64 %fd29, %fd27, %fd14, %fd28;
mov.f64 %fd30, 0d3FA555555555554D;
.loc 2 258 10
fma.rn.f64 %fd31, %fd29, %fd14, %fd30;
mov.f64 %fd32, 0d3FC5555555555557;
.loc 2 258 10
fma.rn.f64 %fd33, %fd31, %fd14, %fd32;
mov.f64 %fd34, 0d3FE0000000000000;
.loc 2 258 10
fma.rn.f64 %fd35, %fd33, %fd14, %fd34;
// %fd37 = r + r*r*poly(r).
mul.f64 %fd36, %fd35, %fd14;
fma.rn.f64 %fd37, %fd36, %fd14, %fd14;
// n == 1024 is handled by using n - 1 for the scale and doubling the result afterwards.
setp.eq.s32 %p9, %r25, 1024;
selp.b32 %r27, -1, 0, %p9;
add.s32 %r28, %r27, %r25;
// %fd38 = scale built from high word (n << 20) + 0x3FF00000; %fd39 = 1.0.
shl.b32 %r29, %r28, 20;
add.s32 %r30, %r29, 1072693248;
mov.u32 %r31, 1072693248;
.loc 2 258 10
mov.b64 %fd38, {%r26, %r30};
mov.b64 %fd39, {%r26, %r31};
// %fd41 = %fd37 * scale + (scale - 1.0).
sub.f64 %fd40, %fd38, %fd39;
fma.rn.f64 %fd41, %fd37, %fd38, %fd40;
add.f64 %fd42, %fd41, %fd41;
selp.f64 %fd43, %fd42, %fd41, %p9;
// %r24 == 0 means every bit of the high word other than the sign is zero; the
// input-derived value %fd14 is then returned unchanged.
setp.eq.s32 %p10, %r24, 0;
selp.f64 %fd44, %fd14, %fd43, %p10;
// Store the result at destination index (inner + outer * param_3).
BB19_7:
.loc 1 36 42
add.s32 %r32, %r7, %r6;
mul.wide.s32 %rd7, %r32, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 36 42
st.global.f64 [%rd8], %fd44;
.loc 1 36 22
// Advance the inner index by %r3 and loop while it is below param_0.
add.s32 %r9, %r3, %r7;
.loc 1 36 1
setp.lt.s32 %p11, %r9, %r11;
mov.u32 %r36, %r9;
@%p11 bra BB19_4;
BB19_8:
.loc 1 36 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is below param_1.
mov.u32 %r33, %nctaid.x;
mad.lo.s32 %r35, %r33, %r15, %r35;
.loc 1 36 1
setp.lt.s32 %p12, %r35, %r12;
@%p12 bra BB19_2;
BB19_9:
.loc 1 36 2
ret;
}
// Kernel entry map_fabs_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, takes its absolute value (abs.f64), and stores the result into the
// buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file.
.visible .entry map_fabs_double(
.param .u32 map_fabs_double_param_0,
.param .u32 map_fabs_double_param_1,
.param .u64 map_fabs_double_param_2,
.param .u32 map_fabs_double_param_3,
.param .u64 map_fabs_double_param_4,
.param .u32 map_fabs_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_fabs_double_param_0];
ld.param.u32 %r13, [map_fabs_double_param_1];
ld.param.u64 %rd3, [map_fabs_double_param_2];
ld.param.u32 %r14, [map_fabs_double_param_3];
ld.param.u64 %rd4, [map_fabs_double_param_4];
ld.param.u32 %r15, [map_fabs_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 37 1
// Outer index %r25 = ntid.x * ctaid.x + tid.x.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 37 1
// Nothing to do for this thread if the outer index is already past param_1.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB20_6;
.loc 1 37 1
// Inner start index %r3 = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 37 22
// Outer step %r4 = nctaid.x * ntid.x.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 37 22
// Inner step %r5 = nctaid.y * ntid.y.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head.
BB20_2:
.loc 1 37 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB20_5;
.loc 1 37 1
// %r7 = outer * param_5 (source offset), %r8 = outer * param_3 (destination offset).
mul.lo.s32 %r7, %r25, %r15;
.loc 1 37 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop head: %r9 is the current inner index.
BB20_4:
.loc 1 37 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 37 1
ld.global.f64 %fd1, [%rd6];
.loc 3 2755 10
// The mapped operation: %fd2 = |%fd1|.
abs.f64 %fd2, %fd1;
.loc 1 37 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 37 42
st.global.f64 [%rd8], %fd2;
.loc 1 37 22
// Advance the inner index by %r5 and loop while it is below param_0.
add.s32 %r10, %r5, %r9;
.loc 1 37 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB20_4;
BB20_5:
.loc 1 37 22
// Advance the outer index by %r4 and loop while it is below param_1.
add.s32 %r25, %r4, %r25;
.loc 1 37 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB20_2;
BB20_6:
.loc 1 37 2
ret;
}
// Kernel entry map_floor_double.
// For every (outer, inner) index pair it loads one f64 from the buffer in
// param_4, rounds it to an integral value toward negative infinity
// (cvt.rmi.f64.f64), and stores the result into the buffer in param_2.
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination base address (converted with cvta.to.global)
//   param_3 (u32): multiplier applied to the outer index for destination addressing
//   param_4 (u64): source base address (converted with cvta.to.global)
//   param_5 (u32): multiplier applied to the outer index for source addressing
// NOTE(review): presumably (rows, cols, dst, dstStride, src, srcStride) -- confirm
// against the generating .cu file.
.visible .entry map_floor_double(
.param .u32 map_floor_double_param_0,
.param .u32 map_floor_double_param_1,
.param .u64 map_floor_double_param_2,
.param .u32 map_floor_double_param_3,
.param .u64 map_floor_double_param_4,
.param .u32 map_floor_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_floor_double_param_0];
ld.param.u32 %r13, [map_floor_double_param_1];
ld.param.u64 %rd3, [map_floor_double_param_2];
ld.param.u32 %r14, [map_floor_double_param_3];
ld.param.u64 %rd4, [map_floor_double_param_4];
ld.param.u32 %r15, [map_floor_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 38 1
// Outer index %r25 = ntid.x * ctaid.x + tid.x.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 38 1
// Nothing to do for this thread if the outer index is already past param_1.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB21_6;
.loc 1 38 1
// Inner start index %r3 = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 38 22
// Outer step %r4 = nctaid.x * ntid.x.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 38 22
// Inner step %r5 = nctaid.y * ntid.y.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head.
BB21_2:
.loc 1 38 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB21_5;
.loc 1 38 1
// %r7 = outer * param_5 (source offset), %r8 = outer * param_3 (destination offset).
mul.lo.s32 %r7, %r25, %r15;
.loc 1 38 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop head: %r9 is the current inner index.
BB21_4:
.loc 1 38 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
// Elements are 8 bytes wide: byte offset = index * 8.
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 38 1
ld.global.f64 %fd1, [%rd6];
.loc 3 2745 10
// The mapped operation: round to an integral f64 toward negative infinity.
cvt.rmi.f64.f64 %fd2, %fd1;
.loc 1 38 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 38 42
st.global.f64 [%rd8], %fd2;
.loc 1 38 22
// Advance the inner index by %r5 and loop while it is below param_0.
add.s32 %r10, %r5, %r9;
.loc 1 38 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB21_4;
BB21_5:
.loc 1 38 22
// Advance the outer index by %r4 and loop while it is below param_1.
add.s32 %r25, %r4, %r25;
.loc 1 38 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB21_2;
BB21_6:
.loc 1 38 2
ret;
}
.visible .entry map_j0_double(
.param .u32 map_j0_double_param_0,
.param .u32 map_j0_double_param_1,
.param .u64 map_j0_double_param_2,
.param .u32 map_j0_double_param_3,
.param .u64 map_j0_double_param_4,
.param .u32 map_j0_double_param_5
)
{
.local .align 4 .b8 __local_depot22[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<15>;
.reg .f32 %f<5>;
.reg .s32 %r<44>;
.reg .s64 %rd<20>;
.reg .f64 %fd<217>;
mov.u64 %SPL, __local_depot22;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r15, [map_j0_double_param_0];
ld.param.u32 %r16, [map_j0_double_param_1];
ld.param.u64 %rd1, [map_j0_double_param_2];
ld.param.u32 %r17, [map_j0_double_param_3];
ld.param.u64 %rd2, [map_j0_double_param_4];
ld.param.u32 %r18, [map_j0_double_param_5];
.loc 1 39 1
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r40, %r19, %r20, %r21;
.loc 1 39 1
setp.ge.s32 %p1, %r40, %r16;
@%p1 bra BB22_24;
.loc 1 39 1
mov.u32 %r22, %ntid.y;
.loc 1 39 22
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r2, %r23, %r22;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd17, %rd1;
BB22_2:
.loc 1 39 1
mov.u32 %r24, %ctaid.y;
mov.u32 %r26, %tid.y;
mad.lo.s32 %r41, %r22, %r24, %r26;
.loc 1 39 1
setp.ge.s32 %p2, %r41, %r15;
@%p2 bra BB22_23;
BB22_3:
.loc 1 39 1
mad.lo.s32 %r31, %r40, %r18, %r41;
mul.wide.s32 %rd4, %r31, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd25, [%rd5];
.loc 2 333 10
abs.f64 %fd1, %fd25;
setp.gtu.f64 %p3, %fd1, 0d400FB319F277BBE5;
@%p3 bra BB22_5;
add.f64 %fd26, %fd1, 0dC0033D152E971B40;
add.f64 %fd27, %fd26, 0d3CA0F539D7DA258E;
mov.f64 %fd28, 0dBCFCF8F9A8C294BC;
mov.f64 %fd29, 0dBCC0D18564C48C61;
.loc 2 333 10
fma.rn.f64 %fd30, %fd29, %fd27, %fd28;
mov.f64 %fd31, 0d3D3FAB983CAE498B;
.loc 2 333 10
fma.rn.f64 %fd32, %fd30, %fd27, %fd31;
mov.f64 %fd33, 0d3D7CD7C018579B88;
.loc 2 333 10
fma.rn.f64 %fd34, %fd32, %fd27, %fd33;
mov.f64 %fd35, 0dBDBBDD2342D64FDD;
.loc 2 333 10
fma.rn.f64 %fd36, %fd34, %fd27, %fd35;
mov.f64 %fd37, 0dBDF5C2D9416B1E2B;
.loc 2 333 10
fma.rn.f64 %fd38, %fd36, %fd27, %fd37;
mov.f64 %fd39, 0d3E32951D73174DD5;
.loc 2 333 10
fma.rn.f64 %fd40, %fd38, %fd27, %fd39;
mov.f64 %fd41, 0d3E67FF99802CAEB5;
.loc 2 333 10
fma.rn.f64 %fd42, %fd40, %fd27, %fd41;
mov.f64 %fd43, 0dBEA1CCE305C4C9F7;
.loc 2 333 10
fma.rn.f64 %fd44, %fd42, %fd27, %fd43;
mov.f64 %fd45, 0dBED232C77E29E1BB;
.loc 2 333 10
fma.rn.f64 %fd46, %fd44, %fd27, %fd45;
mov.f64 %fd47, 0d3F06ED3B9F0EF757;
.loc 2 333 10
fma.rn.f64 %fd48, %fd46, %fd27, %fd47;
mov.f64 %fd49, 0d3F315382BA096A62;
.loc 2 333 10
fma.rn.f64 %fd50, %fd48, %fd27, %fd49;
mov.f64 %fd51, 0dBF61F992590D1AE4;
.loc 2 333 10
fma.rn.f64 %fd52, %fd50, %fd27, %fd51;
mov.f64 %fd53, 0dBF81BB1CBE1A465F;
.loc 2 333 10
fma.rn.f64 %fd54, %fd52, %fd27, %fd53;
mov.f64 %fd55, 0d3FACFAE864368D84;
.loc 2 333 10
fma.rn.f64 %fd56, %fd54, %fd27, %fd55;
mov.f64 %fd57, 0d3FBBA1DEEA0294A3;
.loc 2 333 10
fma.rn.f64 %fd58, %fd56, %fd27, %fd57;
mov.f64 %fd59, 0dBFE09CDB36551280;
.loc 2 333 10
fma.rn.f64 %fd60, %fd58, %fd27, %fd59;
mul.f64 %fd216, %fd60, %fd27;
bra.uni BB22_22;
BB22_5:
.loc 2 333 10
setp.gtu.f64 %p4, %fd1, 0d401C58FD1A62F5EC;
@%p4 bra BB22_7;
add.f64 %fd61, %fd1, 0dC016148F5B2C2E45;
add.f64 %fd62, %fd61, 0dBC975054CD60A517;
mov.f64 %fd63, 0d3CF83FD1F333EB61;
mov.f64 %fd64, 0d3CBCB0A8F126B343;
.loc 2 333 10
fma.rn.f64 %fd65, %fd64, %fd62, %fd63;
mov.f64 %fd66, 0dBD4100E33E3FB413;
.loc 2 333 10
fma.rn.f64 %fd67, %fd65, %fd62, %fd66;
mov.f64 %fd68, 0dBD7846076D004627;
.loc 2 333 10
fma.rn.f64 %fd69, %fd67, %fd62, %fd68;
mov.f64 %fd70, 0d3DBE2F1D4F90720D;
.loc 2 333 10
fma.rn.f64 %fd71, %fd69, %fd62, %fd70;
mov.f64 %fd72, 0d3DF1D03B1E4A119B;
.loc 2 333 10
fma.rn.f64 %fd73, %fd71, %fd62, %fd72;
mov.f64 %fd74, 0dBE341D72B1B3BCE9;
.loc 2 333 10
fma.rn.f64 %fd75, %fd73, %fd62, %fd74;
mov.f64 %fd76, 0dBE62DA37CE2A9EF8;
.loc 2 333 10
fma.rn.f64 %fd77, %fd75, %fd62, %fd76;
mov.f64 %fd78, 0d3EA32E6D9974F763;
.loc 2 333 10
fma.rn.f64 %fd79, %fd77, %fd62, %fd78;
mov.f64 %fd80, 0d3ECAD77D744A1879;
.loc 2 333 10
fma.rn.f64 %fd81, %fd79, %fd62, %fd80;
mov.f64 %fd82, 0dBF0863F481A37337;
.loc 2 333 10
fma.rn.f64 %fd83, %fd81, %fd62, %fd82;
mov.f64 %fd84, 0dBF26F641F418F0F4;
.loc 2 333 10
fma.rn.f64 %fd85, %fd83, %fd62, %fd84;
mov.f64 %fd86, 0d3F627E31FE9A969E;
.loc 2 333 10
fma.rn.f64 %fd87, %fd85, %fd62, %fd86;
mov.f64 %fd88, 0d3F72F7FFE9025628;
.loc 2 333 10
fma.rn.f64 %fd89, %fd87, %fd62, %fd88;
mov.f64 %fd90, 0dBFAB2150CB41E8BF;
.loc 2 333 10
fma.rn.f64 %fd91, %fd89, %fd62, %fd90;
mov.f64 %fd92, 0dBF9F8F72E7A848DE;
.loc 2 333 10
fma.rn.f64 %fd93, %fd91, %fd62, %fd92;
mov.f64 %fd94, 0d3FD5C6E60A097823;
.loc 2 333 10
fma.rn.f64 %fd95, %fd93, %fd62, %fd94;
mul.f64 %fd216, %fd95, %fd62;
bra.uni BB22_22;
BB22_7:
.loc 2 333 10
setp.gtu.f64 %p5, %fd1, 0d402471FCB6A7A8C0;
@%p5 bra BB22_9;
add.f64 %fd96, %fd1, 0dC0214EB56CCCDECA;
add.f64 %fd97, %fd96, 0d3CB51970714C7C25;
mov.f64 %fd98, 0dBCF4B3A71AAAC629;
mov.f64 %fd99, 0dBCBDB7FFCF659E24;
.loc 2 333 10
fma.rn.f64 %fd100, %fd99, %fd97, %fd98;
mov.f64 %fd101, 0d3D417EC150ECDCE7;
.loc 2 333 10
fma.rn.f64 %fd102, %fd100, %fd97, %fd101;
mov.f64 %fd103, 0d3D7438F5EA1D10B2;
.loc 2 333 10
fma.rn.f64 %fd104, %fd102, %fd97, %fd103;
mov.f64 %fd105, 0dBDBEDAE7EC2C9E87;
.loc 2 333 10
fma.rn.f64 %fd106, %fd104, %fd97, %fd105;
mov.f64 %fd107, 0dBDECADD2C4B91F58;
.loc 2 333 10
fma.rn.f64 %fd108, %fd106, %fd97, %fd107;
mov.f64 %fd109, 0d3E34582C8EE12204;
.loc 2 333 10
fma.rn.f64 %fd110, %fd108, %fd97, %fd109;
mov.f64 %fd111, 0d3E5CEDA451DD20F8;
.loc 2 333 10
fma.rn.f64 %fd112, %fd110, %fd97, %fd111;
mov.f64 %fd113, 0dBEA30E8CC3165E2F;
.loc 2 333 10
fma.rn.f64 %fd114, %fd112, %fd97, %fd113;
mov.f64 %fd115, 0dBEC3324842BB1A2E;
.loc 2 333 10
fma.rn.f64 %fd116, %fd114, %fd97, %fd115;
mov.f64 %fd117, 0d3F07800BC54FBDDB;
.loc 2 333 10
fma.rn.f64 %fd118, %fd116, %fd97, %fd117;
mov.f64 %fd119, 0d3F1D79605276949A;
.loc 2 333 10
fma.rn.f64 %fd120, %fd118, %fd97, %fd119;
mov.f64 %fd121, 0dBF60E0D60385A629;
.loc 2 333 10
fma.rn.f64 %fd122, %fd120, %fd97, %fd121;
mov.f64 %fd123, 0dBF648E63600D82F3;
.loc 2 333 10
fma.rn.f64 %fd124, %fd122, %fd97, %fd123;
mov.f64 %fd125, 0d3FA68B984EC6493A;
.loc 2 333 10
fma.rn.f64 %fd126, %fd124, %fd97, %fd125;
mov.f64 %fd127, 0d3F900F7FCF183E0B;
.loc 2 333 10
fma.rn.f64 %fd128, %fd126, %fd97, %fd127;
mov.f64 %fd129, 0dBFD15F7977A772D4;
.loc 2 333 10
fma.rn.f64 %fd130, %fd128, %fd97, %fd129;
mul.f64 %fd216, %fd130, %fd97;
bra.uni BB22_22;
BB22_9:
.loc 2 333 10
abs.f64 %fd131, %fd1;
setp.neu.f64 %p6, %fd131, 0d7FF0000000000000;
@%p6 bra BB22_11;
mov.f64 %fd216, 0d0000000000000000;
bra.uni BB22_22;
BB22_11:
add.u64 %rd6, %SP, 4;
.loc 2 333 10
// inline asm
cvt.rn.f32.f64 %f1,%fd1;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd134,%f2;
// inline asm
neg.f64 %fd135, %fd1;
mov.f64 %fd136, 0d3FF0000000000000;
.loc 2 333 10
fma.rn.f64 %fd137, %fd135, %fd134, %fd136;
fma.rn.f64 %fd138, %fd137, %fd137, %fd137;
fma.rn.f64 %fd139, %fd138, %fd134, %fd134;
mul.f64 %fd140, %fd139, %fd139;
mov.f64 %fd141, 0d409927467A655012;
mov.f64 %fd142, 0dC0D115CB8C11A9DC;
.loc 2 333 10
fma.rn.f64 %fd143, %fd142, %fd140, %fd141;
mov.f64 %fd144, 0dC05751787E247BD4;
.loc 2 333 10
fma.rn.f64 %fd145, %fd143, %fd140, %fd144;
mov.f64 %fd146, 0d401704C4E5FC36B2;
.loc 2 333 10
fma.rn.f64 %fd147, %fd145, %fd140, %fd146;
mov.f64 %fd148, 0dBFE15B747A2FD531;
.loc 2 333 10
fma.rn.f64 %fd149, %fd147, %fd140, %fd148;
mov.f64 %fd150, 0d3FBA7FEACF6CB79B;
.loc 2 333 10
fma.rn.f64 %fd151, %fd149, %fd140, %fd150;
mov.f64 %fd152, 0dBFAFFFFFEDDCF548;
.loc 2 333 10
fma.rn.f64 %fd153, %fd151, %fd140, %fd152;
mov.f64 %fd154, 0d3FEFFFFFFFFFC9E5;
.loc 2 333 10
fma.rn.f64 %fd155, %fd153, %fd140, %fd154;
mov.f64 %fd156, 0d410ECD4523B12B84;
mov.f64 %fd157, 0dC14602FE1C34685E;
.loc 2 333 10
fma.rn.f64 %fd158, %fd157, %fd140, %fd156;
mov.f64 %fd159, 0dC0C7A2FC1972F05A;
.loc 2 333 10
fma.rn.f64 %fd160, %fd158, %fd140, %fd159;
mov.f64 %fd161, 0d407EBA131F7E5BEB;
.loc 2 333 10
fma.rn.f64 %fd162, %fd160, %fd140, %fd161;
mov.f64 %fd163, 0dC0373B92E6E7CC7D;
.loc 2 333 10
fma.rn.f64 %fd164, %fd162, %fd140, %fd163;
mov.f64 %fd165, 0d3FFA31BEE63A2F08;
.loc 2 333 10
fma.rn.f64 %fd166, %fd164, %fd140, %fd165;
mov.f64 %fd167, 0dBFCAD320104D5D05;
.loc 2 333 10
fma.rn.f64 %fd168, %fd166, %fd140, %fd167;
mov.f64 %fd169, 0d3FB0AAAA9C76D07E;
.loc 2 333 10
fma.rn.f64 %fd170, %fd168, %fd140, %fd169;
mov.f64 %fd171, 0dBFBFFFFFFFFDACEC;
.loc 2 333 10
fma.rn.f64 %fd172, %fd170, %fd140, %fd171;
fma.rn.f64 %fd5, %fd172, %fd139, %fd1;
rsqrt.approx.f64 %fd173, %fd1;
mul.f64 %fd174, %fd173, 0d3FE9884533D43651;
mul.f64 %fd6, %fd174, %fd155;
mul.f64 %fd175, %fd5, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r42, %fd175;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 333 10
st.local.u32 [%rd7], %r42;
cvt.rn.f64.s32 %fd176, %r42;
neg.f64 %fd177, %fd176;
mov.f64 %fd178, 0d3FF921FB54442D18;
.loc 2 333 10
fma.rn.f64 %fd179, %fd177, %fd178, %fd5;
mov.f64 %fd180, 0d3C91A62633145C00;
.loc 2 333 10
fma.rn.f64 %fd181, %fd177, %fd180, %fd179;
mov.f64 %fd182, 0d397B839A252049C0;
.loc 2 333 10
fma.rn.f64 %fd212, %fd177, %fd182, %fd181;
abs.f64 %fd183, %fd5;
setp.leu.f64 %p7, %fd183, 0d41E0000000000000;
@%p7 bra BB22_13;
// Callseq Start 1
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd5;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 333 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd212, [retval0+0];
}
// Callseq End 1
ld.local.u32 %r42, [%rd7];
BB22_13:
and.b32 %r32, %r42, 3;
cvt.rn.f64.s32 %fd184, %r32;
add.f64 %fd185, %fd212, 0dBFE921FB54442D18;
fma.rn.f64 %fd213, %fd184, 0d3FF921FB54442D18, %fd185;
abs.f64 %fd186, %fd213;
setp.neu.f64 %p8, %fd186, 0d7FF0000000000000;
@%p8 bra BB22_15;
mov.f64 %fd187, 0d0000000000000000;
.loc 2 333 10
mul.rn.f64 %fd213, %fd213, %fd187;
BB22_15:
add.u64 %rd10, %SP, 0;
.loc 2 333 10
mul.f64 %fd188, %fd213, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r43, %fd188;
cvta.to.local.u64 %rd11, %rd10;
.loc 2 333 10
st.local.u32 [%rd11], %r43;
cvt.rn.f64.s32 %fd189, %r43;
neg.f64 %fd190, %fd189;
fma.rn.f64 %fd192, %fd190, %fd178, %fd213;
fma.rn.f64 %fd194, %fd190, %fd180, %fd192;
fma.rn.f64 %fd214, %fd190, %fd182, %fd194;
abs.f64 %fd196, %fd213;
setp.leu.f64 %p9, %fd196, 0d41E0000000000000;
@%p9 bra BB22_17;
// Callseq Start 2
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd213;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 2 333 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd214, [retval0+0];
}
// Callseq End 2
ld.local.u32 %r43, [%rd11];
BB22_17:
add.s32 %r12, %r43, 1;
shl.b32 %r33, %r12, 3;
and.b32 %r34, %r33, 8;
and.b32 %r35, %r12, 1;
setp.eq.b32 %p10, %r35, 1;
not.pred %p11, %p10;
selp.f64 %fd197, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p11;
mul.wide.u32 %rd14, %r34, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 2 333 10
ld.const.f64 %fd198, [%rd16+8];
mul.rn.f64 %fd16, %fd214, %fd214;
fma.rn.f64 %fd199, %fd197, %fd16, %fd198;
ld.const.f64 %fd200, [%rd16+16];
fma.rn.f64 %fd201, %fd199, %fd16, %fd200;
ld.const.f64 %fd202, [%rd16+24];
fma.rn.f64 %fd203, %fd201, %fd16, %fd202;
ld.const.f64 %fd204, [%rd16+32];
fma.rn.f64 %fd205, %fd203, %fd16, %fd204;
ld.const.f64 %fd206, [%rd16+40];
fma.rn.f64 %fd207, %fd205, %fd16, %fd206;
ld.const.f64 %fd208, [%rd16+48];
fma.rn.f64 %fd17, %fd207, %fd16, %fd208;
fma.rn.f64 %fd215, %fd17, %fd214, %fd214;
@%p11 bra BB22_19;
fma.rn.f64 %fd215, %fd17, %fd16, %fd136;
BB22_19:
and.b32 %r36, %r12, 2;
setp.eq.s32 %p12, %r36, 0;
@%p12 bra BB22_21;
mov.f64 %fd210, 0d0000000000000000;
mov.f64 %fd211, 0dBFF0000000000000;
.loc 2 333 10
fma.rn.f64 %fd215, %fd215, %fd211, %fd210;
BB22_21:
mul.f64 %fd216, %fd6, %fd215;
BB22_22:
.loc 1 39 42
mad.lo.s32 %r37, %r40, %r17, %r41;
mul.wide.s32 %rd18, %r37, 8;
add.s64 %rd19, %rd17, %rd18;
st.global.f64 [%rd19], %fd216;
.loc 1 39 22
add.s32 %r41, %r2, %r41;
.loc 1 39 1
setp.lt.s32 %p13, %r41, %r15;
@%p13 bra BB22_3;
BB22_23:
.loc 1 39 22
mov.u32 %r38, %nctaid.x;
mad.lo.s32 %r40, %r38, %r19, %r40;
.loc 1 39 1
setp.lt.s32 %p14, %r40, %r16;
@%p14 bra BB22_2;
BB22_24:
.loc 1 39 2
ret;
}
// map_j1_double: element-wise map kernel over a 2-D strided buffer of f64.
// For every (i, j) with i < param_1 and j < param_0 it loads
// src[i * param_5 + j], evaluates the inlined math routine tagged
// ".loc 2 338 10" (presumably j1(), the order-1 Bessel function of the first
// kind, judging by the kernel name -- confirm against the .cu source), and
// stores the result to dst[i * param_3 + j].
//   param_0: upper bound for the index derived from the y block/thread ids
//   param_1: upper bound for the index derived from the x block/thread ids
//   param_2: destination base address (generic, converted to global below)
//   param_3: destination multiplier applied to the x-derived index
//   param_4: source base address (generic, converted to global below)
//   param_5: source multiplier applied to the x-derived index
// Both indices advance by (blocks in grid * threads per block) of their
// dimension, so the loops cover the full range for any launch shape.
// NOTE(review): indices are computed in 32-bit signed arithmetic and only
// widened to 64 bits after the multiply-add.
.visible .entry map_j1_double(
.param .u32 map_j1_double_param_0,
.param .u32 map_j1_double_param_1,
.param .u64 map_j1_double_param_2,
.param .u32 map_j1_double_param_3,
.param .u64 map_j1_double_param_4,
.param .u32 map_j1_double_param_5
)
{
// 8 bytes of per-thread local storage: two 4-byte slots (at %SP+4 and %SP+0)
// used to hand an integer to/from __internal_trig_reduction_slowpathd.
.local .align 4 .b8 __local_depot23[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<17>;
.reg .f32 %f<5>;
.reg .s32 %r<43>;
.reg .s64 %rd<20>;
.reg .f64 %fd<215>;
mov.u64 %SPL, __local_depot23;
cvta.local.u64 %SP, %SPL;
// Load kernel arguments.
ld.param.u32 %r14, [map_j1_double_param_0];
ld.param.u32 %r15, [map_j1_double_param_1];
ld.param.u64 %rd1, [map_j1_double_param_2];
ld.param.u32 %r16, [map_j1_double_param_3];
ld.param.u64 %rd2, [map_j1_double_param_4];
ld.param.u32 %r17, [map_j1_double_param_5];
.loc 1 40 1
// %r39 = ntid.x * ctaid.x + tid.x : starting value of the x-derived index.
mov.u32 %r18, %ntid.x;
mov.u32 %r19, %ctaid.x;
mov.u32 %r20, %tid.x;
mad.lo.s32 %r39, %r18, %r19, %r20;
.loc 1 40 1
// Nothing to do for this thread if the starting index is already >= param_1.
setp.ge.s32 %p1, %r39, %r15;
@%p1 bra BB23_24;
// %rd3 = source base (param_4), %rd17 = destination base (param_2).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd17, %rd1;
// ---- Outer loop over the x-derived index %r39 ----
BB23_2:
.loc 1 40 1
// %r40 = ntid.y * ctaid.y + tid.y : the y-derived index restarts on every
// outer iteration.
mov.u32 %r21, %ctaid.y;
mov.u32 %r22, %ntid.y;
mov.u32 %r23, %tid.y;
mad.lo.s32 %r40, %r22, %r21, %r23;
.loc 1 40 1
setp.ge.s32 %p2, %r40, %r14;
@%p2 bra BB23_23;
// ---- Inner loop over the y-derived index %r40 ----
BB23_3:
.loc 1 40 1
// %fd1 = src[%r39 * param_5 + %r40] (8-byte elements).
mad.lo.s32 %r28, %r39, %r17, %r40;
mul.wide.s32 %rd4, %r28, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 2 338 10
// %fd2 = |x|. The routine works on the magnitude; the sign is re-applied at
// BB23_22.
abs.f64 %fd2, %fd1;
// Range 1: |x| <= ~2.4158. setp.gtu is also true for NaN, so NaN skips every
// polynomial range and ends up in BB23_11.
setp.gtu.f64 %p3, %fd2, 0d400353AABAD7B784;
@%p3 bra BB23_5;
// Horner chain of fma.rn.f64 in |x|; the final value is multiplied by |x|.
mov.f64 %fd26, 0dBD4DD167A0DC3F55;
mov.f64 %fd27, 0d3D020E4ADCDE2AD3;
.loc 2 338 10
fma.rn.f64 %fd28, %fd27, %fd2, %fd26;
mov.f64 %fd29, 0d3D5503F5A491E487;
.loc 2 338 10
fma.rn.f64 %fd30, %fd28, %fd2, %fd29;
mov.f64 %fd31, 0d3DC1F29940C2403A;
.loc 2 338 10
fma.rn.f64 %fd32, %fd30, %fd2, %fd31;
mov.f64 %fd33, 0d3D84CF9302EACDEF;
.loc 2 338 10
fma.rn.f64 %fd34, %fd32, %fd2, %fd33;
mov.f64 %fd35, 0dBE384A53DBBCA436;
.loc 2 338 10
fma.rn.f64 %fd36, %fd34, %fd2, %fd35;
mov.f64 %fd37, 0d3D9779BEE4F63BCC;
.loc 2 338 10
fma.rn.f64 %fd38, %fd36, %fd2, %fd37;
mov.f64 %fd39, 0d3EA6C160E414F3F0;
.loc 2 338 10
fma.rn.f64 %fd40, %fd38, %fd2, %fd39;
mov.f64 %fd41, 0d3D8F3D2F12430699;
.loc 2 338 10
fma.rn.f64 %fd42, %fd40, %fd2, %fd41;
mov.f64 %fd43, 0dBF0C71C72C0CED04;
.loc 2 338 10
fma.rn.f64 %fd44, %fd42, %fd2, %fd43;
mov.f64 %fd45, 0d3D659BCA506F1128;
.loc 2 338 10
fma.rn.f64 %fd46, %fd44, %fd2, %fd45;
mov.f64 %fd47, 0d3F65555555506982;
.loc 2 338 10
fma.rn.f64 %fd48, %fd46, %fd2, %fd47;
mov.f64 %fd49, 0d3D15BA0B425F1BFB;
.loc 2 338 10
fma.rn.f64 %fd50, %fd48, %fd2, %fd49;
mov.f64 %fd51, 0dBFB0000000000065;
.loc 2 338 10
fma.rn.f64 %fd52, %fd50, %fd2, %fd51;
mov.f64 %fd53, 0d3C8729A7253FB679;
.loc 2 338 10
fma.rn.f64 %fd54, %fd52, %fd2, %fd53;
// Last coefficient is exactly 0.5.
mov.f64 %fd55, 0d3FE0000000000000;
.loc 2 338 10
fma.rn.f64 %fd56, %fd54, %fd2, %fd55;
mul.f64 %fd214, %fd56, %fd2;
bra.uni BB23_22;
BB23_5:
.loc 2 338 10
// Range 2: |x| <= ~5.4236.
setp.gtu.f64 %p4, %fd2, 0d4015B1D0574614EA;
@%p4 bra BB23_7;
// %fd58 = (|x| - ~3.8317) + small correction: the offset is applied as a
// high part plus a low part. (~3.8317 looks like the first positive zero of
// J1 -- TODO confirm.)
add.f64 %fd57, %fd2, 0dC00EA75575AF6F09;
add.f64 %fd58, %fd57, 0d3CA60155A9D1B256;
// Horner chain in the shifted argument %fd58; final value multiplied by %fd58.
mov.f64 %fd59, 0d3D41011A1DF02DAD;
mov.f64 %fd60, 0dBCF8D3CDBB60175E;
.loc 2 338 10
fma.rn.f64 %fd61, %fd60, %fd58, %fd59;
mov.f64 %fd62, 0d3D76013AC1E5E222;
.loc 2 338 10
fma.rn.f64 %fd63, %fd61, %fd58, %fd62;
mov.f64 %fd64, 0dBDBEC315D96D5F03;
.loc 2 338 10
fma.rn.f64 %fd65, %fd63, %fd58, %fd64;
mov.f64 %fd66, 0dBDF03BE1B4B57207;
.loc 2 338 10
fma.rn.f64 %fd67, %fd65, %fd58, %fd66;
mov.f64 %fd68, 0d3E345695F8B660F7;
.loc 2 338 10
fma.rn.f64 %fd69, %fd67, %fd58, %fd68;
mov.f64 %fd70, 0d3E617069FCFCFFF4;
.loc 2 338 10
fma.rn.f64 %fd71, %fd69, %fd58, %fd70;
mov.f64 %fd72, 0dBEA33825C36745EB;
.loc 2 338 10
fma.rn.f64 %fd73, %fd71, %fd58, %fd72;
mov.f64 %fd74, 0dBEC9799D4F90931B;
.loc 2 338 10
fma.rn.f64 %fd75, %fd73, %fd58, %fd74;
mov.f64 %fd76, 0d3F083A06E2F7DF13;
.loc 2 338 10
fma.rn.f64 %fd77, %fd75, %fd58, %fd76;
mov.f64 %fd78, 0d3F26E4C2D53A7CF6;
.loc 2 338 10
fma.rn.f64 %fd79, %fd77, %fd58, %fd78;
mov.f64 %fd80, 0dBF624B3409957B1C;
.loc 2 338 10
fma.rn.f64 %fd81, %fd79, %fd58, %fd80;
mov.f64 %fd82, 0dBF7537544C3325DF;
.loc 2 338 10
fma.rn.f64 %fd83, %fd81, %fd58, %fd82;
mov.f64 %fd84, 0d3FAB589D1DA138E2;
.loc 2 338 10
fma.rn.f64 %fd85, %fd83, %fd58, %fd84;
mov.f64 %fd86, 0d3FAAE8A39F51AD13;
.loc 2 338 10
fma.rn.f64 %fd87, %fd85, %fd58, %fd86;
mov.f64 %fd88, 0dBFD9C6CF582CBF7F;
.loc 2 338 10
fma.rn.f64 %fd89, %fd87, %fd58, %fd88;
mul.f64 %fd214, %fd89, %fd58;
bra.uni BB23_22;
BB23_7:
.loc 2 338 10
// Range 3: |x| <= ~8.5945.
setp.gtu.f64 %p5, %fd2, 0d40213065E54C1AA9;
@%p5 bra BB23_9;
// %fd91 = (|x| - ~7.0156) + small correction, again as high + low parts.
// (~7.0156 looks like the second positive zero of J1 -- TODO confirm.)
add.f64 %fd90, %fd2, 0dC01C0FF5F3B47250;
add.f64 %fd91, %fd90, 0d3C9B226D9D243827;
// Horner chain in the shifted argument %fd91; final value multiplied by %fd91.
mov.f64 %fd92, 0dBD40E8363DB649A9;
mov.f64 %fd93, 0d3CF3EB867515FAD6;
.loc 2 338 10
fma.rn.f64 %fd94, %fd93, %fd91, %fd92;
mov.f64 %fd95, 0dBD73B7DD4A6608FB;
.loc 2 338 10
fma.rn.f64 %fd96, %fd94, %fd91, %fd95;
mov.f64 %fd97, 0d3DBEC5E01482C750;
.loc 2 338 10
fma.rn.f64 %fd98, %fd96, %fd91, %fd97;
mov.f64 %fd99, 0d3DEC62BB9E882103;
.loc 2 338 10
fma.rn.f64 %fd100, %fd98, %fd91, %fd99;
mov.f64 %fd101, 0dBE34462EED732A23;
.loc 2 338 10
fma.rn.f64 %fd102, %fd100, %fd91, %fd101;
mov.f64 %fd103, 0dBE5D48DCAD7DC59B;
.loc 2 338 10
fma.rn.f64 %fd104, %fd102, %fd91, %fd103;
mov.f64 %fd105, 0d3EA3026DF29167E9;
.loc 2 338 10
fma.rn.f64 %fd106, %fd104, %fd91, %fd105;
mov.f64 %fd107, 0d3EC4255B0119666C;
.loc 2 338 10
fma.rn.f64 %fd108, %fd106, %fd91, %fd107;
mov.f64 %fd109, 0dBF0796A751B32693;
.loc 2 338 10
fma.rn.f64 %fd110, %fd108, %fd91, %fd109;
mov.f64 %fd111, 0dBF207358BBDBA284;
.loc 2 338 10
fma.rn.f64 %fd112, %fd110, %fd91, %fd111;
mov.f64 %fd113, 0d3F613FBC7D6927B1;
.loc 2 338 10
fma.rn.f64 %fd114, %fd112, %fd91, %fd113;
mov.f64 %fd115, 0d3F69A4B292E3DD75;
.loc 2 338 10
fma.rn.f64 %fd116, %fd114, %fd91, %fd115;
mov.f64 %fd117, 0dBFA80C83BDEEE4FB;
.loc 2 338 10
fma.rn.f64 %fd118, %fd116, %fd91, %fd117;
mov.f64 %fd119, 0dBF95E70DC60362BF;
.loc 2 338 10
fma.rn.f64 %fd120, %fd118, %fd91, %fd119;
mov.f64 %fd121, 0d3FD33518B3874E8A;
.loc 2 338 10
fma.rn.f64 %fd122, %fd120, %fd91, %fd121;
mul.f64 %fd214, %fd122, %fd91;
bra.uni BB23_22;
BB23_9:
.loc 2 338 10
// |x| > ~8.5945 or NaN. If |x| is exactly +infinity the result is 0;
// setp.neu is true for NaN, so NaN continues into BB23_11.
abs.f64 %fd123, %fd2;
setp.neu.f64 %p6, %fd123, 0d7FF0000000000000;
@%p6 bra BB23_11;
mov.f64 %fd214, 0d0000000000000000;
bra.uni BB23_22;
BB23_11:
// %rd6 = generic address of the local slot at %SP+4.
add.u64 %rd6, %SP, 4;
.loc 2 338 10
// Reciprocal of |x|: single-precision approximation as a seed ...
// inline asm
cvt.rn.f32.f64 %f1,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd126,%f2;
// inline asm
// ... refined in double precision: e = 1 - |x|*r; %fd131 = r + r*(e + e*e).
neg.f64 %fd127, %fd2;
mov.f64 %fd128, 0d3FF0000000000000;
.loc 2 338 10
fma.rn.f64 %fd129, %fd127, %fd126, %fd128;
fma.rn.f64 %fd130, %fd129, %fd129, %fd129;
fma.rn.f64 %fd131, %fd130, %fd126, %fd126;
// %fd132 = (1/|x|)^2, the variable of the two polynomials below.
mul.f64 %fd132, %fd131, %fd131;
// First polynomial in %fd132 -> %fd147 (used as a scale factor in %fd7).
mov.f64 %fd133, 0dC099C06322A3F8BE;
mov.f64 %fd134, 0d40CD02EA3F2F6751;
.loc 2 338 10
fma.rn.f64 %fd135, %fd134, %fd132, %fd133;
mov.f64 %fd136, 0d405B89354DA77324;
.loc 2 338 10
fma.rn.f64 %fd137, %fd135, %fd132, %fd136;
mov.f64 %fd138, 0dC01E352294653188;
.loc 2 338 10
fma.rn.f64 %fd139, %fd137, %fd132, %fd138;
mov.f64 %fd140, 0d3FE9BC7DB16BD7A7;
.loc 2 338 10
fma.rn.f64 %fd141, %fd139, %fd132, %fd140;
mov.f64 %fd142, 0dBFC8BFE1C3A4F741;
.loc 2 338 10
fma.rn.f64 %fd143, %fd141, %fd132, %fd142;
mov.f64 %fd144, 0d3FC7FFFFF0D00BE2;
.loc 2 338 10
fma.rn.f64 %fd145, %fd143, %fd132, %fd144;
mov.f64 %fd146, 0d3FF00000000068CC;
.loc 2 338 10
fma.rn.f64 %fd147, %fd145, %fd132, %fd146;
// Second polynomial in %fd132 -> %fd166 (used to build the angle %fd6).
mov.f64 %fd148, 0d415A30AC6857BEE0;
mov.f64 %fd149, 0dC18DA26B212FDC9A;
.loc 2 338 10
fma.rn.f64 %fd150, %fd149, %fd132, %fd148;
mov.f64 %fd151, 0dC11764222AD7C910;
.loc 2 338 10
fma.rn.f64 %fd152, %fd150, %fd132, %fd151;
mov.f64 %fd153, 0d40CEB02E0C306857;
.loc 2 338 10
fma.rn.f64 %fd154, %fd152, %fd132, %fd153;
mov.f64 %fd155, 0dC08351859FA2B23B;
.loc 2 338 10
fma.rn.f64 %fd156, %fd154, %fd132, %fd155;
mov.f64 %fd157, 0d403E65A07AF51F42;
.loc 2 338 10
fma.rn.f64 %fd158, %fd156, %fd132, %fd157;
mov.f64 %fd159, 0dC002F2B817F77A57;
.loc 2 338 10
fma.rn.f64 %fd160, %fd158, %fd132, %fd159;
mov.f64 %fd161, 0d3FD7BCC34DA069FD;
.loc 2 338 10
fma.rn.f64 %fd162, %fd160, %fd132, %fd161;
mov.f64 %fd163, 0dBFC4FFFFF8A44463;
.loc 2 338 10
fma.rn.f64 %fd164, %fd162, %fd132, %fd163;
mov.f64 %fd165, 0d3FD7FFFFFFFF5CD7;
.loc 2 338 10
fma.rn.f64 %fd166, %fd164, %fd132, %fd165;
// %fd6 = |x| + %fd166 * (1/|x|) : the angle fed to the trig evaluation below.
fma.rn.f64 %fd6, %fd166, %fd131, %fd2;
// %fd7 = rsqrt(|x|) * ~0.79788 * %fd147 : amplitude applied at BB23_21.
// (~0.79788 matches sqrt(2/pi) -- TODO confirm.)
rsqrt.approx.f64 %fd167, %fd2;
mul.f64 %fd168, %fd167, 0d3FE9884533D43651;
mul.f64 %fd7, %fd168, %fd147;
// First reduction of %fd6: %r41 = nearest integer to %fd6 * ~0.63662 (2/pi).
mul.f64 %fd169, %fd6, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r41, %fd169;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 338 10
// Park %r41 in the local slot; it is reloaded after the slow-path call.
st.local.u32 [%rd7], %r41;
cvt.rn.f64.s32 %fd170, %r41;
neg.f64 %fd171, %fd170;
// %fd210 = %fd6 - %r41 * (pi/2), with pi/2 supplied as three successively
// smaller constants (%fd172, %fd174, %fd176). These three registers are
// reused by the second reduction at BB23_15.
mov.f64 %fd172, 0d3FF921FB54442D18;
.loc 2 338 10
fma.rn.f64 %fd173, %fd171, %fd172, %fd6;
mov.f64 %fd174, 0d3C91A62633145C00;
.loc 2 338 10
fma.rn.f64 %fd175, %fd171, %fd174, %fd173;
mov.f64 %fd176, 0d397B839A252049C0;
.loc 2 338 10
fma.rn.f64 %fd210, %fd171, %fd176, %fd175;
// The fast reduction is kept only while |%fd6| <= 2^31; larger values (and
// NaN, since setp.leu is true for unordered) are handled as coded below.
abs.f64 %fd177, %fd6;
setp.leu.f64 %p7, %fd177, 0d41E0000000000000;
@%p7 bra BB23_13;
// Slow path: pass %fd6 and the address of the local slot to the helper.
// Its return value replaces %fd210 and %r41 is reloaded from the slot
// afterwards (presumably the helper writes the quadrant there -- its body
// is not visible here).
// Callseq Start 3
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd6;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 338 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd210, [retval0+0];
}
// Callseq End 3
ld.local.u32 %r41, [%rd7];
BB23_13:
// Rebuild a shifted angle: %fd211 = (%fd210 - ~2.35619 (3*pi/4)) +
// (%r41 & 3) * pi/2.
and.b32 %r29, %r41, 3;
cvt.rn.f64.s32 %fd178, %r29;
add.f64 %fd179, %fd210, 0dC002D97C7F3321D2;
fma.rn.f64 %fd211, %fd178, 0d3FF921FB54442D18, %fd179;
// If the shifted angle is infinite, multiply it by +0.0 (infinity * 0 yields
// NaN, which then flows through the rest of the computation).
abs.f64 %fd180, %fd211;
setp.neu.f64 %p8, %fd180, 0d7FF0000000000000;
@%p8 bra BB23_15;
mov.f64 %fd181, 0d0000000000000000;
.loc 2 338 10
mul.rn.f64 %fd211, %fd211, %fd181;
BB23_15:
// Second reduction, on %fd211, using the local slot at %SP+0.
add.u64 %rd10, %SP, 0;
.loc 2 338 10
mul.f64 %fd182, %fd211, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r42, %fd182;
cvta.to.local.u64 %rd11, %rd10;
.loc 2 338 10
st.local.u32 [%rd11], %r42;
cvt.rn.f64.s32 %fd183, %r42;
neg.f64 %fd184, %fd183;
// %fd212 = %fd211 - %r42 * (pi/2), reusing the three-part constant from above.
fma.rn.f64 %fd186, %fd184, %fd172, %fd211;
fma.rn.f64 %fd188, %fd184, %fd174, %fd186;
fma.rn.f64 %fd212, %fd184, %fd176, %fd188;
abs.f64 %fd190, %fd211;
setp.leu.f64 %p9, %fd190, 0d41E0000000000000;
@%p9 bra BB23_17;
// Slow path for |%fd211| > 2^31, same calling pattern as Callseq 3.
// Callseq Start 4
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd211;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 2 338 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd212, [retval0+0];
}
// Callseq End 4
ld.local.u32 %r42, [%rd11];
BB23_17:
// %r11 = %r42 + 1. Bit 0 of %r11 selects which 64-byte half of
// __cudart_sin_cos_coeffs is used (%r31 is 0 or 8 entries) and the leading
// coefficient; bit 1 selects the final sign (BB23_19).
add.s32 %r11, %r42, 1;
shl.b32 %r30, %r11, 3;
and.b32 %r31, %r30, 8;
and.b32 %r32, %r11, 1;
setp.eq.b32 %p10, %r32, 1;
// %p11 is true when %r11 is even.
not.pred %p11, %p10;
selp.f64 %fd191, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p11;
mul.wide.u32 %rd14, %r31, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 2 338 10
// Horner chain in %fd17 = %fd212^2 with coefficients read from byte offsets
// +8 .. +48 of the selected table half.
ld.const.f64 %fd192, [%rd16+8];
mul.rn.f64 %fd17, %fd212, %fd212;
fma.rn.f64 %fd193, %fd191, %fd17, %fd192;
ld.const.f64 %fd194, [%rd16+16];
fma.rn.f64 %fd195, %fd193, %fd17, %fd194;
ld.const.f64 %fd196, [%rd16+24];
fma.rn.f64 %fd197, %fd195, %fd17, %fd196;
ld.const.f64 %fd198, [%rd16+32];
fma.rn.f64 %fd199, %fd197, %fd17, %fd198;
ld.const.f64 %fd200, [%rd16+40];
fma.rn.f64 %fd201, %fd199, %fd17, %fd200;
ld.const.f64 %fd202, [%rd16+48];
fma.rn.f64 %fd18, %fd201, %fd17, %fd202;
// Even %r11: %fd213 = p*t + t (odd in t). Odd %r11: %fd213 = p*t^2 + 1.0
// (even in t; %fd128 holds 1.0 from BB23_11). Presumably the sine-like and
// cosine-like forms respectively -- confirm against the math header.
fma.rn.f64 %fd213, %fd18, %fd212, %fd212;
@%p11 bra BB23_19;
fma.rn.f64 %fd213, %fd18, %fd17, %fd128;
BB23_19:
// If bit 1 of %r11 is set, negate %fd213 (computed as %fd213 * -1.0 + 0.0).
and.b32 %r33, %r11, 2;
setp.eq.s32 %p12, %r33, 0;
@%p12 bra BB23_21;
mov.f64 %fd204, 0d0000000000000000;
mov.f64 %fd205, 0dBFF0000000000000;
.loc 2 338 10
fma.rn.f64 %fd213, %fd213, %fd205, %fd204;
BB23_21:
// Large-argument result = amplitude * trig term.
mul.f64 %fd214, %fd7, %fd213;
// ---- Common tail for all ranges: %fd214 holds the value for |x| ----
BB23_22:
// Re-apply the sign of the input: the result is negated when x < 0.
neg.f64 %fd206, %fd214;
setp.lt.f64 %p13, %fd1, 0d0000000000000000;
selp.f64 %fd207, %fd206, %fd214, %p13;
// For |x| below ~1e-30 the result is replaced by x * 0.5.
mul.f64 %fd208, %fd1, 0d3FE0000000000000;
setp.lt.f64 %p14, %fd2, 0d39B4484BFEEBC2A0;
selp.f64 %fd209, %fd208, %fd207, %p14;
.loc 1 40 42
// dst[%r39 * param_3 + %r40] = result (8-byte elements).
mad.lo.s32 %r34, %r39, %r16, %r40;
mul.wide.s32 %rd18, %r34, 8;
add.s64 %rd19, %rd17, %rd18;
st.global.f64 [%rd19], %fd209;
.loc 1 40 22
// Advance the y-derived index by nctaid.y * ntid.y and repeat while it is
// below param_0.
mov.u32 %r36, %nctaid.y;
mad.lo.s32 %r40, %r36, %r22, %r40;
.loc 1 40 1
setp.lt.s32 %p15, %r40, %r14;
@%p15 bra BB23_3;
BB23_23:
.loc 1 40 22
// Advance the x-derived index by nctaid.x * ntid.x and repeat while it is
// below param_1.
mov.u32 %r37, %nctaid.x;
mad.lo.s32 %r39, %r37, %r18, %r39;
.loc 1 40 1
setp.lt.s32 %p16, %r39, %r15;
@%p16 bra BB23_2;
BB23_24:
.loc 1 40 2
ret;
}
.visible .entry map_lgamma_double(
.param .u32 map_lgamma_double_param_0,
.param .u32 map_lgamma_double_param_1,
.param .u64 map_lgamma_double_param_2,
.param .u32 map_lgamma_double_param_3,
.param .u64 map_lgamma_double_param_4,
.param .u32 map_lgamma_double_param_5
)
{
.reg .pred %p<49>;
.reg .f32 %f<21>;
.reg .s32 %r<130>;
.reg .s64 %rd<14>;
.reg .f64 %fd<452>;
ld.param.u32 %r49, [map_lgamma_double_param_0];
ld.param.u32 %r50, [map_lgamma_double_param_1];
ld.param.u64 %rd1, [map_lgamma_double_param_2];
ld.param.u32 %r51, [map_lgamma_double_param_3];
ld.param.u64 %rd2, [map_lgamma_double_param_4];
ld.param.u32 %r52, [map_lgamma_double_param_5];
.loc 1 41 1
mov.u32 %r53, %ntid.x;
mov.u32 %r54, %ctaid.x;
mov.u32 %r55, %tid.x;
mad.lo.s32 %r112, %r53, %r54, %r55;
.loc 1 41 1
setp.ge.s32 %p1, %r112, %r50;
@%p1 bra BB24_72;
.loc 1 41 1
mov.u32 %r56, %ntid.y;
.loc 1 41 22
mov.u32 %r57, %nctaid.y;
mul.lo.s32 %r2, %r57, %r56;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd11, %rd1;
BB24_2:
.loc 1 41 1
mov.u32 %r58, %ctaid.y;
mov.u32 %r60, %tid.y;
mad.lo.s32 %r113, %r56, %r58, %r60;
.loc 1 41 1
setp.ge.s32 %p2, %r113, %r49;
@%p2 bra BB24_71;
BB24_3:
.loc 1 41 1
mad.lo.s32 %r65, %r112, %r52, %r113;
mul.wide.s32 %rd4, %r65, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 1 41 1
ld.global.f64 %fd1, [%rd5];
.loc 2 423 10
abs.f64 %fd2, %fd1;
setp.gtu.f64 %p3, %fd2, 0d7FF0000000000000;
@%p3 bra BB24_69;
setp.ltu.f64 %p4, %fd2, 0d4008000000000000;
@%p4 bra BB24_20;
setp.ltu.f64 %p5, %fd2, 0d4020000000000000;
@%p5 bra BB24_19;
// inline asm
cvt.rn.f32.f64 %f1,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd52,%f2;
// inline asm
neg.f64 %fd53, %fd2;
mov.f64 %fd54, 0d3FF0000000000000;
.loc 2 423 10
fma.rn.f64 %fd55, %fd53, %fd52, %fd54;
fma.rn.f64 %fd56, %fd55, %fd55, %fd55;
fma.rn.f64 %fd57, %fd56, %fd52, %fd52;
mul.f64 %fd58, %fd57, %fd57;
mov.f64 %fd59, 0d3F4B68B992738FBF;
mov.f64 %fd60, 0dBF5AC321034783F9;
.loc 2 423 10
fma.rn.f64 %fd61, %fd60, %fd58, %fd59;
mov.f64 %fd62, 0dBF4380D01E4F7B8C;
.loc 2 423 10
fma.rn.f64 %fd63, %fd61, %fd58, %fd62;
mov.f64 %fd64, 0d3F4A019FA29F7264;
.loc 2 423 10
fma.rn.f64 %fd65, %fd63, %fd58, %fd64;
mov.f64 %fd66, 0dBF66C16C16B2ACEC;
.loc 2 423 10
fma.rn.f64 %fd67, %fd65, %fd58, %fd66;
mov.f64 %fd68, 0d3FB5555555555545;
.loc 2 423 10
fma.rn.f64 %fd69, %fd67, %fd58, %fd68;
mov.f64 %fd70, 0d3FED67F1C864BEAE;
.loc 2 423 10
fma.rn.f64 %fd3, %fd69, %fd57, %fd70;
{
.reg .b32 %temp;
mov.b64 {%temp, %r114}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r115, %temp}, %fd2;
}
setp.lt.f64 %p6, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p7, %fd2, 0d0000000000000000;
and.pred %p8, %p7, %p6;
@%p8 bra BB24_12;
abs.f64 %fd71, %fd2;
setp.gtu.f64 %p9, %fd71, 0d7FF0000000000000;
@%p9 bra BB24_11;
setp.neu.f64 %p10, %fd2, 0d0000000000000000;
@%p10 bra BB24_10;
mov.f64 %fd445, 0dFFF0000000000000;
bra.uni BB24_18;
BB24_10:
.loc 2 423 10
setp.eq.f64 %p11, %fd2, 0d7FF0000000000000;
selp.f64 %fd445, %fd2, 0dFFF8000000000000, %p11;
bra.uni BB24_18;
BB24_11:
.loc 2 423 10
add.f64 %fd445, %fd2, %fd2;
bra.uni BB24_18;
BB24_12:
.loc 2 423 10
setp.lt.u32 %p12, %r114, 1048576;
@%p12 bra BB24_14;
mov.u32 %r116, -1023;
bra.uni BB24_15;
BB24_14:
.loc 2 423 10
mul.f64 %fd73, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r114}, %fd73;
}
{
.reg .b32 %temp;
mov.b64 {%r115, %temp}, %fd73;
}
mov.u32 %r116, -1077;
BB24_15:
.loc 2 423 10
shr.s32 %r68, %r114, 20;
add.s32 %r117, %r116, %r68;
and.b32 %r69, %r114, -2146435073;
or.b32 %r70, %r69, 1072693248;
mov.b64 %fd444, {%r115, %r70};
setp.lt.u32 %p13, %r70, 1073127583;
@%p13 bra BB24_17;
{
.reg .b32 %temp;
mov.b64 {%r71, %temp}, %fd444;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd444;
}
add.s32 %r73, %r72, -1048576;
mov.b64 %fd444, {%r71, %r73};
add.s32 %r117, %r117, 1;
BB24_17:
add.f64 %fd74, %fd444, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f5,%fd74;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd75,%f6;
// inline asm
neg.f64 %fd77, %fd74;
fma.rn.f64 %fd78, %fd77, %fd75, %fd54;
fma.rn.f64 %fd79, %fd78, %fd78, %fd78;
fma.rn.f64 %fd80, %fd79, %fd75, %fd75;
add.f64 %fd81, %fd444, 0dBFF0000000000000;
mul.f64 %fd82, %fd81, %fd80;
fma.rn.f64 %fd83, %fd81, %fd80, %fd82;
mul.f64 %fd84, %fd83, %fd83;
mov.f64 %fd85, 0d3ED0EE258B7A8B04;
mov.f64 %fd86, 0d3EB1380B3AE80F1E;
.loc 2 423 10
fma.rn.f64 %fd87, %fd86, %fd84, %fd85;
mov.f64 %fd88, 0d3EF3B2669F02676F;
.loc 2 423 10
fma.rn.f64 %fd89, %fd87, %fd84, %fd88;
mov.f64 %fd90, 0d3F1745CBA9AB0956;
.loc 2 423 10
fma.rn.f64 %fd91, %fd89, %fd84, %fd90;
mov.f64 %fd92, 0d3F3C71C72D1B5154;
.loc 2 423 10
fma.rn.f64 %fd93, %fd91, %fd84, %fd92;
mov.f64 %fd94, 0d3F624924923BE72D;
.loc 2 423 10
fma.rn.f64 %fd95, %fd93, %fd84, %fd94;
mov.f64 %fd96, 0d3F8999999999A3C4;
.loc 2 423 10
fma.rn.f64 %fd97, %fd95, %fd84, %fd96;
mov.f64 %fd98, 0d3FB5555555555554;
.loc 2 423 10
fma.rn.f64 %fd99, %fd97, %fd84, %fd98;
sub.f64 %fd100, %fd81, %fd83;
add.f64 %fd101, %fd100, %fd100;
neg.f64 %fd102, %fd83;
fma.rn.f64 %fd103, %fd102, %fd81, %fd101;
mul.f64 %fd104, %fd80, %fd103;
mul.f64 %fd105, %fd99, %fd84;
fma.rn.f64 %fd106, %fd105, %fd83, %fd104;
cvt.rn.f64.s32 %fd107, %r117;
mov.f64 %fd108, 0d3FE62E42FEFA39EF;
.loc 2 423 10
fma.rn.f64 %fd109, %fd107, %fd108, %fd83;
neg.s32 %r74, %r117;
cvt.rn.f64.s32 %fd110, %r74;
fma.rn.f64 %fd111, %fd110, %fd108, %fd109;
sub.f64 %fd112, %fd111, %fd83;
sub.f64 %fd113, %fd106, %fd112;
mov.f64 %fd114, 0d3C7ABC9E3B39803F;
.loc 2 423 10
fma.rn.f64 %fd115, %fd107, %fd114, %fd113;
add.f64 %fd445, %fd109, %fd115;
BB24_18:
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd445;
}
add.s32 %r76, %r75, -1048576;
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd445;
}
mov.b64 %fd116, {%r77, %r76};
add.f64 %fd117, %fd2, 0dBFE0000000000000;
fma.rn.f64 %fd118, %fd116, %fd117, %fd3;
fma.rn.f64 %fd120, %fd116, %fd117, %fd53;
add.f64 %fd121, %fd120, %fd118;
setp.eq.f64 %p14, %fd2, 0d7FF0000000000000;
selp.f64 %fd451, %fd2, %fd121, %p14;
bra.uni BB24_37;
BB24_19:
.loc 2 423 10
add.f64 %fd122, %fd2, 0dC008000000000000;
mov.f64 %fd123, 0dC1122B7730207EF3;
mov.f64 %fd124, 0dC0AF7040BB18FB05;
.loc 2 423 10
fma.rn.f64 %fd125, %fd124, %fd122, %fd123;
mov.f64 %fd126, 0dC1585A0DB81DE7D0;
.loc 2 423 10
fma.rn.f64 %fd127, %fd125, %fd122, %fd126;
mov.f64 %fd128, 0dC18A992B8BA94677;
.loc 2 423 10
fma.rn.f64 %fd129, %fd127, %fd122, %fd128;
mov.f64 %fd130, 0dC1AAC5CB6957CC20;
.loc 2 423 10
fma.rn.f64 %fd131, %fd129, %fd122, %fd130;
mov.f64 %fd132, 0dC1BC0E2B308774BE;
.loc 2 423 10
fma.rn.f64 %fd133, %fd131, %fd122, %fd132;
mov.f64 %fd134, 0dC1C6BA13DCAE7F67;
.loc 2 423 10
fma.rn.f64 %fd135, %fd133, %fd122, %fd134;
mov.f64 %fd136, 0dC1CCF33B9C3D120C;
.loc 2 423 10
fma.rn.f64 %fd137, %fd135, %fd122, %fd136;
add.f64 %fd138, %fd122, 0dC08FF62E0BE189FE;
mov.f64 %fd139, 0dC10074FACE10C93F;
.loc 2 423 10
fma.rn.f64 %fd140, %fd138, %fd122, %fd139;
mov.f64 %fd141, 0dC151B662F8D75791;
.loc 2 423 10
fma.rn.f64 %fd142, %fd140, %fd122, %fd141;
mov.f64 %fd143, 0dC18EE64AB4D207F7;
.loc 2 423 10
fma.rn.f64 %fd144, %fd142, %fd122, %fd143;
mov.f64 %fd145, 0dC1B9051687C9951A;
.loc 2 423 10
fma.rn.f64 %fd146, %fd144, %fd122, %fd145;
mov.f64 %fd147, 0dC1D2B866BF0B853D;
.loc 2 423 10
fma.rn.f64 %fd148, %fd146, %fd122, %fd147;
mov.f64 %fd149, 0dC1D4E2130E9DC133;
.loc 2 423 10
fma.rn.f64 %fd150, %fd148, %fd122, %fd149;
div.rn.f64 %fd151, %fd137, %fd150;
add.f64 %fd451, %fd151, %fd122;
bra.uni BB24_37;
BB24_20:
.loc 2 423 10
setp.ltu.f64 %p15, %fd2, 0d3FF8000000000000;
@%p15 bra BB24_22;
add.f64 %fd152, %fd2, 0dC000000000000000;
mov.f64 %fd153, 0dBE71FA71D78C0EE2;
mov.f64 %fd154, 0d3E452636124338B3;
.loc 2 423 10
fma.rn.f64 %fd155, %fd154, %fd152, %fd153;
mov.f64 %fd156, 0d3E8D111F31E61306;
.loc 2 423 10
fma.rn.f64 %fd157, %fd155, %fd152, %fd156;
mov.f64 %fd158, 0dBEA0502BBE1B2706;
.loc 2 423 10
fma.rn.f64 %fd159, %fd157, %fd152, %fd158;
mov.f64 %fd160, 0d3EB06850B2970292;
.loc 2 423 10
fma.rn.f64 %fd161, %fd159, %fd152, %fd160;
mov.f64 %fd162, 0dBEC108474875033D;
.loc 2 423 10
fma.rn.f64 %fd163, %fd161, %fd152, %fd162;
mov.f64 %fd164, 0d3ED24ACCC62909DC;
.loc 2 423 10
fma.rn.f64 %fd165, %fd163, %fd152, %fd164;
mov.f64 %fd166, 0dBEE3CB25209E63BE;
.loc 2 423 10
fma.rn.f64 %fd167, %fd165, %fd152, %fd166;
mov.f64 %fd168, 0d3EF581CBBC8CDC7B;
.loc 2 423 10
fma.rn.f64 %fd169, %fd167, %fd152, %fd168;
mov.f64 %fd170, 0dBF078E04B85C7597;
.loc 2 423 10
fma.rn.f64 %fd171, %fd169, %fd152, %fd170;
mov.f64 %fd172, 0d3F1A12730CF45051;
.loc 2 423 10
fma.rn.f64 %fd173, %fd171, %fd152, %fd172;
mov.f64 %fd174, 0dBF2D3FD354062012;
.loc 2 423 10
fma.rn.f64 %fd175, %fd173, %fd152, %fd174;
mov.f64 %fd176, 0d3F40B36B0B4DE323;
.loc 2 423 10
fma.rn.f64 %fd177, %fd175, %fd152, %fd176;
mov.f64 %fd178, 0dBF538AC5C6D0317A;
.loc 2 423 10
fma.rn.f64 %fd179, %fd177, %fd152, %fd178;
mov.f64 %fd180, 0d3F67ADD6EAAB19FC;
.loc 2 423 10
fma.rn.f64 %fd181, %fd179, %fd152, %fd180;
mov.f64 %fd182, 0dBF7E404FC20E4D5B;
.loc 2 423 10
fma.rn.f64 %fd183, %fd181, %fd152, %fd182;
mov.f64 %fd184, 0d3F951322AC7DA390;
.loc 2 423 10
fma.rn.f64 %fd185, %fd183, %fd152, %fd184;
mov.f64 %fd186, 0dBFB13E001A5578A3;
.loc 2 423 10
fma.rn.f64 %fd187, %fd185, %fd152, %fd186;
mov.f64 %fd188, 0d3FD4A34CC4A60FA3;
.loc 2 423 10
fma.rn.f64 %fd189, %fd187, %fd152, %fd188;
mov.f64 %fd190, 0d3FDB0EE6072093CF;
.loc 2 423 10
fma.rn.f64 %fd191, %fd189, %fd152, %fd190;
mul.f64 %fd451, %fd191, %fd152;
bra.uni BB24_37;
BB24_22:
.loc 2 423 10
setp.ltu.f64 %p16, %fd2, 0d3FE6666666666666;
@%p16 bra BB24_24;
mov.f64 %fd192, 0d3FF0000000000000;
.loc 2 423 10
sub.f64 %fd193, %fd192, %fd2;
mov.f64 %fd194, 0d3FA3EB504359EB88;
mov.f64 %fd195, 0d3F881F6D2A4C4310;
.loc 2 423 10
fma.rn.f64 %fd196, %fd195, %fd193, %fd194;
mov.f64 %fd197, 0d3FAE35D8DEB06317;
.loc 2 423 10
fma.rn.f64 %fd198, %fd196, %fd193, %fd197;
mov.f64 %fd199, 0d3FAED469A8B6ECCE;
.loc 2 423 10
fma.rn.f64 %fd200, %fd198, %fd193, %fd199;
mov.f64 %fd201, 0d3FACC1B1C357BEFE;
.loc 2 423 10
fma.rn.f64 %fd202, %fd200, %fd193, %fd201;
mov.f64 %fd203, 0d3FAD7154DB67F79F;
.loc 2 423 10
fma.rn.f64 %fd204, %fd202, %fd193, %fd203;
mov.f64 %fd205, 0d3FAFCC622CF2F7BB;
.loc 2 423 10
fma.rn.f64 %fd206, %fd204, %fd193, %fd205;
mov.f64 %fd207, 0d3FB11747A4D1CC43;
.loc 2 423 10
fma.rn.f64 %fd208, %fd206, %fd193, %fd207;
mov.f64 %fd209, 0d3FB24CE16A21B8AC;
.loc 2 423 10
fma.rn.f64 %fd210, %fd208, %fd193, %fd209;
mov.f64 %fd211, 0d3FB3B1C21A7BCB00;
.loc 2 423 10
fma.rn.f64 %fd212, %fd210, %fd193, %fd211;
mov.f64 %fd213, 0d3FB556723452ED57;
.loc 2 423 10
fma.rn.f64 %fd214, %fd212, %fd193, %fd213;
mov.f64 %fd215, 0d3FB748C00891544F;
.loc 2 423 10
fma.rn.f64 %fd216, %fd214, %fd193, %fd215;
mov.f64 %fd217, 0d3FB9A0207808CF40;
.loc 2 423 10
fma.rn.f64 %fd218, %fd216, %fd193, %fd217;
mov.f64 %fd219, 0d3FBC80673B8AE26B;
.loc 2 423 10
fma.rn.f64 %fd220, %fd218, %fd193, %fd219;
mov.f64 %fd221, 0d3FC010B364B7E555;
.loc 2 423 10
fma.rn.f64 %fd222, %fd220, %fd193, %fd221;
mov.f64 %fd223, 0d3FC2703A1D239658;
.loc 2 423 10
fma.rn.f64 %fd224, %fd222, %fd193, %fd223;
mov.f64 %fd225, 0d3FC5B40CB1137E6E;
.loc 2 423 10
fma.rn.f64 %fd226, %fd224, %fd193, %fd225;
mov.f64 %fd227, 0d3FCA8B9C17AC4F03;
.loc 2 423 10
fma.rn.f64 %fd228, %fd226, %fd193, %fd227;
mov.f64 %fd229, 0d3FD151322AC7CB52;
.loc 2 423 10
fma.rn.f64 %fd230, %fd228, %fd193, %fd229;
mov.f64 %fd231, 0d3FD9A4D55BEAB1D4;
.loc 2 423 10
fma.rn.f64 %fd232, %fd230, %fd193, %fd231;
mov.f64 %fd233, 0d3FEA51A6625307D6;
.loc 2 423 10
fma.rn.f64 %fd234, %fd232, %fd193, %fd233;
mov.f64 %fd235, 0d3FE2788CFC6FB619;
.loc 2 423 10
fma.rn.f64 %fd236, %fd234, %fd193, %fd235;
mul.f64 %fd451, %fd236, %fd193;
bra.uni BB24_37;
BB24_24:
mov.f64 %fd237, 0d3EA7B77CEB0625E8;
mov.f64 %fd238, 0dBE7844988BFE6590;
.loc 2 423 10
fma.rn.f64 %fd239, %fd238, %fd2, %fd237;
mov.f64 %fd240, 0dBE998C69C8710CC4;
.loc 2 423 10
fma.rn.f64 %fd241, %fd239, %fd2, %fd240;
mov.f64 %fd242, 0dBEF6527A5A11CF6E;
.loc 2 423 10
fma.rn.f64 %fd243, %fd241, %fd2, %fd242;
mov.f64 %fd244, 0d3F20EC2950B1B5DE;
.loc 2 423 10
fma.rn.f64 %fd245, %fd243, %fd2, %fd244;
mov.f64 %fd246, 0dBF2C4D80C24BA278;
.loc 2 423 10
fma.rn.f64 %fd247, %fd245, %fd2, %fd246;
mov.f64 %fd248, 0dBF5315B4E8CC0D09;
.loc 2 423 10
fma.rn.f64 %fd249, %fd247, %fd2, %fd248;
mov.f64 %fd250, 0d3F7D917F15D50020;
.loc 2 423 10
fma.rn.f64 %fd251, %fd249, %fd2, %fd250;
mov.f64 %fd252, 0dBF83B4ABB41CB6FA;
.loc 2 423 10
fma.rn.f64 %fd253, %fd251, %fd2, %fd252;
mov.f64 %fd254, 0dBFA59AF1275B7120;
.loc 2 423 10
fma.rn.f64 %fd255, %fd253, %fd2, %fd254;
mov.f64 %fd256, 0d3FC5512321A168A0;
.loc 2 423 10
fma.rn.f64 %fd257, %fd255, %fd2, %fd256;
mov.f64 %fd258, 0dBFA5815E8FDCE74C;
.loc 2 423 10
fma.rn.f64 %fd259, %fd257, %fd2, %fd258;
mov.f64 %fd260, 0dBFE4FCF4026ADD1A;
.loc 2 423 10
fma.rn.f64 %fd261, %fd259, %fd2, %fd260;
mov.f64 %fd262, 0d3FE2788CFC6FB5C8;
.loc 2 423 10
fma.rn.f64 %fd263, %fd261, %fd2, %fd262;
mul.f64 %fd264, %fd263, %fd2;
fma.rn.f64 %fd15, %fd264, %fd2, %fd2;
{
.reg .b32 %temp;
mov.b64 {%temp, %r118}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%r119, %temp}, %fd15;
}
setp.gt.f64 %p17, %fd15, 0d0000000000000000;
setp.lt.f64 %p18, %fd15, 0d7FF0000000000000;
and.pred %p19, %p17, %p18;
@%p19 bra BB24_30;
abs.f64 %fd265, %fd15;
setp.gtu.f64 %p20, %fd265, 0d7FF0000000000000;
@%p20 bra BB24_29;
setp.neu.f64 %p21, %fd15, 0d0000000000000000;
@%p21 bra BB24_28;
mov.f64 %fd447, 0dFFF0000000000000;
bra.uni BB24_36;
BB24_28:
.loc 2 423 10
setp.eq.f64 %p22, %fd15, 0d7FF0000000000000;
selp.f64 %fd447, %fd15, 0dFFF8000000000000, %p22;
bra.uni BB24_36;
BB24_29:
.loc 2 423 10
add.f64 %fd447, %fd15, %fd15;
bra.uni BB24_36;
BB24_30:
.loc 2 423 10
setp.lt.u32 %p23, %r118, 1048576;
@%p23 bra BB24_32;
mov.u32 %r120, -1023;
bra.uni BB24_33;
BB24_32:
.loc 2 423 10
mul.f64 %fd267, %fd15, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r118}, %fd267;
}
{
.reg .b32 %temp;
mov.b64 {%r119, %temp}, %fd267;
}
mov.u32 %r120, -1077;
BB24_33:
.loc 2 423 10
shr.s32 %r80, %r118, 20;
add.s32 %r121, %r120, %r80;
and.b32 %r81, %r118, -2146435073;
or.b32 %r82, %r81, 1072693248;
mov.b64 %fd446, {%r119, %r82};
setp.lt.u32 %p24, %r82, 1073127583;
@%p24 bra BB24_35;
{
.reg .b32 %temp;
mov.b64 {%r83, %temp}, %fd446;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r84}, %fd446;
}
add.s32 %r85, %r84, -1048576;
mov.b64 %fd446, {%r83, %r85};
add.s32 %r121, %r121, 1;
BB24_35:
add.f64 %fd268, %fd446, 0d3FF0000000000000;
mov.f64 %fd270, 0d3FF0000000000000;
.loc 2 423 10
// inline asm
cvt.rn.f32.f64 %f9,%fd268;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd269,%f10;
// inline asm
neg.f64 %fd271, %fd268;
fma.rn.f64 %fd272, %fd271, %fd269, %fd270;
fma.rn.f64 %fd273, %fd272, %fd272, %fd272;
fma.rn.f64 %fd274, %fd273, %fd269, %fd269;
add.f64 %fd275, %fd446, 0dBFF0000000000000;
mul.f64 %fd276, %fd275, %fd274;
fma.rn.f64 %fd277, %fd275, %fd274, %fd276;
mul.f64 %fd278, %fd277, %fd277;
mov.f64 %fd279, 0d3ED0EE258B7A8B04;
mov.f64 %fd280, 0d3EB1380B3AE80F1E;
.loc 2 423 10
fma.rn.f64 %fd281, %fd280, %fd278, %fd279;
mov.f64 %fd282, 0d3EF3B2669F02676F;
.loc 2 423 10
fma.rn.f64 %fd283, %fd281, %fd278, %fd282;
mov.f64 %fd284, 0d3F1745CBA9AB0956;
.loc 2 423 10
fma.rn.f64 %fd285, %fd283, %fd278, %fd284;
mov.f64 %fd286, 0d3F3C71C72D1B5154;
.loc 2 423 10
fma.rn.f64 %fd287, %fd285, %fd278, %fd286;
mov.f64 %fd288, 0d3F624924923BE72D;
.loc 2 423 10
fma.rn.f64 %fd289, %fd287, %fd278, %fd288;
mov.f64 %fd290, 0d3F8999999999A3C4;
.loc 2 423 10
fma.rn.f64 %fd291, %fd289, %fd278, %fd290;
mov.f64 %fd292, 0d3FB5555555555554;
.loc 2 423 10
fma.rn.f64 %fd293, %fd291, %fd278, %fd292;
sub.f64 %fd294, %fd275, %fd277;
add.f64 %fd295, %fd294, %fd294;
neg.f64 %fd296, %fd277;
fma.rn.f64 %fd297, %fd296, %fd275, %fd295;
mul.f64 %fd298, %fd274, %fd297;
mul.f64 %fd299, %fd293, %fd278;
fma.rn.f64 %fd300, %fd299, %fd277, %fd298;
cvt.rn.f64.s32 %fd301, %r121;
mov.f64 %fd302, 0d3FE62E42FEFA39EF;
.loc 2 423 10
fma.rn.f64 %fd303, %fd301, %fd302, %fd277;
neg.s32 %r86, %r121;
cvt.rn.f64.s32 %fd304, %r86;
fma.rn.f64 %fd305, %fd304, %fd302, %fd303;
sub.f64 %fd306, %fd305, %fd277;
sub.f64 %fd307, %fd300, %fd306;
mov.f64 %fd308, 0d3C7ABC9E3B39803F;
.loc 2 423 10
fma.rn.f64 %fd309, %fd301, %fd308, %fd307;
add.f64 %fd447, %fd303, %fd309;
BB24_36:
neg.f64 %fd451, %fd447;
BB24_37:
setp.ge.f64 %p25, %fd1, 0d0000000000000000;
@%p25 bra BB24_70;
cvt.rzi.f64.f64 %fd310, %fd2;
setp.neu.f64 %p26, %fd2, %fd310;
@%p26 bra BB24_40;
mov.f64 %fd451, 0d7FF0000000000000;
bra.uni BB24_70;
BB24_40:
.loc 2 423 10
setp.lt.f64 %p27, %fd2, 0d3BFD83C94FB6D2AC;
{
.reg .b32 %temp;
mov.b64 {%temp, %r126}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r127, %temp}, %fd2;
}
@%p27 bra BB24_57;
add.s32 %r87, %r126, 1048576;
mov.b64 %fd311, {%r127, %r87};
cvt.rni.f64.f64 %fd312, %fd311;
cvt.rzi.s64.f64 %rd6, %fd312;
cvt.u32.u64 %r28, %rd6;
neg.f64 %fd313, %fd312;
mov.f64 %fd314, 0d3FE0000000000000;
.loc 2 423 10
fma.rn.f64 %fd315, %fd313, %fd314, %fd2;
mul.f64 %fd316, %fd315, 0d3CA1A62633145C07;
mov.f64 %fd317, 0d400921FB54442D18;
.loc 2 423 10
fma.rn.f64 %fd318, %fd315, %fd317, %fd316;
shl.b32 %r88, %r28, 3;
and.b32 %r89, %r88, 8;
mul.rn.f64 %fd25, %fd318, %fd318;
and.b64 %rd7, %rd6, 1;
setp.eq.b64 %p28, %rd7, 1;
not.pred %p29, %p28;
selp.f64 %fd319, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p29;
mul.wide.u32 %rd8, %r89, 8;
mov.u64 %rd9, __cudart_sin_cos_coeffs;
add.s64 %rd10, %rd9, %rd8;
.loc 2 423 10
ld.const.f64 %fd320, [%rd10+8];
fma.rn.f64 %fd321, %fd319, %fd25, %fd320;
ld.const.f64 %fd322, [%rd10+16];
fma.rn.f64 %fd323, %fd321, %fd25, %fd322;
ld.const.f64 %fd324, [%rd10+24];
fma.rn.f64 %fd325, %fd323, %fd25, %fd324;
ld.const.f64 %fd326, [%rd10+32];
fma.rn.f64 %fd327, %fd325, %fd25, %fd326;
ld.const.f64 %fd328, [%rd10+40];
fma.rn.f64 %fd329, %fd327, %fd25, %fd328;
ld.const.f64 %fd330, [%rd10+48];
fma.rn.f64 %fd26, %fd329, %fd25, %fd330;
fma.rn.f64 %fd448, %fd26, %fd318, %fd318;
@%p29 bra BB24_43;
mov.f64 %fd331, 0d3FF0000000000000;
.loc 2 423 10
fma.rn.f64 %fd448, %fd26, %fd25, %fd331;
BB24_43:
and.b32 %r90, %r28, 2;
setp.eq.s32 %p30, %r90, 0;
@%p30 bra BB24_45;
mov.f64 %fd332, 0d0000000000000000;
mov.f64 %fd333, 0dBFF0000000000000;
.loc 2 423 10
fma.rn.f64 %fd448, %fd448, %fd333, %fd332;
BB24_45:
abs.f64 %fd334, %fd448;
mul.f64 %fd335, %fd334, %fd2;
div.rn.f64 %fd32, %fd317, %fd335;
{
.reg .b32 %temp;
mov.b64 {%temp, %r122}, %fd32;
}
{
.reg .b32 %temp;
mov.b64 {%r123, %temp}, %fd32;
}
setp.gt.f64 %p31, %fd32, 0d0000000000000000;
setp.lt.f64 %p32, %fd32, 0d7FF0000000000000;
and.pred %p33, %p31, %p32;
@%p33 bra BB24_51;
abs.f64 %fd337, %fd32;
setp.gtu.f64 %p34, %fd337, 0d7FF0000000000000;
@%p34 bra BB24_50;
setp.neu.f64 %p35, %fd32, 0d0000000000000000;
@%p35 bra BB24_49;
mov.f64 %fd338, 0dFFF0000000000000;
.loc 2 423 10
sub.f64 %fd451, %fd338, %fd451;
bra.uni BB24_70;
BB24_49:
.loc 2 423 10
setp.eq.f64 %p36, %fd32, 0d7FF0000000000000;
selp.f64 %fd33, %fd32, 0dFFF8000000000000, %p36;
sub.f64 %fd451, %fd33, %fd451;
bra.uni BB24_70;
BB24_50:
.loc 2 423 10
add.f64 %fd34, %fd32, %fd32;
sub.f64 %fd451, %fd34, %fd451;
bra.uni BB24_70;
BB24_51:
.loc 2 423 10
setp.lt.u32 %p37, %r122, 1048576;
@%p37 bra BB24_53;
mov.u32 %r124, -1023;
bra.uni BB24_54;
BB24_53:
.loc 2 423 10
mul.f64 %fd339, %fd32, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r122}, %fd339;
}
{
.reg .b32 %temp;
mov.b64 {%r123, %temp}, %fd339;
}
mov.u32 %r124, -1077;
BB24_54:
.loc 2 423 10
shr.s32 %r93, %r122, 20;
add.s32 %r125, %r124, %r93;
and.b32 %r94, %r122, -2146435073;
or.b32 %r95, %r94, 1072693248;
mov.b64 %fd449, {%r123, %r95};
setp.lt.u32 %p38, %r95, 1073127583;
@%p38 bra BB24_56;
{
.reg .b32 %temp;
mov.b64 {%r96, %temp}, %fd449;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r97}, %fd449;
}
add.s32 %r98, %r97, -1048576;
mov.b64 %fd449, {%r96, %r98};
add.s32 %r125, %r125, 1;
BB24_56:
add.f64 %fd340, %fd449, 0d3FF0000000000000;
mov.f64 %fd342, 0d3FF0000000000000;
.loc 2 423 10
// inline asm
cvt.rn.f32.f64 %f13,%fd340;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f14,%f13;
// inline asm
// inline asm
cvt.f64.f32 %fd341,%f14;
// inline asm
neg.f64 %fd343, %fd340;
fma.rn.f64 %fd344, %fd343, %fd341, %fd342;
fma.rn.f64 %fd345, %fd344, %fd344, %fd344;
fma.rn.f64 %fd346, %fd345, %fd341, %fd341;
add.f64 %fd347, %fd449, 0dBFF0000000000000;
mul.f64 %fd348, %fd347, %fd346;
fma.rn.f64 %fd349, %fd347, %fd346, %fd348;
mul.f64 %fd350, %fd349, %fd349;
mov.f64 %fd351, 0d3ED0EE258B7A8B04;
mov.f64 %fd352, 0d3EB1380B3AE80F1E;
.loc 2 423 10
fma.rn.f64 %fd353, %fd352, %fd350, %fd351;
mov.f64 %fd354, 0d3EF3B2669F02676F;
.loc 2 423 10
fma.rn.f64 %fd355, %fd353, %fd350, %fd354;
mov.f64 %fd356, 0d3F1745CBA9AB0956;
.loc 2 423 10
fma.rn.f64 %fd357, %fd355, %fd350, %fd356;
mov.f64 %fd358, 0d3F3C71C72D1B5154;
.loc 2 423 10
fma.rn.f64 %fd359, %fd357, %fd350, %fd358;
mov.f64 %fd360, 0d3F624924923BE72D;
.loc 2 423 10
fma.rn.f64 %fd361, %fd359, %fd350, %fd360;
mov.f64 %fd362, 0d3F8999999999A3C4;
.loc 2 423 10
fma.rn.f64 %fd363, %fd361, %fd350, %fd362;
mov.f64 %fd364, 0d3FB5555555555554;
.loc 2 423 10
fma.rn.f64 %fd365, %fd363, %fd350, %fd364;
sub.f64 %fd366, %fd347, %fd349;
add.f64 %fd367, %fd366, %fd366;
neg.f64 %fd368, %fd349;
fma.rn.f64 %fd369, %fd368, %fd347, %fd367;
mul.f64 %fd370, %fd346, %fd369;
mul.f64 %fd371, %fd365, %fd350;
fma.rn.f64 %fd372, %fd371, %fd349, %fd370;
cvt.rn.f64.s32 %fd373, %r125;
mov.f64 %fd374, 0d3FE62E42FEFA39EF;
.loc 2 423 10
fma.rn.f64 %fd375, %fd373, %fd374, %fd349;
neg.s32 %r99, %r125;
cvt.rn.f64.s32 %fd376, %r99;
fma.rn.f64 %fd377, %fd376, %fd374, %fd375;
sub.f64 %fd378, %fd377, %fd349;
sub.f64 %fd379, %fd372, %fd378;
mov.f64 %fd380, 0d3C7ABC9E3B39803F;
.loc 2 423 10
fma.rn.f64 %fd381, %fd373, %fd380, %fd379;
add.f64 %fd38, %fd375, %fd381;
sub.f64 %fd451, %fd38, %fd451;
bra.uni BB24_70;
BB24_57:
.loc 2 423 10
setp.gt.f64 %p39, %fd2, 0d0000000000000000;
setp.lt.f64 %p40, %fd2, 0d7FF0000000000000;
and.pred %p41, %p39, %p40;
@%p41 bra BB24_63;
abs.f64 %fd382, %fd2;
setp.gtu.f64 %p42, %fd382, 0d7FF0000000000000;
@%p42 bra BB24_62;
setp.neu.f64 %p43, %fd2, 0d0000000000000000;
@%p43 bra BB24_61;
mov.f64 %fd383, 0dFFF0000000000000;
.loc 2 423 10
neg.f64 %fd451, %fd383;
bra.uni BB24_70;
BB24_61:
.loc 2 423 10
setp.eq.f64 %p44, %fd2, 0d7FF0000000000000;
selp.f64 %fd41, %fd2, 0dFFF8000000000000, %p44;
neg.f64 %fd451, %fd41;
bra.uni BB24_70;
BB24_62:
.loc 2 423 10
add.f64 %fd42, %fd2, %fd2;
neg.f64 %fd451, %fd42;
bra.uni BB24_70;
BB24_63:
.loc 2 423 10
setp.lt.u32 %p45, %r126, 1048576;
@%p45 bra BB24_65;
mov.u32 %r128, -1023;
bra.uni BB24_66;
BB24_65:
.loc 2 423 10
mul.f64 %fd384, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r126}, %fd384;
}
{
.reg .b32 %temp;
mov.b64 {%r127, %temp}, %fd384;
}
mov.u32 %r128, -1077;
BB24_66:
.loc 2 423 10
shr.s32 %r102, %r126, 20;
add.s32 %r129, %r128, %r102;
and.b32 %r103, %r126, -2146435073;
or.b32 %r104, %r103, 1072693248;
mov.b64 %fd450, {%r127, %r104};
setp.lt.u32 %p46, %r104, 1073127583;
@%p46 bra BB24_68;
{
.reg .b32 %temp;
mov.b64 {%r105, %temp}, %fd450;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r106}, %fd450;
}
add.s32 %r107, %r106, -1048576;
mov.b64 %fd450, {%r105, %r107};
add.s32 %r129, %r129, 1;
BB24_68:
add.f64 %fd385, %fd450, 0d3FF0000000000000;
mov.f64 %fd387, 0d3FF0000000000000;
.loc 2 423 10
// inline asm
cvt.rn.f32.f64 %f17,%fd385;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f18,%f17;
// inline asm
// inline asm
cvt.f64.f32 %fd386,%f18;
// inline asm
neg.f64 %fd388, %fd385;
fma.rn.f64 %fd389, %fd388, %fd386, %fd387;
fma.rn.f64 %fd390, %fd389, %fd389, %fd389;
fma.rn.f64 %fd391, %fd390, %fd386, %fd386;
add.f64 %fd392, %fd450, 0dBFF0000000000000;
mul.f64 %fd393, %fd392, %fd391;
fma.rn.f64 %fd394, %fd392, %fd391, %fd393;
mul.f64 %fd395, %fd394, %fd394;
mov.f64 %fd396, 0d3ED0EE258B7A8B04;
mov.f64 %fd397, 0d3EB1380B3AE80F1E;
.loc 2 423 10
fma.rn.f64 %fd398, %fd397, %fd395, %fd396;
mov.f64 %fd399, 0d3EF3B2669F02676F;
.loc 2 423 10
fma.rn.f64 %fd400, %fd398, %fd395, %fd399;
mov.f64 %fd401, 0d3F1745CBA9AB0956;
.loc 2 423 10
fma.rn.f64 %fd402, %fd400, %fd395, %fd401;
mov.f64 %fd403, 0d3F3C71C72D1B5154;
.loc 2 423 10
fma.rn.f64 %fd404, %fd402, %fd395, %fd403;
mov.f64 %fd405, 0d3F624924923BE72D;
.loc 2 423 10
fma.rn.f64 %fd406, %fd404, %fd395, %fd405;
mov.f64 %fd407, 0d3F8999999999A3C4;
.loc 2 423 10
fma.rn.f64 %fd408, %fd406, %fd395, %fd407;
mov.f64 %fd409, 0d3FB5555555555554;
.loc 2 423 10
fma.rn.f64 %fd410, %fd408, %fd395, %fd409;
sub.f64 %fd411, %fd392, %fd394;
add.f64 %fd412, %fd411, %fd411;
neg.f64 %fd413, %fd394;
fma.rn.f64 %fd414, %fd413, %fd392, %fd412;
mul.f64 %fd415, %fd391, %fd414;
mul.f64 %fd416, %fd410, %fd395;
fma.rn.f64 %fd417, %fd416, %fd394, %fd415;
cvt.rn.f64.s32 %fd418, %r129;
mov.f64 %fd419, 0d3FE62E42FEFA39EF;
.loc 2 423 10
fma.rn.f64 %fd420, %fd418, %fd419, %fd394;
neg.s32 %r108, %r129;
cvt.rn.f64.s32 %fd421, %r108;
fma.rn.f64 %fd422, %fd421, %fd419, %fd420;
sub.f64 %fd423, %fd422, %fd394;
sub.f64 %fd424, %fd417, %fd423;
mov.f64 %fd425, 0d3C7ABC9E3B39803F;
.loc 2 423 10
fma.rn.f64 %fd426, %fd418, %fd425, %fd424;
add.f64 %fd46, %fd420, %fd426;
neg.f64 %fd451, %fd46;
bra.uni BB24_70;
BB24_69:
.loc 2 423 10
add.f64 %fd451, %fd1, %fd1;
BB24_70:
.loc 1 41 42
mad.lo.s32 %r109, %r112, %r51, %r113;
mul.wide.s32 %rd12, %r109, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 1 41 42
st.global.f64 [%rd13], %fd451;
.loc 1 41 22
add.s32 %r113, %r2, %r113;
.loc 1 41 1
setp.lt.s32 %p47, %r113, %r49;
@%p47 bra BB24_3;
BB24_71:
.loc 1 41 22
mov.u32 %r110, %nctaid.x;
mad.lo.s32 %r112, %r110, %r53, %r112;
.loc 1 41 1
setp.lt.s32 %p48, %r112, %r50;
@%p48 bra BB24_2;
BB24_72:
.loc 1 41 2
ret;
}
// map_log10_double: element-wise log10 over a 2-D index space.
// Each thread walks (x, y) index pairs, loads one double from the buffer in
// param_4, evaluates the inlined logarithm sequence below, scales the result
// to base 10 and stores it to the buffer in param_2.
// Parameter roles as used by the code (source-level names are not visible here):
//   param_0  upper bound for the y-derived index (%r50)
//   param_1  upper bound for the x-derived index (%r49)
//   param_2  destination pointer
//   param_3  multiplier applied to the x index when addressing the destination
//   param_4  source pointer
//   param_5  multiplier applied to the x index when addressing the source
// NOTE(review): presumably rows / cols / dst / dst stride / src / src stride of
// a column-major matrix -- confirm against the originating .cu file.
.visible .entry map_log10_double(
.param .u32 map_log10_double_param_0,
.param .u32 map_log10_double_param_1,
.param .u64 map_log10_double_param_2,
.param .u32 map_log10_double_param_3,
.param .u64 map_log10_double_param_4,
.param .u32 map_log10_double_param_5
)
{
.reg .pred %p<13>;
.reg .f32 %f<5>;
.reg .s32 %r<55>;
.reg .s64 %rd<9>;
.reg .f64 %fd<59>;
ld.param.u32 %r20, [map_log10_double_param_0];
ld.param.u32 %r21, [map_log10_double_param_1];
ld.param.u64 %rd3, [map_log10_double_param_2];
ld.param.u32 %r22, [map_log10_double_param_3];
ld.param.u64 %rd4, [map_log10_double_param_4];
ld.param.u32 %r23, [map_log10_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 42 1
// x index: %r49 = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r49, %r24, %r25, %r26;
.loc 1 42 1
// Nothing to do for this thread if its first x index is already out of range.
setp.ge.s32 %p1, %r49, %r21;
@%p1 bra BB25_18;
.loc 1 42 1
mov.u32 %r27, %ntid.y;
.loc 1 42 22
// %r2 = nctaid.y * ntid.y: step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// Outer loop head: one iteration per x index handled by this thread.
BB25_2:
.loc 1 42 1
// y index restarts at ntid.y * ctaid.y + tid.y for every x index.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r50, %r27, %r29, %r31;
.loc 1 42 1
setp.ge.s32 %p2, %r50, %r20;
@%p2 bra BB25_17;
.loc 1 42 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r49, %r23;
.loc 1 42 42
mul.lo.s32 %r5, %r49, %r22;
// Inner loop head: one iteration per y index.
BB25_4:
.loc 1 42 1
// Load the input element at source[(y + x * param_5)]; elements are 8 bytes.
add.s32 %r36, %r50, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 42 1
ld.global.f64 %fd1, [%rd6];
.loc 2 233 10
// Split the input into its high (%r51) and low (%r52) 32-bit words.
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd1;
}
// Main path only for 0 < x < +inf; everything else is a special case.
setp.gt.f64 %p3, %fd1, 0d0000000000000000;
setp.lt.f64 %p4, %fd1, 0d7FF0000000000000;
and.pred %p5, %p3, %p4;
@%p5 bra BB25_10;
// |x| compared unordered-greater against +inf is true only for NaN.
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p6, %fd9, 0d7FF0000000000000;
@%p6 bra BB25_9;
setp.neu.f64 %p7, %fd1, 0d0000000000000000;
@%p7 bra BB25_8;
// x == 0: intermediate result is -inf.
mov.f64 %fd58, 0dFFF0000000000000;
bra.uni BB25_16;
BB25_8:
.loc 2 233 10
// x == +inf passes through unchanged; any other value reaching here
// (negative inputs) yields the NaN pattern 0xFFF8000000000000.
setp.eq.f64 %p8, %fd1, 0d7FF0000000000000;
selp.f64 %fd58, %fd1, 0dFFF8000000000000, %p8;
bra.uni BB25_16;
BB25_9:
.loc 2 233 10
// NaN input: x + x propagates the NaN.
add.f64 %fd58, %fd1, %fd1;
bra.uni BB25_16;
BB25_10:
.loc 2 233 10
// High word below 0x00100000 means a zero exponent field (subnormal input).
setp.lt.u32 %p9, %r51, 1048576;
@%p9 bra BB25_12;
// Normal input: exponent bias only.
mov.u32 %r53, -1023;
bra.uni BB25_13;
BB25_12:
.loc 2 233 10
// Subnormal input: scale by 2^54 (0x4350000000000000), re-split the words
// and fold the extra 54 into the bias (-1023 - 54 = -1077).
mul.f64 %fd11, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd11;
}
mov.u32 %r53, -1077;
BB25_13:
.loc 2 233 10
// %r54 = unbiased exponent; %fd57 = mantissa with the exponent field forced
// to that of 1.0 (mask 0x800FFFFF, then OR 0x3FF00000).
shr.s32 %r39, %r51, 20;
add.s32 %r54, %r53, %r39;
and.b32 %r40, %r51, -2146435073;
or.b32 %r41, %r40, 1072693248;
mov.b64 %fd57, {%r52, %r41};
// If the mantissa's high word is at least 0x3FF6A09F (about sqrt(2)), halve
// the mantissa and increment the exponent.
setp.lt.u32 %p10, %r41, 1073127583;
@%p10 bra BB25_15;
{
.reg .b32 %temp;
mov.b64 {%r42, %temp}, %fd57;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd57;
}
// Subtracting 0x00100000 from the high word decrements the exponent field by 1.
add.s32 %r44, %r43, -1048576;
mov.b64 %fd57, {%r42, %r44};
add.s32 %r54, %r54, 1;
BB25_15:
// %fd12 = m + 1, where m is the reduced mantissa in %fd57.
add.f64 %fd12, %fd57, 0d3FF0000000000000;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 2 233 10
// Single-precision approximate reciprocal of (m + 1) as a starting value.
// inline asm
cvt.rn.f32.f64 %f1,%fd12;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd13,%f2;
// inline asm
// Refine the reciprocal in double precision: %fd16 = 1 - (m+1)*r is the
// residual, %fd18 = r + r*(e + e^2) is the refined 1/(m+1).
neg.f64 %fd15, %fd12;
fma.rn.f64 %fd16, %fd15, %fd13, %fd14;
fma.rn.f64 %fd17, %fd16, %fd16, %fd16;
fma.rn.f64 %fd18, %fd17, %fd13, %fd13;
// %fd19 = m - 1; %fd21 = 2 * (m - 1) / (m + 1); %fd22 = %fd21 squared.
add.f64 %fd19, %fd57, 0dBFF0000000000000;
mul.f64 %fd20, %fd19, %fd18;
fma.rn.f64 %fd21, %fd19, %fd18, %fd20;
mul.f64 %fd22, %fd21, %fd21;
// Polynomial in %fd22 evaluated by Horner's scheme with fused multiply-adds.
mov.f64 %fd23, 0d3ED0EE258B7A8B04;
mov.f64 %fd24, 0d3EB1380B3AE80F1E;
.loc 2 233 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3EF3B2669F02676F;
.loc 2 233 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0d3F1745CBA9AB0956;
.loc 2 233 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F3C71C72D1B5154;
.loc 2 233 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F624924923BE72D;
.loc 2 233 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F8999999999A3C4;
.loc 2 233 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3FB5555555555554;
.loc 2 233 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
// %fd42: correction term for the rounding error of the quotient in %fd21.
sub.f64 %fd38, %fd19, %fd21;
add.f64 %fd39, %fd38, %fd38;
neg.f64 %fd40, %fd21;
fma.rn.f64 %fd41, %fd40, %fd19, %fd39;
mul.f64 %fd42, %fd18, %fd41;
// %fd44 = polynomial * %fd22 * %fd21 + correction.
mul.f64 %fd43, %fd37, %fd22;
fma.rn.f64 %fd44, %fd43, %fd21, %fd42;
// Add exponent * ln(2). 0x3FE62E42FEFA39EF is ln(2) rounded to double;
// 0x3C7ABC9E3B39803F is applied as its low-order tail. The subtract-back
// sequence (%fd49..%fd51) recovers the rounding error of the head sum %fd47.
cvt.rn.f64.s32 %fd45, %r54;
mov.f64 %fd46, 0d3FE62E42FEFA39EF;
.loc 2 233 10
fma.rn.f64 %fd47, %fd45, %fd46, %fd21;
neg.s32 %r45, %r54;
cvt.rn.f64.s32 %fd48, %r45;
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
sub.f64 %fd50, %fd49, %fd21;
sub.f64 %fd51, %fd44, %fd50;
mov.f64 %fd52, 0d3C7ABC9E3B39803F;
.loc 2 233 10
fma.rn.f64 %fd53, %fd45, %fd52, %fd51;
add.f64 %fd58, %fd47, %fd53;
// Join point: %fd58 holds the natural-log result or a special-case value.
BB25_16:
// Convert to base 10: multiply by log10(e) split into a head
// (0x3FDBCB7B1526E50E) and a low-order tail (0x3C695355BAAAFAD3).
mul.f64 %fd54, %fd58, 0d3C695355BAAAFAD3;
mov.f64 %fd55, 0d3FDBCB7B1526E50E;
.loc 2 233 10
fma.rn.f64 %fd56, %fd58, %fd55, %fd54;
.loc 1 42 42
// Store to destination[(y + x * param_3)].
add.s32 %r46, %r50, %r5;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 42 42
st.global.f64 [%rd8], %fd56;
.loc 1 42 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r50, %r2, %r50;
.loc 1 42 1
setp.lt.s32 %p11, %r50, %r20;
@%p11 bra BB25_4;
BB25_17:
.loc 1 42 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r24, %r49;
.loc 1 42 1
setp.lt.s32 %p12, %r49, %r21;
@%p12 bra BB25_2;
BB25_18:
.loc 1 42 2
ret;
}
// map_log1p_double: element-wise log1p over a 2-D index space.
// Each thread walks (x, y) index pairs, loads one double from the buffer in
// param_4, evaluates the inlined log1p sequence below and stores the result to
// the buffer in param_2.
// Parameter roles as used by the code (source-level names are not visible here):
//   param_0  upper bound for the y-derived index (%r51)
//   param_1  upper bound for the x-derived index (%r50)
//   param_2  destination pointer
//   param_3  multiplier applied to the x index when addressing the destination
//   param_4  source pointer
//   param_5  multiplier applied to the x index when addressing the source
// NOTE(review): presumably rows / cols / dst / dst stride / src / src stride of
// a column-major matrix -- confirm against the originating .cu file.
.visible .entry map_log1p_double(
.param .u32 map_log1p_double_param_0,
.param .u32 map_log1p_double_param_1,
.param .u64 map_log1p_double_param_2,
.param .u32 map_log1p_double_param_3,
.param .u64 map_log1p_double_param_4,
.param .u32 map_log1p_double_param_5
)
{
.reg .pred %p<16>;
.reg .f32 %f<5>;
.reg .s32 %r<56>;
.reg .s64 %rd<9>;
.reg .f64 %fd<81>;
ld.param.u32 %r20, [map_log1p_double_param_0];
ld.param.u32 %r21, [map_log1p_double_param_1];
ld.param.u64 %rd3, [map_log1p_double_param_2];
ld.param.u32 %r22, [map_log1p_double_param_3];
ld.param.u64 %rd4, [map_log1p_double_param_4];
ld.param.u32 %r23, [map_log1p_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 43 1
// x index: %r50 = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r50, %r24, %r25, %r26;
.loc 1 43 1
// Nothing to do for this thread if its first x index is already out of range.
setp.ge.s32 %p1, %r50, %r21;
@%p1 bra BB26_20;
.loc 1 43 1
mov.u32 %r27, %ntid.y;
.loc 1 43 22
// %r2 = nctaid.y * ntid.y: step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// Outer loop head: one iteration per x index handled by this thread.
BB26_2:
.loc 1 43 1
// y index restarts at ntid.y * ctaid.y + tid.y for every x index.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r51, %r27, %r29, %r31;
.loc 1 43 1
setp.ge.s32 %p2, %r51, %r20;
@%p2 bra BB26_19;
.loc 1 43 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r50, %r23;
.loc 1 43 42
mul.lo.s32 %r5, %r50, %r22;
// Inner loop head: one iteration per y index.
BB26_4:
.loc 1 43 1
// Load the input element at source[(y + x * param_5)]; elements are 8 bytes.
add.s32 %r36, %r51, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 43 1
ld.global.f64 %fd1, [%rd6];
.loc 2 238 10
// %r37 = high 32 bits of the input.
{
.reg .b32 %temp;
mov.b64 {%temp, %r37}, %fd1;
}
// Range test on the high word: unsigned < 0x3FE55555 catches non-negative
// inputs below roughly 2/3; signed < 0xBFD99999 catches negative inputs with
// magnitude below roughly 0.4. Either case takes the direct path at BB26_17.
setp.lt.u32 %p3, %r37, 1071994197;
setp.lt.s32 %p4, %r37, -1076258407;
or.pred %p5, %p3, %p4;
@%p5 bra BB26_17;
// Otherwise form u = 1 + x and take the logarithm of u.
add.f64 %fd2, %fd1, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd2;
}
// Main path only for 0 < u < +inf; everything else is a special case.
setp.gt.f64 %p6, %fd2, 0d0000000000000000;
setp.lt.f64 %p7, %fd2, 0d7FF0000000000000;
and.pred %p8, %p6, %p7;
@%p8 bra BB26_11;
// |u| compared unordered-greater against +inf is true only for NaN.
abs.f64 %fd11, %fd2;
setp.gtu.f64 %p9, %fd11, 0d7FF0000000000000;
@%p9 bra BB26_10;
setp.neu.f64 %p10, %fd2, 0d0000000000000000;
@%p10 bra BB26_9;
// u == 0: result is -inf.
mov.f64 %fd80, 0dFFF0000000000000;
bra.uni BB26_18;
BB26_9:
.loc 2 238 10
// u == +inf passes through unchanged; any other value reaching here
// (negative u) yields the NaN pattern 0xFFF8000000000000.
setp.eq.f64 %p11, %fd2, 0d7FF0000000000000;
selp.f64 %fd80, %fd2, 0dFFF8000000000000, %p11;
bra.uni BB26_18;
BB26_10:
.loc 2 238 10
// NaN: u + u propagates the NaN.
add.f64 %fd80, %fd2, %fd2;
bra.uni BB26_18;
BB26_11:
.loc 2 238 10
// High word below 0x00100000 means a zero exponent field (subnormal u).
setp.lt.u32 %p12, %r52, 1048576;
@%p12 bra BB26_13;
// Normal u: exponent bias only.
mov.u32 %r54, -1023;
bra.uni BB26_14;
BB26_13:
.loc 2 238 10
// Subnormal u: scale by 2^54 (0x4350000000000000), re-split the words and
// fold the extra 54 into the bias (-1023 - 54 = -1077).
mul.f64 %fd13, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd13;
}
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd13;
}
mov.u32 %r54, -1077;
BB26_14:
.loc 2 238 10
// %r55 = unbiased exponent; %fd79 = mantissa with the exponent field forced
// to that of 1.0 (mask 0x800FFFFF, then OR 0x3FF00000).
shr.s32 %r40, %r52, 20;
add.s32 %r55, %r54, %r40;
and.b32 %r41, %r52, -2146435073;
or.b32 %r42, %r41, 1072693248;
mov.b64 %fd79, {%r53, %r42};
// If the mantissa's high word is at least 0x3FF6A09F (about sqrt(2)), halve
// the mantissa and increment the exponent.
setp.lt.u32 %p13, %r42, 1073127583;
@%p13 bra BB26_16;
{
.reg .b32 %temp;
mov.b64 {%r43, %temp}, %fd79;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd79;
}
// Subtracting 0x00100000 from the high word decrements the exponent field by 1.
add.s32 %r45, %r44, -1048576;
mov.b64 %fd79, {%r43, %r45};
add.s32 %r55, %r55, 1;
BB26_16:
// %fd14 = m + 1, where m is the reduced mantissa in %fd79.
add.f64 %fd14, %fd79, 0d3FF0000000000000;
mov.f64 %fd16, 0d3FF0000000000000;
.loc 2 238 10
// Single-precision approximate reciprocal of (m + 1) as a starting value.
// inline asm
cvt.rn.f32.f64 %f1,%fd14;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd15,%f2;
// inline asm
// Refine the reciprocal in double precision: %fd18 = 1 - (m+1)*r is the
// residual, %fd20 = r + r*(e + e^2) is the refined 1/(m+1).
neg.f64 %fd17, %fd14;
fma.rn.f64 %fd18, %fd17, %fd15, %fd16;
fma.rn.f64 %fd19, %fd18, %fd18, %fd18;
fma.rn.f64 %fd20, %fd19, %fd15, %fd15;
// %fd21 = m - 1; %fd23 = 2 * (m - 1) / (m + 1); %fd24 = %fd23 squared.
add.f64 %fd21, %fd79, 0dBFF0000000000000;
mul.f64 %fd22, %fd21, %fd20;
fma.rn.f64 %fd23, %fd21, %fd20, %fd22;
mul.f64 %fd24, %fd23, %fd23;
// Polynomial in %fd24 evaluated by Horner's scheme with fused multiply-adds.
mov.f64 %fd25, 0d3ED0EE258B7A8B04;
mov.f64 %fd26, 0d3EB1380B3AE80F1E;
.loc 2 238 10
fma.rn.f64 %fd27, %fd26, %fd24, %fd25;
mov.f64 %fd28, 0d3EF3B2669F02676F;
.loc 2 238 10
fma.rn.f64 %fd29, %fd27, %fd24, %fd28;
mov.f64 %fd30, 0d3F1745CBA9AB0956;
.loc 2 238 10
fma.rn.f64 %fd31, %fd29, %fd24, %fd30;
mov.f64 %fd32, 0d3F3C71C72D1B5154;
.loc 2 238 10
fma.rn.f64 %fd33, %fd31, %fd24, %fd32;
mov.f64 %fd34, 0d3F624924923BE72D;
.loc 2 238 10
fma.rn.f64 %fd35, %fd33, %fd24, %fd34;
mov.f64 %fd36, 0d3F8999999999A3C4;
.loc 2 238 10
fma.rn.f64 %fd37, %fd35, %fd24, %fd36;
mov.f64 %fd38, 0d3FB5555555555554;
.loc 2 238 10
fma.rn.f64 %fd39, %fd37, %fd24, %fd38;
// %fd44: correction term for the rounding error of the quotient in %fd23.
sub.f64 %fd40, %fd21, %fd23;
add.f64 %fd41, %fd40, %fd40;
neg.f64 %fd42, %fd23;
fma.rn.f64 %fd43, %fd42, %fd21, %fd41;
mul.f64 %fd44, %fd20, %fd43;
// %fd46 = polynomial * %fd24 * %fd23 + correction.
mul.f64 %fd45, %fd39, %fd24;
fma.rn.f64 %fd46, %fd45, %fd23, %fd44;
// Add exponent * ln(2). 0x3FE62E42FEFA39EF is ln(2) rounded to double;
// 0x3C7ABC9E3B39803F is applied as its low-order tail. The subtract-back
// sequence (%fd51..%fd53) recovers the rounding error of the head sum %fd49.
cvt.rn.f64.s32 %fd47, %r55;
mov.f64 %fd48, 0d3FE62E42FEFA39EF;
.loc 2 238 10
fma.rn.f64 %fd49, %fd47, %fd48, %fd23;
neg.s32 %r46, %r55;
cvt.rn.f64.s32 %fd50, %r46;
fma.rn.f64 %fd51, %fd50, %fd48, %fd49;
sub.f64 %fd52, %fd51, %fd23;
sub.f64 %fd53, %fd46, %fd52;
mov.f64 %fd54, 0d3C7ABC9E3B39803F;
.loc 2 238 10
fma.rn.f64 %fd55, %fd47, %fd54, %fd53;
add.f64 %fd80, %fd49, %fd55;
bra.uni BB26_18;
// Direct path for inputs near zero: works on x itself instead of 1 + x.
BB26_17:
.loc 2 238 10
// %fd57 = x / (x + 2); %fd59 = -x * %fd57; %fd60 = x + %fd59; %fd61 = %fd60^2.
add.f64 %fd56, %fd1, 0d4000000000000000;
div.rn.f64 %fd57, %fd1, %fd56;
neg.f64 %fd58, %fd1;
mul.f64 %fd59, %fd57, %fd58;
add.f64 %fd60, %fd1, %fd59;
mul.f64 %fd61, %fd60, %fd60;
// Polynomial in %fd61 evaluated by Horner's scheme with fused multiply-adds.
mov.f64 %fd62, 0d3ED087FFCEB2DC44;
mov.f64 %fd63, 0d3EB372FB2FBE14B5;
.loc 2 238 10
fma.rn.f64 %fd64, %fd63, %fd61, %fd62;
mov.f64 %fd65, 0d3EF3B9FF890F468C;
.loc 2 238 10
fma.rn.f64 %fd66, %fd64, %fd61, %fd65;
mov.f64 %fd67, 0d3F17457EFD51BAF8;
.loc 2 238 10
fma.rn.f64 %fd68, %fd66, %fd61, %fd67;
mov.f64 %fd69, 0d3F3C71C8DE3CE825;
.loc 2 238 10
fma.rn.f64 %fd70, %fd68, %fd61, %fd69;
mov.f64 %fd71, 0d3F6249248FA4661F;
.loc 2 238 10
fma.rn.f64 %fd72, %fd70, %fd61, %fd71;
mov.f64 %fd73, 0d3F899999999D70C4;
.loc 2 238 10
fma.rn.f64 %fd74, %fd72, %fd61, %fd73;
mov.f64 %fd75, 0d3FB5555555555462;
.loc 2 238 10
fma.rn.f64 %fd76, %fd74, %fd61, %fd75;
// Result = polynomial * %fd61 * %fd60 + %fd59 + x.
mul.f64 %fd77, %fd76, %fd61;
fma.rn.f64 %fd78, %fd77, %fd60, %fd59;
add.f64 %fd80, %fd78, %fd1;
// Join point: %fd80 holds the result from whichever path was taken.
BB26_18:
.loc 1 43 42
// Store to destination[(y + x * param_3)].
add.s32 %r47, %r51, %r5;
mul.wide.s32 %rd7, %r47, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 43 42
st.global.f64 [%rd8], %fd80;
.loc 1 43 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r51, %r2, %r51;
.loc 1 43 1
setp.lt.s32 %p14, %r51, %r20;
@%p14 bra BB26_4;
BB26_19:
.loc 1 43 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r48, %nctaid.x;
mad.lo.s32 %r50, %r48, %r24, %r50;
.loc 1 43 1
setp.lt.s32 %p15, %r50, %r21;
@%p15 bra BB26_2;
BB26_20:
.loc 1 43 2
ret;
}
// map_log2_double: element-wise log2 over a 2-D index space.
// Each thread walks (x, y) index pairs, loads one double from the buffer in
// param_4, evaluates the inlined logarithm sequence below, scales the result
// to base 2 and stores it to the buffer in param_2.
// Parameter roles as used by the code (source-level names are not visible here):
//   param_0  upper bound for the y-derived index (%r50)
//   param_1  upper bound for the x-derived index (%r49)
//   param_2  destination pointer
//   param_3  multiplier applied to the x index when addressing the destination
//   param_4  source pointer
//   param_5  multiplier applied to the x index when addressing the source
// NOTE(review): presumably rows / cols / dst / dst stride / src / src stride of
// a column-major matrix -- confirm against the originating .cu file.
.visible .entry map_log2_double(
.param .u32 map_log2_double_param_0,
.param .u32 map_log2_double_param_1,
.param .u64 map_log2_double_param_2,
.param .u32 map_log2_double_param_3,
.param .u64 map_log2_double_param_4,
.param .u32 map_log2_double_param_5
)
{
.reg .pred %p<13>;
.reg .f32 %f<5>;
.reg .s32 %r<55>;
.reg .s64 %rd<9>;
.reg .f64 %fd<59>;
ld.param.u32 %r20, [map_log2_double_param_0];
ld.param.u32 %r21, [map_log2_double_param_1];
ld.param.u64 %rd3, [map_log2_double_param_2];
ld.param.u32 %r22, [map_log2_double_param_3];
ld.param.u64 %rd4, [map_log2_double_param_4];
ld.param.u32 %r23, [map_log2_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 44 1
// x index: %r49 = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r49, %r24, %r25, %r26;
.loc 1 44 1
// Nothing to do for this thread if its first x index is already out of range.
setp.ge.s32 %p1, %r49, %r21;
@%p1 bra BB27_18;
.loc 1 44 1
mov.u32 %r27, %ntid.y;
.loc 1 44 22
// %r2 = nctaid.y * ntid.y: step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// Outer loop head: one iteration per x index handled by this thread.
BB27_2:
.loc 1 44 1
// y index restarts at ntid.y * ctaid.y + tid.y for every x index.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r50, %r27, %r29, %r31;
.loc 1 44 1
setp.ge.s32 %p2, %r50, %r20;
@%p2 bra BB27_17;
.loc 1 44 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r49, %r23;
.loc 1 44 42
mul.lo.s32 %r5, %r49, %r22;
// Inner loop head: one iteration per y index.
BB27_4:
.loc 1 44 1
// Load the input element at source[(y + x * param_5)]; elements are 8 bytes.
add.s32 %r36, %r50, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 44 1
ld.global.f64 %fd1, [%rd6];
.loc 2 228 10
// Split the input into its high (%r51) and low (%r52) 32-bit words.
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd1;
}
// Main path only for 0 < x < +inf; everything else is a special case.
setp.gt.f64 %p3, %fd1, 0d0000000000000000;
setp.lt.f64 %p4, %fd1, 0d7FF0000000000000;
and.pred %p5, %p3, %p4;
@%p5 bra BB27_10;
// |x| compared unordered-greater against +inf is true only for NaN.
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p6, %fd9, 0d7FF0000000000000;
@%p6 bra BB27_9;
setp.neu.f64 %p7, %fd1, 0d0000000000000000;
@%p7 bra BB27_8;
// x == 0: intermediate result is -inf.
mov.f64 %fd58, 0dFFF0000000000000;
bra.uni BB27_16;
BB27_8:
.loc 2 228 10
// x == +inf passes through unchanged; any other value reaching here
// (negative inputs) yields the NaN pattern 0xFFF8000000000000.
setp.eq.f64 %p8, %fd1, 0d7FF0000000000000;
selp.f64 %fd58, %fd1, 0dFFF8000000000000, %p8;
bra.uni BB27_16;
BB27_9:
.loc 2 228 10
// NaN input: x + x propagates the NaN.
add.f64 %fd58, %fd1, %fd1;
bra.uni BB27_16;
BB27_10:
.loc 2 228 10
// High word below 0x00100000 means a zero exponent field (subnormal input).
setp.lt.u32 %p9, %r51, 1048576;
@%p9 bra BB27_12;
// Normal input: exponent bias only.
mov.u32 %r53, -1023;
bra.uni BB27_13;
BB27_12:
.loc 2 228 10
// Subnormal input: scale by 2^54 (0x4350000000000000), re-split the words
// and fold the extra 54 into the bias (-1023 - 54 = -1077).
mul.f64 %fd11, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd11;
}
mov.u32 %r53, -1077;
BB27_13:
.loc 2 228 10
// %r54 = unbiased exponent; %fd57 = mantissa with the exponent field forced
// to that of 1.0 (mask 0x800FFFFF, then OR 0x3FF00000).
shr.s32 %r39, %r51, 20;
add.s32 %r54, %r53, %r39;
and.b32 %r40, %r51, -2146435073;
or.b32 %r41, %r40, 1072693248;
mov.b64 %fd57, {%r52, %r41};
// If the mantissa's high word is at least 0x3FF6A09F (about sqrt(2)), halve
// the mantissa and increment the exponent.
setp.lt.u32 %p10, %r41, 1073127583;
@%p10 bra BB27_15;
{
.reg .b32 %temp;
mov.b64 {%r42, %temp}, %fd57;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd57;
}
// Subtracting 0x00100000 from the high word decrements the exponent field by 1.
add.s32 %r44, %r43, -1048576;
mov.b64 %fd57, {%r42, %r44};
add.s32 %r54, %r54, 1;
BB27_15:
// %fd12 = m + 1, where m is the reduced mantissa in %fd57.
add.f64 %fd12, %fd57, 0d3FF0000000000000;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 2 228 10
// Single-precision approximate reciprocal of (m + 1) as a starting value.
// inline asm
cvt.rn.f32.f64 %f1,%fd12;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd13,%f2;
// inline asm
// Refine the reciprocal in double precision: %fd16 = 1 - (m+1)*r is the
// residual, %fd18 = r + r*(e + e^2) is the refined 1/(m+1).
neg.f64 %fd15, %fd12;
fma.rn.f64 %fd16, %fd15, %fd13, %fd14;
fma.rn.f64 %fd17, %fd16, %fd16, %fd16;
fma.rn.f64 %fd18, %fd17, %fd13, %fd13;
// %fd19 = m - 1; %fd21 = 2 * (m - 1) / (m + 1); %fd22 = %fd21 squared.
add.f64 %fd19, %fd57, 0dBFF0000000000000;
mul.f64 %fd20, %fd19, %fd18;
fma.rn.f64 %fd21, %fd19, %fd18, %fd20;
mul.f64 %fd22, %fd21, %fd21;
// Polynomial in %fd22 evaluated by Horner's scheme with fused multiply-adds.
mov.f64 %fd23, 0d3ED0EE258B7A8B04;
mov.f64 %fd24, 0d3EB1380B3AE80F1E;
.loc 2 228 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3EF3B2669F02676F;
.loc 2 228 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0d3F1745CBA9AB0956;
.loc 2 228 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F3C71C72D1B5154;
.loc 2 228 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F624924923BE72D;
.loc 2 228 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F8999999999A3C4;
.loc 2 228 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3FB5555555555554;
.loc 2 228 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
// %fd42: correction term for the rounding error of the quotient in %fd21.
sub.f64 %fd38, %fd19, %fd21;
add.f64 %fd39, %fd38, %fd38;
neg.f64 %fd40, %fd21;
fma.rn.f64 %fd41, %fd40, %fd19, %fd39;
mul.f64 %fd42, %fd18, %fd41;
// %fd44 = polynomial * %fd22 * %fd21 + correction.
mul.f64 %fd43, %fd37, %fd22;
fma.rn.f64 %fd44, %fd43, %fd21, %fd42;
// Add exponent * ln(2). 0x3FE62E42FEFA39EF is ln(2) rounded to double;
// 0x3C7ABC9E3B39803F is applied as its low-order tail. The subtract-back
// sequence (%fd49..%fd51) recovers the rounding error of the head sum %fd47.
cvt.rn.f64.s32 %fd45, %r54;
mov.f64 %fd46, 0d3FE62E42FEFA39EF;
.loc 2 228 10
fma.rn.f64 %fd47, %fd45, %fd46, %fd21;
neg.s32 %r45, %r54;
cvt.rn.f64.s32 %fd48, %r45;
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
sub.f64 %fd50, %fd49, %fd21;
sub.f64 %fd51, %fd44, %fd50;
mov.f64 %fd52, 0d3C7ABC9E3B39803F;
.loc 2 228 10
fma.rn.f64 %fd53, %fd45, %fd52, %fd51;
add.f64 %fd58, %fd47, %fd53;
// Join point: %fd58 holds the natural-log result or a special-case value.
BB27_16:
// Convert to base 2: multiply by log2(e) split into a head
// (0x3FF71547652B82FE) and a low-order tail (0x3C7777D0FFDA0D24).
mul.f64 %fd54, %fd58, 0d3C7777D0FFDA0D24;
mov.f64 %fd55, 0d3FF71547652B82FE;
.loc 2 228 10
fma.rn.f64 %fd56, %fd58, %fd55, %fd54;
.loc 1 44 42
// Store to destination[(y + x * param_3)].
add.s32 %r46, %r50, %r5;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 44 42
st.global.f64 [%rd8], %fd56;
.loc 1 44 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r50, %r2, %r50;
.loc 1 44 1
setp.lt.s32 %p11, %r50, %r20;
@%p11 bra BB27_4;
BB27_17:
.loc 1 44 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r24, %r49;
.loc 1 44 1
setp.lt.s32 %p12, %r49, %r21;
@%p12 bra BB27_2;
BB27_18:
.loc 1 44 2
ret;
}
// map_logb_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y index
//   param_1 (u32): exclusive upper bound for the x index
//   param_2 (u64): destination base address (generic; converted to global)
//   param_3 (u32): destination stride, in f64 elements, per unit of x
//   param_4 (u64): source base address (generic; converted to global)
//   param_5 (u32): source stride, in f64 elements, per unit of x
// For each (x, y) it loads the f64 at src[x*param_5 + y], derives the unbiased
// binary exponent of its magnitude as an f64 (with dedicated results for NaN,
// infinity, zero and subnormal inputs) and stores it at dst[x*param_3 + y].
// The .loc 2 512 annotations attribute the per-element math to file 2
// (math_functions_dbl_ptx3.h).
.visible .entry map_logb_double(
.param .u32 map_logb_double_param_0,
.param .u32 map_logb_double_param_1,
.param .u64 map_logb_double_param_2,
.param .u32 map_logb_double_param_3,
.param .u64 map_logb_double_param_4,
.param .u32 map_logb_double_param_5
)
{
.reg .pred %p<9>;
.reg .s32 %r<35>;
.reg .s64 %rd<13>;
.reg .f64 %fd<9>;
// %r13 = y bound, %r14 = x bound, %r15 = dst stride, %r16 = src stride.
ld.param.u32 %r13, [map_logb_double_param_0];
ld.param.u32 %r14, [map_logb_double_param_1];
ld.param.u64 %rd4, [map_logb_double_param_2];
ld.param.u32 %r15, [map_logb_double_param_3];
ld.param.u64 %rd5, [map_logb_double_param_4];
ld.param.u32 %r16, [map_logb_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 45 1
// %r33 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r17, %ctaid.x;
mov.u32 %r18, %tid.x;
mad.lo.s32 %r33, %r1, %r17, %r18;
.loc 1 45 1
// Nothing to do for this thread when its first x is already out of range.
setp.ge.s32 %p1, %r33, %r14;
@%p1 bra BB28_14;
.loc 1 45 1
// %r3 = initial y = ntid.y * ctaid.y + tid.y
mov.u32 %r19, %tid.y;
mov.u32 %r20, %ntid.y;
mov.u32 %r21, %ctaid.y;
mad.lo.s32 %r3, %r20, %r21, %r19;
.loc 1 45 22
// %r4 = x step = nctaid.x * ntid.x
mov.u32 %r22, %nctaid.x;
mul.lo.s32 %r4, %r22, %r1;
.loc 1 45 22
// %r5 = y step = nctaid.y * ntid.y
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r20;
// Outer loop over x.
BB28_2:
.loc 1 45 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB28_13;
.loc 1 45 1
// %r7 = x * src stride, %r8 = x * dst stride (invariant in the inner loop).
mul.lo.s32 %r7, %r33, %r16;
.loc 1 45 42
mul.lo.s32 %r8, %r33, %r15;
mov.u32 %r34, %r3;
// Inner loop over y; %r9 holds the current y.
BB28_4:
.loc 1 45 1
mov.u32 %r9, %r34;
add.s32 %r24, %r9, %r7;
mul.wide.s32 %rd6, %r24, 8;
add.s64 %rd7, %rd2, %rd6;
.loc 1 45 1
// %fd1 = input element.
ld.global.f64 %fd1, [%rd7];
.loc 2 512 10
// %fd8 = |x|; it also serves as the result register on every path below.
abs.f64 %fd8, %fd1;
// gtu is "greater than or unordered": |x| cannot exceed +inf, so this branch
// is taken only for NaN inputs.
setp.gtu.f64 %p3, %fd8, 0d7FF0000000000000;
@%p3 bra BB28_11;
// Infinite input: the result is |x| = +inf (already in %fd8).
setp.eq.f64 %p4, %fd8, 0d7FF0000000000000;
@%p4 bra BB28_12;
setp.neu.f64 %p5, %fd8, 0d0000000000000000;
@%p5 bra BB28_8;
// Zero input: result is -inf.
mov.f64 %fd8, 0dFFF0000000000000;
bra.uni BB28_12;
BB28_8:
.loc 2 512 10
// Split |x| into its low (%r25) and high (%r10) 32-bit words ...
{
.reg .b32 %temp;
mov.b64 {%r25, %temp}, %fd8;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r10}, %fd8;
}
// ... and rebuild the full 64-bit pattern in %rd3 for the clz below.
cvt.u64.u32 %rd8, %r10;
shl.b64 %rd9, %rd8, 32;
cvt.u64.u32 %rd10, %r25;
or.b64 %rd3, %rd9, %rd10;
// 0d0010000000000000 is the bit pattern with exponent field 1 and zero
// mantissa; smaller magnitudes are subnormal and handled at BB28_10.
setp.ltu.f64 %p6, %fd8, 0d0010000000000000;
@%p6 bra BB28_10;
// Normal number: result = (11-bit exponent field) - 1023.
shr.u32 %r26, %r10, 20;
and.b32 %r27, %r26, 2047;
add.s32 %r28, %r27, -1023;
cvt.rn.f64.s32 %fd8, %r28;
bra.uni BB28_12;
BB28_10:
.loc 2 512 10
// Subnormal number: result = -1011 - clz(bit pattern), i.e. the exponent is
// derived from the position of the highest set mantissa bit.
clz.b64 %r29, %rd3;
mov.u32 %r30, -1011;
.loc 2 512 10
sub.s32 %r31, %r30, %r29;
cvt.rn.f64.s32 %fd8, %r31;
bra.uni BB28_12;
BB28_11:
.loc 2 512 10
// NaN input: x + x propagates the NaN.
add.f64 %fd8, %fd1, %fd1;
// Common store point for all paths.
BB28_12:
.loc 1 45 42
add.s32 %r32, %r9, %r8;
mul.wide.s32 %rd11, %r32, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 1 45 42
st.global.f64 [%rd12], %fd8;
.loc 1 45 22
// Advance y by the y step and repeat while it is below the y bound.
add.s32 %r11, %r5, %r9;
.loc 1 45 1
setp.lt.s32 %p7, %r11, %r13;
mov.u32 %r34, %r11;
@%p7 bra BB28_4;
BB28_13:
.loc 1 45 22
// Advance x by the x step and repeat while it is below the x bound.
add.s32 %r33, %r4, %r33;
.loc 1 45 1
setp.lt.s32 %p8, %r33, %r14;
@%p8 bra BB28_2;
BB28_14:
.loc 1 45 2
ret;
}
// map_log_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y index
//   param_1 (u32): exclusive upper bound for the x index
//   param_2 (u64): destination base address (generic; converted to global)
//   param_3 (u32): destination stride, in f64 elements, per unit of x
//   param_4 (u64): source base address (generic; converted to global)
//   param_5 (u32): source stride, in f64 elements, per unit of x
// For each (x, y) it loads the f64 at src[x*param_5 + y], evaluates an inlined
// logarithm expansion (attributed by .loc 2 223 to math_functions_dbl_ptx3.h)
// and stores the result at dst[x*param_3 + y]. The final scaling uses the
// constant 0d3FE62E42FEFA39EF (about ln 2), so this is the natural logarithm.
.visible .entry map_log_double(
.param .u32 map_log_double_param_0,
.param .u32 map_log_double_param_1,
.param .u64 map_log_double_param_2,
.param .u32 map_log_double_param_3,
.param .u64 map_log_double_param_4,
.param .u32 map_log_double_param_5
)
{
.reg .pred %p<13>;
.reg .f32 %f<5>;
.reg .s32 %r<55>;
.reg .s64 %rd<9>;
.reg .f64 %fd<56>;
// %r20 = y bound, %r21 = x bound, %r22 = dst stride, %r23 = src stride.
ld.param.u32 %r20, [map_log_double_param_0];
ld.param.u32 %r21, [map_log_double_param_1];
ld.param.u64 %rd3, [map_log_double_param_2];
ld.param.u32 %r22, [map_log_double_param_3];
ld.param.u64 %rd4, [map_log_double_param_4];
ld.param.u32 %r23, [map_log_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 46 1
// %r49 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r49, %r24, %r25, %r26;
.loc 1 46 1
setp.ge.s32 %p1, %r49, %r21;
@%p1 bra BB29_18;
.loc 1 46 1
mov.u32 %r27, %ntid.y;
.loc 1 46 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// Outer loop over x; the starting y is recomputed on every pass.
BB29_2:
.loc 1 46 1
// %r50 = y = ntid.y * ctaid.y + tid.y
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r50, %r27, %r29, %r31;
.loc 1 46 1
setp.ge.s32 %p2, %r50, %r20;
@%p2 bra BB29_17;
.loc 1 46 1
// %r4 = x * src stride, %r5 = x * dst stride.
mul.lo.s32 %r4, %r49, %r23;
.loc 1 46 42
mul.lo.s32 %r5, %r49, %r22;
// Inner loop over y.
BB29_4:
.loc 1 46 1
add.s32 %r36, %r50, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 46 1
// %fd1 = input element; %r51 = its high word, %r52 = its low word.
ld.global.f64 %fd1, [%rd6];
.loc 2 223 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd1;
}
// Main path (BB29_10) only for 0 < x < +inf; everything else is a special case.
setp.gt.f64 %p3, %fd1, 0d0000000000000000;
setp.lt.f64 %p4, %fd1, 0d7FF0000000000000;
and.pred %p5, %p3, %p4;
@%p5 bra BB29_10;
// |x| "greater than or unordered" with +inf is true only for NaN.
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p6, %fd9, 0d7FF0000000000000;
@%p6 bra BB29_9;
setp.neu.f64 %p7, %fd1, 0d0000000000000000;
@%p7 bra BB29_8;
// x == 0: result is -inf.
mov.f64 %fd55, 0dFFF0000000000000;
bra.uni BB29_16;
BB29_8:
.loc 2 223 10
// Remaining inputs are +inf (returned unchanged) or negative values
// (result is the NaN pattern 0dFFF8000000000000).
setp.eq.f64 %p8, %fd1, 0d7FF0000000000000;
selp.f64 %fd55, %fd1, 0dFFF8000000000000, %p8;
bra.uni BB29_16;
BB29_9:
.loc 2 223 10
// NaN input: x + x propagates the NaN.
add.f64 %fd55, %fd1, %fd1;
bra.uni BB29_16;
BB29_10:
.loc 2 223 10
// High word below 0x00100000 means a zero exponent field (subnormal input).
setp.lt.u32 %p9, %r51, 1048576;
@%p9 bra BB29_12;
// Normal input: exponent bias correction is -1023.
mov.u32 %r53, -1023;
bra.uni BB29_13;
BB29_12:
.loc 2 223 10
// Subnormal input: scale by 0d4350000000000000 (2^54) to normalise it, re-read
// the words, and fold the extra 54 into the bias (-1023 - 54 = -1077).
mul.f64 %fd11, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd11;
}
mov.u32 %r53, -1077;
BB29_13:
.loc 2 223 10
// %r54 = unbiased exponent e. %fd54 = mantissa m, built by keeping the sign and
// mantissa bits (mask 0x800FFFFF) and forcing the exponent field to 0x3FF.
shr.s32 %r39, %r51, 20;
add.s32 %r54, %r53, %r39;
and.b32 %r40, %r51, -2146435073;
or.b32 %r41, %r40, 1072693248;
mov.b64 %fd54, {%r52, %r41};
// If the high word of m is at least 0x3FF6A09F (m at or just above sqrt(2)),
// halve m by decrementing its exponent field and increment e to compensate.
setp.lt.u32 %p10, %r41, 1073127583;
@%p10 bra BB29_15;
{
.reg .b32 %temp;
mov.b64 {%r42, %temp}, %fd54;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd54;
}
add.s32 %r44, %r43, -1048576;
mov.b64 %fd54, {%r42, %r44};
add.s32 %r54, %r54, 1;
BB29_15:
// %fd12 = m + 1.
add.f64 %fd12, %fd54, 0d3FF0000000000000;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 2 223 10
// Single-precision reciprocal estimate of (m + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd12;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd13,%f2;
// inline asm
// Refine the estimate: err = 1 - (m+1)*r, then %fd18 = r + r*(err + err^2).
neg.f64 %fd15, %fd12;
fma.rn.f64 %fd16, %fd15, %fd13, %fd14;
fma.rn.f64 %fd17, %fd16, %fd16, %fd16;
fma.rn.f64 %fd18, %fd17, %fd13, %fd13;
// %fd19 = m - 1; %fd21 = u = 2*(m-1)/(m+1); %fd22 = u^2.
add.f64 %fd19, %fd54, 0dBFF0000000000000;
mul.f64 %fd20, %fd19, %fd18;
fma.rn.f64 %fd21, %fd19, %fd18, %fd20;
mul.f64 %fd22, %fd21, %fd21;
// Horner evaluation of a polynomial in u^2 (coefficients from file 2).
mov.f64 %fd23, 0d3ED0EE258B7A8B04;
mov.f64 %fd24, 0d3EB1380B3AE80F1E;
.loc 2 223 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3EF3B2669F02676F;
.loc 2 223 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0d3F1745CBA9AB0956;
.loc 2 223 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F3C71C72D1B5154;
.loc 2 223 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F624924923BE72D;
.loc 2 223 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F8999999999A3C4;
.loc 2 223 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3FB5555555555554;
.loc 2 223 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
// Residual of the division: %fd41 = 2*(m-1) - u*(m+1); %fd42 = r * residual
// is the low-order correction to u.
sub.f64 %fd38, %fd19, %fd21;
add.f64 %fd39, %fd38, %fd38;
neg.f64 %fd40, %fd21;
fma.rn.f64 %fd41, %fd40, %fd19, %fd39;
mul.f64 %fd42, %fd18, %fd41;
// %fd44 = poly(u^2) * u^2 * u + correction.
mul.f64 %fd43, %fd37, %fd22;
fma.rn.f64 %fd44, %fd43, %fd21, %fd42;
// %fd47 = e * 0d3FE62E42FEFA39EF + u (high part of the result).
cvt.rn.f64.s32 %fd45, %r54;
mov.f64 %fd46, 0d3FE62E42FEFA39EF;
.loc 2 223 10
fma.rn.f64 %fd47, %fd45, %fd46, %fd21;
// Recover what was lost when u was added into %fd47: (%fd47 - e*c) - u.
neg.s32 %r45, %r54;
cvt.rn.f64.s32 %fd48, %r45;
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
sub.f64 %fd50, %fd49, %fd21;
sub.f64 %fd51, %fd44, %fd50;
// Add e times the low-order companion constant 0d3C7ABC9E3B39803F.
mov.f64 %fd52, 0d3C7ABC9E3B39803F;
.loc 2 223 10
fma.rn.f64 %fd53, %fd45, %fd52, %fd51;
add.f64 %fd55, %fd47, %fd53;
// Common store point for all paths; %fd55 is the result.
BB29_16:
.loc 1 46 42
add.s32 %r46, %r50, %r5;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 46 42
st.global.f64 [%rd8], %fd55;
.loc 1 46 22
// Advance y by the y step and repeat while it is below the y bound.
add.s32 %r50, %r2, %r50;
.loc 1 46 1
setp.lt.s32 %p11, %r50, %r20;
@%p11 bra BB29_4;
BB29_17:
.loc 1 46 22
// Advance x by nctaid.x * ntid.x and repeat while it is below the x bound.
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r24, %r49;
.loc 1 46 1
setp.lt.s32 %p12, %r49, %r21;
@%p12 bra BB29_2;
BB29_18:
.loc 1 46 2
ret;
}
// map_nearbyint_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y index
//   param_1 (u32): exclusive upper bound for the x index
//   param_2 (u64): destination base address (generic; converted to global)
//   param_3 (u32): destination stride, in f64 elements, per unit of x
//   param_4 (u64): source base address (generic; converted to global)
//   param_5 (u32): source stride, in f64 elements, per unit of x
// Each thread starts at x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y
// and advances by nctaid.x*ntid.x / nctaid.y*ntid.y. For every (x, y) in range
// it rounds src[x*param_5 + y] to an integral f64 with cvt.rni (round to
// nearest integer) and writes it to dst[x*param_3 + y].
.visible .entry map_nearbyint_double(
.param .u32 map_nearbyint_double_param_0,
.param .u32 map_nearbyint_double_param_1,
.param .u64 map_nearbyint_double_param_2,
.param .u32 map_nearbyint_double_param_3,
.param .u64 map_nearbyint_double_param_4,
.param .u32 map_nearbyint_double_param_5
)
{
	.reg .pred %x_done, %y_done, %y_more, %x_more;
	.reg .s32 %y_end, %x_end, %dst_ld, %src_ld;
	.reg .s32 %bdx, %bix, %tix, %bdy, %biy, %tiy, %gdx, %gdy;
	.reg .s32 %ix, %iy, %iy0, %x_step, %y_step;
	.reg .s32 %src_col, %dst_col, %src_idx, %dst_idx;
	.reg .s64 %dst_arg, %src_arg, %dst_base, %src_base;
	.reg .s64 %src_off, %dst_off, %src_ptr, %dst_ptr;
	.reg .f64 %v_in, %v_out;

	// Fetch the kernel arguments.
	ld.param.u32 %y_end, [map_nearbyint_double_param_0];
	ld.param.u32 %x_end, [map_nearbyint_double_param_1];
	ld.param.u64 %dst_arg, [map_nearbyint_double_param_2];
	ld.param.u32 %dst_ld, [map_nearbyint_double_param_3];
	ld.param.u64 %src_arg, [map_nearbyint_double_param_4];
	ld.param.u32 %src_ld, [map_nearbyint_double_param_5];
	cvta.to.global.u64 %dst_base, %dst_arg;
	cvta.to.global.u64 %src_base, %src_arg;

	// First x handled by this thread; leave immediately if it is out of range.
	.loc 1 47 1
	mov.u32 %bdx, %ntid.x;
	mov.u32 %bix, %ctaid.x;
	mov.u32 %tix, %tid.x;
	mad.lo.s32 %ix, %bdx, %bix, %tix;
	setp.ge.s32 %x_done, %ix, %x_end;
	@%x_done bra NEARBYINT_EXIT;

	// First y handled by this thread, plus the per-dimension loop steps.
	mov.u32 %bdy, %ntid.y;
	mov.u32 %biy, %ctaid.y;
	mov.u32 %tiy, %tid.y;
	mad.lo.s32 %iy0, %bdy, %biy, %tiy;
	.loc 1 47 22
	mov.u32 %gdx, %nctaid.x;
	mul.lo.s32 %x_step, %gdx, %bdx;
	mov.u32 %gdy, %nctaid.y;
	mul.lo.s32 %y_step, %gdy, %bdy;

NEARBYINT_X_LOOP:
	.loc 1 47 1
	// Skip the inner loop entirely when the starting y is out of range.
	setp.ge.s32 %y_done, %iy0, %y_end;
	@%y_done bra NEARBYINT_X_NEXT;
	// Element offsets of the current x in the source and destination.
	mul.lo.s32 %src_col, %ix, %src_ld;
	.loc 1 47 42
	mul.lo.s32 %dst_col, %ix, %dst_ld;
	mov.u32 %iy, %iy0;

NEARBYINT_Y_LOOP:
	.loc 1 47 1
	// Load src[x*src_ld + y] (8 bytes per element).
	add.s32 %src_idx, %iy, %src_col;
	mul.wide.s32 %src_off, %src_idx, 8;
	add.s64 %src_ptr, %src_base, %src_off;
	ld.global.f64 %v_in, [%src_ptr];
	.loc 2 86 10
	// Round to nearest integer, keeping the f64 format.
	cvt.rni.f64.f64 %v_out, %v_in;
	.loc 1 47 42
	// Store to dst[x*dst_ld + y].
	add.s32 %dst_idx, %iy, %dst_col;
	mul.wide.s32 %dst_off, %dst_idx, 8;
	add.s64 %dst_ptr, %dst_base, %dst_off;
	st.global.f64 [%dst_ptr], %v_out;
	.loc 1 47 22
	add.s32 %iy, %iy, %y_step;
	.loc 1 47 1
	setp.lt.s32 %y_more, %iy, %y_end;
	@%y_more bra NEARBYINT_Y_LOOP;

NEARBYINT_X_NEXT:
	.loc 1 47 22
	add.s32 %ix, %ix, %x_step;
	.loc 1 47 1
	setp.lt.s32 %x_more, %ix, %x_end;
	@%x_more bra NEARBYINT_X_LOOP;

NEARBYINT_EXIT:
	.loc 1 47 2
	ret;
}
// map_normcdf_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y index
//   param_1 (u32): exclusive upper bound for the x index
//   param_2 (u64): destination base address (generic; converted to global)
//   param_3 (u32): destination stride, in f64 elements, per unit of x
//   param_4 (u64): source base address (generic; converted to global)
//   param_5 (u32): source stride, in f64 elements, per unit of x
// For each (x, y) it loads v = src[x*param_5 + y], clamps |v| to 38.5, forms
// t = v * 0dBFE6A09E667F3BCD (about -1/sqrt(2)) with a compensation term,
// evaluates an inlined complementary-error-function style expansion on |t|
// (attributed by .loc 2 413 to math_functions_dbl_ptx3.h), halves the result
// and stores it at dst[x*param_3 + y].
.visible .entry map_normcdf_double(
.param .u32 map_normcdf_double_param_0,
.param .u32 map_normcdf_double_param_1,
.param .u64 map_normcdf_double_param_2,
.param .u32 map_normcdf_double_param_3,
.param .u64 map_normcdf_double_param_4,
.param .u32 map_normcdf_double_param_5
)
{
.reg .pred %p<11>;
.reg .f32 %f<9>;
.reg .s32 %r<51>;
.reg .s64 %rd<9>;
.reg .f64 %fd<146>;
// %r11 = y bound, %r12 = x bound, %r13 = dst stride, %r14 = src stride.
ld.param.u32 %r11, [map_normcdf_double_param_0];
ld.param.u32 %r12, [map_normcdf_double_param_1];
ld.param.u64 %rd1, [map_normcdf_double_param_2];
ld.param.u32 %r13, [map_normcdf_double_param_3];
ld.param.u64 %rd2, [map_normcdf_double_param_4];
ld.param.u32 %r14, [map_normcdf_double_param_5];
.loc 1 48 1
// %r48 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r48, %r15, %r16, %r17;
.loc 1 48 1
setp.ge.s32 %p1, %r48, %r12;
@%p1 bra BB31_15;
// %rd3 = source base, %rd6 = destination base (global address space).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// Outer loop over x; the starting y is recomputed on every pass.
BB31_2:
.loc 1 48 1
// %r49 = y = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %ctaid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r49, %r19, %r18, %r20;
.loc 1 48 1
setp.ge.s32 %p2, %r49, %r11;
@%p2 bra BB31_14;
// Inner loop over y.
BB31_3:
.loc 1 48 1
// %fd143 = v = src[x * src stride + y].
mad.lo.s32 %r25, %r48, %r14, %r49;
mul.wide.s32 %rd4, %r25, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 1 48 1
ld.global.f64 %fd143, [%rd5];
.loc 2 413 10
// Clamp: when |v| > 0d4043400000000000 (38.5), replace v by 38.5 carrying the
// sign bit of v. leu is "less-or-equal or unordered", so NaN skips the clamp.
abs.f64 %fd18, %fd143;
setp.leu.f64 %p3, %fd18, 0d4043400000000000;
@%p3 bra BB31_5;
{
.reg .b32 %temp;
mov.b64 {%temp, %r26}, %fd143;
}
// %r27 = sign bit of v.
and.b32 %r27, %r26, -2147483648;
mov.f64 %fd19, 0d4043400000000000;
.loc 2 413 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd19;
}
and.b32 %r29, %r28, 2147483647;
or.b32 %r30, %r29, %r27;
{
.reg .b32 %temp;
mov.b64 {%r31, %temp}, %fd19;
}
mov.b64 %fd143, {%r31, %r30};
BB31_5:
// %fd4 = rounded product v*c with c = 0dBFE6A09E667F3BCD; %fd22 = its rounding
// error (fma(v, c, -%fd4)); %fd5 adds v times the low-order companion constant
// 0d3C8BDD3413B26456; %fd6 = t = %fd4 + %fd5; %fd7 = a = |t|.
mov.f64 %fd20, 0dBFE6A09E667F3BCD;
.loc 2 413 10
mul.rn.f64 %fd4, %fd143, %fd20;
neg.f64 %fd21, %fd4;
fma.rn.f64 %fd22, %fd143, %fd20, %fd21;
mov.f64 %fd23, 0d3C8BDD3413B26456;
.loc 2 413 10
fma.rn.f64 %fd5, %fd143, %fd23, %fd22;
add.rn.f64 %fd6, %fd4, %fd5;
abs.f64 %fd7, %fd6;
// "Greater than or unordered" against +inf is true only for NaN.
setp.gtu.f64 %p4, %fd7, 0d7FF0000000000000;
@%p4 bra BB31_10;
// %fd28 = a - 4, %fd24 = a + 4.
add.f64 %fd28, %fd7, 0dC010000000000000;
mov.f64 %fd29, 0dC010000000000000;
.loc 2 413 10
add.f64 %fd24, %fd7, 0d4010000000000000;
// Single-precision reciprocal estimate of (a + 4), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd24;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd25,%f2;
// inline asm
// Refine the reciprocal (%fd34), then form q = (a-4)/(a+4) in %fd35 and
// correct it with the division residual to obtain %fd40.
neg.f64 %fd30, %fd24;
mov.f64 %fd31, 0d3FF0000000000000;
.loc 2 413 10
fma.rn.f64 %fd32, %fd30, %fd25, %fd31;
fma.rn.f64 %fd33, %fd32, %fd32, %fd32;
fma.rn.f64 %fd34, %fd33, %fd25, %fd25;
mul.f64 %fd35, %fd28, %fd34;
add.rn.f64 %fd36, %fd35, %fd31;
fma.rn.f64 %fd37, %fd29, %fd36, %fd7;
neg.f64 %fd38, %fd35;
fma.rn.f64 %fd39, %fd38, %fd7, %fd37;
fma.rn.f64 %fd40, %fd34, %fd39, %fd35;
// Horner evaluation of a polynomial in %fd40 (coefficients from file 2);
// the value ends up in %fd85.
mov.f64 %fd41, 0dBE44E1C6FD03D328;
mov.f64 %fd42, 0dBDF8774AD4E0BFD7;
.loc 2 413 10
fma.rn.f64 %fd43, %fd42, %fd40, %fd41;
mov.f64 %fd44, 0dBE4330149F7A56B6;
.loc 2 413 10
fma.rn.f64 %fd45, %fd43, %fd40, %fd44;
mov.f64 %fd46, 0d3E7BEDDED8376273;
.loc 2 413 10
fma.rn.f64 %fd47, %fd45, %fd40, %fd46;
mov.f64 %fd48, 0d3E6F9254C3ABF22B;
.loc 2 413 10
fma.rn.f64 %fd49, %fd47, %fd40, %fd48;
mov.f64 %fd50, 0dBEAB9068C2148CF0;
.loc 2 413 10
fma.rn.f64 %fd51, %fd49, %fd40, %fd50;
mov.f64 %fd52, 0d3E94C6454DB34009;
.loc 2 413 10
fma.rn.f64 %fd53, %fd51, %fd40, %fd52;
mov.f64 %fd54, 0d3ED7F1C378F2311D;
.loc 2 413 10
fma.rn.f64 %fd55, %fd53, %fd40, %fd54;
mov.f64 %fd56, 0dBEE78E051C6D5C58;
.loc 2 413 10
fma.rn.f64 %fd57, %fd55, %fd40, %fd56;
mov.f64 %fd58, 0dBEF995B4EAD14A90;
.loc 2 413 10
fma.rn.f64 %fd59, %fd57, %fd40, %fd58;
mov.f64 %fd60, 0d3F23BE27CF0A29B2;
.loc 2 413 10
fma.rn.f64 %fd61, %fd59, %fd40, %fd60;
mov.f64 %fd62, 0dBF2A1DEF3E81672E;
.loc 2 413 10
fma.rn.f64 %fd63, %fd61, %fd40, %fd62;
mov.f64 %fd64, 0dBF48D4ABE68C1713;
.loc 2 413 10
fma.rn.f64 %fd65, %fd63, %fd40, %fd64;
mov.f64 %fd66, 0d3F749C67210DD6B4;
.loc 2 413 10
fma.rn.f64 %fd67, %fd65, %fd40, %fd66;
mov.f64 %fd68, 0dBF9096238568E357;
.loc 2 413 10
fma.rn.f64 %fd69, %fd67, %fd40, %fd68;
mov.f64 %fd70, 0d3FA3079EDF8C2DC9;
.loc 2 413 10
fma.rn.f64 %fd71, %fd69, %fd40, %fd70;
mov.f64 %fd72, 0dBFB0FB06DFF601FC;
.loc 2 413 10
fma.rn.f64 %fd73, %fd71, %fd40, %fd72;
mov.f64 %fd74, 0d3FB7FEE004DFBCDC;
.loc 2 413 10
fma.rn.f64 %fd75, %fd73, %fd40, %fd74;
mov.f64 %fd76, 0dBFB9DDB23C3DB8C6;
.loc 2 413 10
fma.rn.f64 %fd77, %fd75, %fd40, %fd76;
mov.f64 %fd78, 0d3FB16ECEFCFA5FDA;
.loc 2 413 10
fma.rn.f64 %fd79, %fd77, %fd40, %fd78;
mov.f64 %fd80, 0d3F8F7F5DF66FB6D6;
.loc 2 413 10
fma.rn.f64 %fd81, %fd79, %fd40, %fd80;
mov.f64 %fd82, 0dBFC1DF1AD154A29D;
.loc 2 413 10
fma.rn.f64 %fd83, %fd81, %fd40, %fd82;
mov.f64 %fd84, 0d3FF3BA5916E9FD7F;
.loc 2 413 10
fma.rn.f64 %fd85, %fd83, %fd40, %fd84;
// %fd26 = 2*a + 1.
mov.f64 %fd86, 0d4000000000000000;
.loc 2 413 10
fma.rn.f64 %fd26, %fd86, %fd7, %fd31;
// Single-precision reciprocal estimate of (2a + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f5,%fd26;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd27,%f6;
// inline asm
// Refine the reciprocal (%fd90) and compute %fd8 = %fd85 / (2a + 1) with a
// residual correction step.
neg.f64 %fd87, %fd26;
fma.rn.f64 %fd88, %fd87, %fd27, %fd31;
fma.rn.f64 %fd89, %fd88, %fd88, %fd88;
fma.rn.f64 %fd90, %fd89, %fd27, %fd27;
mul.f64 %fd91, %fd85, %fd90;
mul.f64 %fd92, %fd91, 0dC000000000000000;
fma.rn.f64 %fd93, %fd7, %fd92, %fd85;
neg.f64 %fd94, %fd91;
add.rn.f64 %fd95, %fd93, %fd94;
fma.rn.f64 %fd8, %fd95, %fd90, %fd91;
// %fd9 = -a*a. The following block evaluates an exponential of %fd9:
// n = round(%fd9 * 0d3FF71547652B82FE) in %fd98 / %r5, reduced argument
// %fd102 = %fd9 - n * (0d3FE62E42FEFA39EF + 0d3C7ABC9E3B39803F), then a
// Horner polynomial in %fd102 giving %fd144.
neg.f64 %fd96, %fd7;
mul.f64 %fd9, %fd7, %fd96;
mul.f64 %fd97, %fd9, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd98, %fd97;
cvt.rzi.s32.f64 %r5, %fd98;
mov.f64 %fd99, 0dBFE62E42FEFA39EF;
.loc 2 413 10
fma.rn.f64 %fd100, %fd98, %fd99, %fd9;
mov.f64 %fd101, 0dBC7ABC9E3B39803F;
.loc 2 413 10
fma.rn.f64 %fd102, %fd98, %fd101, %fd100;
mov.f64 %fd103, 0d3E928A27E30F5561;
mov.f64 %fd104, 0d3E5AE6449C0686C0;
.loc 2 413 10
fma.rn.f64 %fd105, %fd104, %fd102, %fd103;
mov.f64 %fd106, 0d3EC71DE8E6486D6B;
.loc 2 413 10
fma.rn.f64 %fd107, %fd105, %fd102, %fd106;
mov.f64 %fd108, 0d3EFA019A6B2464C5;
.loc 2 413 10
fma.rn.f64 %fd109, %fd107, %fd102, %fd108;
mov.f64 %fd110, 0d3F2A01A0171064A5;
.loc 2 413 10
fma.rn.f64 %fd111, %fd109, %fd102, %fd110;
mov.f64 %fd112, 0d3F56C16C17F29C8D;
.loc 2 413 10
fma.rn.f64 %fd113, %fd111, %fd102, %fd112;
mov.f64 %fd114, 0d3F8111111111A24E;
.loc 2 413 10
fma.rn.f64 %fd115, %fd113, %fd102, %fd114;
mov.f64 %fd116, 0d3FA555555555211D;
.loc 2 413 10
fma.rn.f64 %fd117, %fd115, %fd102, %fd116;
mov.f64 %fd118, 0d3FC5555555555530;
.loc 2 413 10
fma.rn.f64 %fd119, %fd117, %fd102, %fd118;
mov.f64 %fd120, 0d3FE0000000000005;
.loc 2 413 10
fma.rn.f64 %fd121, %fd119, %fd102, %fd120;
fma.rn.f64 %fd122, %fd121, %fd102, %fd31;
fma.rn.f64 %fd144, %fd122, %fd102, %fd31;
// Scale by 2^n through the exponent bits. When |n| < 1023 a single factor
// with high word (n << 20) + 0x3FF00000 is enough (BB31_8); otherwise the
// scale is split into two factors, the first applied here.
abs.s32 %r32, %r5;
setp.lt.s32 %p5, %r32, 1023;
@%p5 bra BB31_8;
add.s32 %r33, %r5, 2046;
shl.b32 %r34, %r33, 19;
and.b32 %r35, %r34, -1048576;
shl.b32 %r36, %r33, 20;
sub.s32 %r50, %r36, %r35;
mov.u32 %r37, 0;
.loc 2 413 10
mov.b64 %fd123, {%r37, %r35};
mul.f64 %fd144, %fd144, %fd123;
bra.uni BB31_9;
BB31_8:
.loc 2 413 10
shl.b32 %r38, %r5, 20;
add.s32 %r50, %r38, 1072693248;
BB31_9:
mov.f64 %fd140, 0d4000000000000000;
.loc 2 413 10
neg.f64 %fd139, %fd7;
mov.u32 %r39, 0;
.loc 2 413 10
// %fd125 = polynomial value times the (remaining) power-of-two factor.
mov.b64 %fd124, {%r39, %r50};
mul.f64 %fd125, %fd144, %fd124;
// %fd128 = fma(-a, a, -%fd9) is the rounding error of the product in %fd9;
// it is folded back in as %fd125 * (1 + %fd128).
neg.f64 %fd127, %fd9;
fma.rn.f64 %fd128, %fd139, %fd7, %fd127;
fma.rn.f64 %fd129, %fd125, %fd128, %fd125;
mul.f64 %fd130, %fd8, %fd129;
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd7;
}
// Force the value to 0 when the high word of a exceeds 0x403B4000 (a > ~27.25).
setp.gt.s32 %p6, %r40, 1077624832;
selp.f64 %fd131, 0d0000000000000000, %fd130, %p6;
{
.reg .b32 %temp;
mov.b64 {%temp, %r41}, %fd6;
}
// For negative t (sign bit set in the high word) use 2 - value instead.
setp.lt.s32 %p7, %r41, 0;
sub.f64 %fd133, %fd140, %fd131;
selp.f64 %fd145, %fd133, %fd131, %p7;
bra.uni BB31_11;
BB31_10:
.loc 2 413 10
// NaN: t + t propagates the NaN.
add.f64 %fd145, %fd6, %fd6;
BB31_11:
// Extra correction only for v < -1.0 (geu is also true for NaN, which skips).
setp.geu.f64 %p8, %fd143, 0dBFF0000000000000;
@%p8 bra BB31_13;
// %fd135 = (%fd4 - t) + %fd5 is the part of the compensated product that the
// rounded t lost; result += (-2 * t * result) * %fd135.
mov.f64 %fd142, 0dBFE6A09E667F3BCD;
.loc 2 413 10
mul.rn.f64 %fd141, %fd143, %fd142;
sub.f64 %fd134, %fd141, %fd6;
add.rn.f64 %fd135, %fd134, %fd5;
mul.f64 %fd136, %fd6, 0dC000000000000000;
mul.f64 %fd137, %fd136, %fd145;
fma.rn.f64 %fd145, %fd137, %fd135, %fd145;
BB31_13:
.loc 1 48 1
mov.u32 %r47, %ntid.y;
.loc 1 48 42
// Destination address for dst[x * dst stride + y].
mad.lo.s32 %r42, %r48, %r13, %r49;
mul.wide.s32 %rd7, %r42, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 413 10
// Final result = 0.5 * %fd145.
mul.f64 %fd138, %fd145, 0d3FE0000000000000;
.loc 1 48 42
st.global.f64 [%rd8], %fd138;
.loc 1 48 22
// Advance y by nctaid.y * ntid.y and repeat while it is below the y bound.
mov.u32 %r44, %nctaid.y;
mad.lo.s32 %r49, %r44, %r47, %r49;
.loc 1 48 1
setp.lt.s32 %p9, %r49, %r11;
@%p9 bra BB31_3;
BB31_14:
.loc 1 48 22
// Advance x by nctaid.x * ntid.x and repeat while it is below the x bound.
mov.u32 %r45, %nctaid.x;
mad.lo.s32 %r48, %r45, %r15, %r48;
.loc 1 48 1
setp.lt.s32 %p10, %r48, %r12;
@%p10 bra BB31_2;
BB31_15:
.loc 1 48 2
ret;
}
// map_normcdfinv_double: element-wise kernel over a 2-D index space.
//   param_0 (u32): exclusive upper bound for the y index
//   param_1 (u32): exclusive upper bound for the x index
//   param_2 (u64): destination base address (generic; converted to global)
//   param_3 (u32): destination stride, in f64 elements, per unit of x
//   param_4 (u64): source base address (generic; converted to global)
//   param_5 (u32): source stride, in f64 elements, per unit of x
// For each (x, y) it loads p = src[x*param_5 + y], forms a = 2p and b = 2 - 2p,
// and evaluates one of three inlined approximations (attributed by .loc 2 394
// to math_functions_dbl_ptx3.h):
//   * central range of a  -> BB32_20 (polynomial in a log-derived variable)
//   * tail, not tiny      -> fall-through path (fast log + rational function)
//   * tail, tiny/special  -> BB32_6 (full log with special cases + rational)
// The selected value is multiplied by about -sqrt(2) and stored at
// dst[x*param_3 + y].
.visible .entry map_normcdfinv_double(
.param .u32 map_normcdfinv_double_param_0,
.param .u32 map_normcdfinv_double_param_1,
.param .u64 map_normcdfinv_double_param_2,
.param .u32 map_normcdfinv_double_param_3,
.param .u64 map_normcdfinv_double_param_4,
.param .u32 map_normcdfinv_double_param_5
)
{
.reg .pred %p<19>;
.reg .f32 %f<17>;
.reg .s32 %r<69>;
.reg .s64 %rd<9>;
.reg .f64 %fd<265>;
// %r20 = y bound, %r21 = x bound, %r22 = dst stride, %r23 = src stride.
ld.param.u32 %r20, [map_normcdfinv_double_param_0];
ld.param.u32 %r21, [map_normcdfinv_double_param_1];
ld.param.u64 %rd2, [map_normcdfinv_double_param_2];
ld.param.u32 %r22, [map_normcdfinv_double_param_3];
ld.param.u64 %rd3, [map_normcdfinv_double_param_4];
ld.param.u32 %r23, [map_normcdfinv_double_param_5];
// %rd1 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
.loc 1 49 1
// %r63 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r63, %r24, %r25, %r26;
.loc 1 49 1
setp.ge.s32 %p1, %r63, %r21;
@%p1 bra BB32_23;
.loc 1 49 1
mov.u32 %r27, %ntid.y;
.loc 1 49 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// %rd6 = destination base (global address space).
cvta.to.global.u64 %rd6, %rd2;
// Outer loop over x; the starting y is recomputed on every pass.
BB32_2:
.loc 1 49 1
// %r64 = y = ntid.y * ctaid.y + tid.y
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r64, %r27, %r29, %r31;
.loc 1 49 1
setp.ge.s32 %p2, %r64, %r20;
@%p2 bra BB32_22;
// Inner loop over y.
BB32_3:
.loc 1 49 1
// %fd18 = p = src[x * src stride + y].
mul.lo.s32 %r62, %r63, %r23;
add.s32 %r36, %r64, %r62;
mul.wide.s32 %rd4, %r36, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 1 49 1
ld.global.f64 %fd18, [%rd5];
.loc 2 394 10
// %fd1 = a = 2p, %fd2 = -a, %fd3 = b = 2 - a.
add.f64 %fd1, %fd18, %fd18;
neg.f64 %fd2, %fd1;
mov.f64 %fd19, 0d4000000000000000;
.loc 2 394 10
add.rn.f64 %fd3, %fd19, %fd2;
// Central range: 0d3F4FA4D2AD8F904D <= a <= 0d3FFFFC0B65AA4E0E -> BB32_20.
// Both comparisons are ordered, so NaN falls through to the tail handling.
setp.le.f64 %p3, %fd1, 0d3FFFFC0B65AA4E0E;
setp.ge.f64 %p4, %fd1, 0d3F4FA4D2AD8F904D;
and.pred %p5, %p4, %p3;
@%p5 bra BB32_20;
// Tail handling. %p6 = (a > 1) selects b instead of a as the working value
// %fd4 and is reused at BB32_19 to negate the result.
setp.gt.f64 %p6, %fd1, 0d3FF0000000000000;
selp.f64 %fd4, %fd3, %fd1, %p6;
// ltu is "less than or unordered": tiny values and NaN go to BB32_6.
setp.ltu.f64 %p7, %fd4, 0d2B2BFF2EE48E0530;
// %r65 = high word, %r66 = low word of %fd4.
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd4;
}
@%p7 bra BB32_6;
// Fast logarithm-style range reduction: %fd24 = exponent-derived integer
// ((hi >> 20) & 2046) - 1022, and %fd25 = value rebuilt from the masked high
// word (mask 0x801FFFFF) plus 0x3FE00000.
shr.u32 %r37, %r65, 20;
and.b32 %r38, %r37, 2046;
add.s32 %r39, %r38, -1022;
cvt.rn.f64.s32 %fd24, %r39;
and.b32 %r40, %r65, -2145386497;
add.s32 %r41, %r40, 1071644672;
mov.b64 %fd25, {%r66, %r41};
// %fd26 = m - 1, %fd20 = m + 1 for m = %fd25.
add.f64 %fd26, %fd25, 0dBFF0000000000000;
add.f64 %fd20, %fd25, 0d3FF0000000000000;
mov.f64 %fd27, 0d3FF0000000000000;
.loc 2 394 10
// Single-precision reciprocal estimate of (m + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd20;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd21,%f2;
// inline asm
// Refine the reciprocal (%fd31) and compute %fd37 = (m-1)/(m+1) with a
// residual correction; %fd38 = its square.
neg.f64 %fd28, %fd20;
fma.rn.f64 %fd29, %fd28, %fd21, %fd27;
fma.rn.f64 %fd30, %fd29, %fd29, %fd29;
fma.rn.f64 %fd31, %fd30, %fd21, %fd21;
mul.f64 %fd32, %fd26, %fd31;
mov.f64 %fd33, 0dC000000000000000;
.loc 2 394 10
fma.rn.f64 %fd34, %fd33, %fd32, %fd26;
neg.f64 %fd35, %fd32;
fma.rn.f64 %fd36, %fd35, %fd26, %fd34;
fma.rn.f64 %fd37, %fd36, %fd31, %fd32;
mul.f64 %fd38, %fd37, %fd37;
// Horner polynomial in %fd38 (coefficients from file 2).
mov.f64 %fd39, 0d3FA55CF59CDC5D89;
mov.f64 %fd40, 0d3FB5C5C218C775C9;
.loc 2 394 10
fma.rn.f64 %fd41, %fd40, %fd38, %fd39;
mov.f64 %fd42, 0d3FAEFD18CF6EBB9C;
.loc 2 394 10
fma.rn.f64 %fd43, %fd41, %fd38, %fd42;
mov.f64 %fd44, 0d3FB10682EDCB8D1B;
.loc 2 394 10
fma.rn.f64 %fd45, %fd43, %fd38, %fd44;
mov.f64 %fd46, 0d3FB3B1DD3AC7FC96;
.loc 2 394 10
fma.rn.f64 %fd47, %fd45, %fd38, %fd46;
mov.f64 %fd48, 0d3FB745CB459B54A6;
.loc 2 394 10
fma.rn.f64 %fd49, %fd47, %fd38, %fd48;
mov.f64 %fd50, 0d3FBC71C741A0669F;
.loc 2 394 10
fma.rn.f64 %fd51, %fd49, %fd38, %fd50;
mov.f64 %fd52, 0d3FC249249209112E;
.loc 2 394 10
fma.rn.f64 %fd53, %fd51, %fd38, %fd52;
mov.f64 %fd54, 0d3FC99999999A06C1;
.loc 2 394 10
fma.rn.f64 %fd55, %fd53, %fd38, %fd54;
mov.f64 %fd56, 0d3FD5555555555535;
.loc 2 394 10
fma.rn.f64 %fd57, %fd55, %fd38, %fd56;
// %fd62 = %fd24 * 0d3FE62E42FEFA39EF + 2 * (series value); %fd22 = -%fd62.
mul.f64 %fd58, %fd57, %fd38;
fma.rn.f64 %fd59, %fd58, %fd37, %fd37;
add.f64 %fd60, %fd59, %fd59;
mov.f64 %fd61, 0d3FE62E42FEFA39EF;
.loc 2 394 10
fma.rn.f64 %fd62, %fd24, %fd61, %fd60;
neg.f64 %fd22, %fd62;
// Single-precision reciprocal-square-root estimate of %fd22, widened to f64.
// inline asm
cvt.rn.f32.f64 %f5, %fd22;
// inline asm
// inline asm
rsqrt.approx.ftz.f32 %f6, %f5;
// inline asm
// inline asm
cvt.f64.f32 %fd23, %f6;
// inline asm
// Refine the estimate: err = 1 - %fd22 * r^2, %fd70 = r + (0.5 + 0.375*err)*err*r.
mul.rn.f64 %fd63, %fd23, %fd23;
neg.f64 %fd64, %fd63;
fma.rn.f64 %fd65, %fd22, %fd64, %fd27;
mov.f64 %fd66, 0d3FE0000000000000;
mov.f64 %fd67, 0d3FD8000000000000;
.loc 2 394 10
fma.rn.f64 %fd68, %fd67, %fd65, %fd66;
mul.rn.f64 %fd69, %fd65, %fd23;
fma.rn.f64 %fd70, %fd68, %fd69, %fd23;
// Numerator polynomial in %fd70 -> %fd85.
mov.f64 %fd71, 0d4000A0E7333839AA;
mov.f64 %fd72, 0d3FEBE9222591AFAB;
.loc 2 394 10
fma.rn.f64 %fd73, %fd72, %fd70, %fd71;
mov.f64 %fd74, 0d4008768CF7E57D5C;
.loc 2 394 10
fma.rn.f64 %fd75, %fd73, %fd70, %fd74;
mov.f64 %fd76, 0d400B77E7E28DA583;
.loc 2 394 10
fma.rn.f64 %fd77, %fd75, %fd70, %fd76;
mov.f64 %fd78, 0d3FF34F26A4F99CF9;
.loc 2 394 10
fma.rn.f64 %fd79, %fd77, %fd70, %fd78;
mov.f64 %fd80, 0d3FC1F674ADB019ED;
.loc 2 394 10
fma.rn.f64 %fd81, %fd79, %fd70, %fd80;
mov.f64 %fd82, 0d3F75DDAE9506431D;
.loc 2 394 10
fma.rn.f64 %fd83, %fd81, %fd70, %fd82;
mov.f64 %fd84, 0d3F0ADA49AA32489C;
.loc 2 394 10
fma.rn.f64 %fd85, %fd83, %fd70, %fd84;
// Denominator polynomial in %fd70 -> %fd100, then multiplied by %fd70.
add.f64 %fd86, %fd70, 0d4001E90FF51C2197;
mov.f64 %fd87, 0d40111EA3A7CF3820;
.loc 2 394 10
fma.rn.f64 %fd88, %fd86, %fd70, %fd87;
mov.f64 %fd89, 0d4011A0E4A4749594;
.loc 2 394 10
fma.rn.f64 %fd90, %fd88, %fd70, %fd89;
mov.f64 %fd91, 0d400D4E977D38C14D;
.loc 2 394 10
fma.rn.f64 %fd92, %fd90, %fd70, %fd91;
mov.f64 %fd93, 0d3FF37FD567EC0D5F;
.loc 2 394 10
fma.rn.f64 %fd94, %fd92, %fd70, %fd93;
mov.f64 %fd95, 0d3FC1FB9D7F676033;
.loc 2 394 10
fma.rn.f64 %fd96, %fd94, %fd70, %fd95;
mov.f64 %fd97, 0d3F75DDCDF98946E4;
.loc 2 394 10
fma.rn.f64 %fd98, %fd96, %fd70, %fd97;
mov.f64 %fd99, 0d3F0ADA42D79D8DBB;
.loc 2 394 10
fma.rn.f64 %fd100, %fd98, %fd70, %fd99;
mul.f64 %fd101, %fd100, %fd70;
// %fd263 = numerator / denominator.
div.rn.f64 %fd263, %fd85, %fd101;
bra.uni BB32_19;
// Tiny / special working value: full logarithm of %fd4 with special cases,
// producing %fd262.
BB32_6:
.loc 2 394 10
// Main log path (BB32_12) only for 0 < %fd4 < +inf.
setp.gt.f64 %p8, %fd4, 0d0000000000000000;
setp.lt.f64 %p9, %fd4, 0d7FF0000000000000;
and.pred %p10, %p8, %p9;
@%p10 bra BB32_12;
// "Greater than or unordered" against +inf is true only for NaN.
abs.f64 %fd102, %fd4;
setp.gtu.f64 %p11, %fd102, 0d7FF0000000000000;
@%p11 bra BB32_11;
setp.neu.f64 %p12, %fd4, 0d0000000000000000;
@%p12 bra BB32_10;
// %fd4 == 0: log value is -inf.
mov.f64 %fd262, 0dFFF0000000000000;
bra.uni BB32_18;
BB32_10:
.loc 2 394 10
// +inf is passed through; any other remaining value (negative) yields the
// NaN pattern 0dFFF8000000000000.
setp.eq.f64 %p13, %fd4, 0d7FF0000000000000;
selp.f64 %fd262, %fd4, 0dFFF8000000000000, %p13;
bra.uni BB32_18;
BB32_11:
.loc 2 394 10
// NaN: value + value propagates the NaN.
add.f64 %fd262, %fd4, %fd4;
bra.uni BB32_18;
BB32_12:
.loc 2 394 10
// High word below 0x00100000 means a zero exponent field (subnormal).
setp.lt.u32 %p14, %r65, 1048576;
@%p14 bra BB32_14;
mov.u32 %r67, -1023;
bra.uni BB32_15;
BB32_14:
.loc 2 394 10
// Subnormal: scale by 0d4350000000000000 (2^54), re-read the words and fold
// the extra 54 into the bias (-1077).
mul.f64 %fd104, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd104;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd104;
}
mov.u32 %r67, -1077;
BB32_15:
.loc 2 394 10
// %r68 = unbiased exponent e; %fd261 = mantissa m with the exponent field
// forced to 0x3FF (mask 0x800FFFFF keeps sign and mantissa bits).
shr.s32 %r44, %r65, 20;
add.s32 %r68, %r67, %r44;
and.b32 %r45, %r65, -2146435073;
or.b32 %r46, %r45, 1072693248;
mov.b64 %fd261, {%r66, %r46};
// If the high word of m is at least 0x3FF6A09F, halve m and increment e.
setp.lt.u32 %p15, %r46, 1073127583;
@%p15 bra BB32_17;
{
.reg .b32 %temp;
mov.b64 {%r47, %temp}, %fd261;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r48}, %fd261;
}
add.s32 %r49, %r48, -1048576;
mov.b64 %fd261, {%r47, %r49};
add.s32 %r68, %r68, 1;
BB32_17:
// %fd105 = m + 1.
add.f64 %fd105, %fd261, 0d3FF0000000000000;
mov.f64 %fd107, 0d3FF0000000000000;
.loc 2 394 10
// Single-precision reciprocal estimate of (m + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f9,%fd105;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd106,%f10;
// inline asm
// Refine the reciprocal (%fd111); %fd112 = m - 1; %fd114 = u = 2*(m-1)/(m+1);
// %fd115 = u^2.
neg.f64 %fd108, %fd105;
fma.rn.f64 %fd109, %fd108, %fd106, %fd107;
fma.rn.f64 %fd110, %fd109, %fd109, %fd109;
fma.rn.f64 %fd111, %fd110, %fd106, %fd106;
add.f64 %fd112, %fd261, 0dBFF0000000000000;
mul.f64 %fd113, %fd112, %fd111;
fma.rn.f64 %fd114, %fd112, %fd111, %fd113;
mul.f64 %fd115, %fd114, %fd114;
// Horner polynomial in u^2 (coefficients from file 2).
mov.f64 %fd116, 0d3ED0EE258B7A8B04;
mov.f64 %fd117, 0d3EB1380B3AE80F1E;
.loc 2 394 10
fma.rn.f64 %fd118, %fd117, %fd115, %fd116;
mov.f64 %fd119, 0d3EF3B2669F02676F;
.loc 2 394 10
fma.rn.f64 %fd120, %fd118, %fd115, %fd119;
mov.f64 %fd121, 0d3F1745CBA9AB0956;
.loc 2 394 10
fma.rn.f64 %fd122, %fd120, %fd115, %fd121;
mov.f64 %fd123, 0d3F3C71C72D1B5154;
.loc 2 394 10
fma.rn.f64 %fd124, %fd122, %fd115, %fd123;
mov.f64 %fd125, 0d3F624924923BE72D;
.loc 2 394 10
fma.rn.f64 %fd126, %fd124, %fd115, %fd125;
mov.f64 %fd127, 0d3F8999999999A3C4;
.loc 2 394 10
fma.rn.f64 %fd128, %fd126, %fd115, %fd127;
mov.f64 %fd129, 0d3FB5555555555554;
.loc 2 394 10
fma.rn.f64 %fd130, %fd128, %fd115, %fd129;
// Division residual 2*(m-1) - u*(m+1), scaled by the reciprocal, gives the
// low-order correction to u (%fd135).
sub.f64 %fd131, %fd112, %fd114;
add.f64 %fd132, %fd131, %fd131;
neg.f64 %fd133, %fd114;
fma.rn.f64 %fd134, %fd133, %fd112, %fd132;
mul.f64 %fd135, %fd111, %fd134;
mul.f64 %fd136, %fd130, %fd115;
fma.rn.f64 %fd137, %fd136, %fd114, %fd135;
// %fd140 = e * 0d3FE62E42FEFA39EF + u; the next lines recover the rounding
// loss of that sum and add e times the low-order constant 0d3C7ABC9E3B39803F.
cvt.rn.f64.s32 %fd138, %r68;
mov.f64 %fd139, 0d3FE62E42FEFA39EF;
.loc 2 394 10
fma.rn.f64 %fd140, %fd138, %fd139, %fd114;
neg.s32 %r50, %r68;
cvt.rn.f64.s32 %fd141, %r50;
fma.rn.f64 %fd142, %fd141, %fd139, %fd140;
sub.f64 %fd143, %fd142, %fd114;
sub.f64 %fd144, %fd137, %fd143;
mov.f64 %fd145, 0d3C7ABC9E3B39803F;
.loc 2 394 10
fma.rn.f64 %fd146, %fd138, %fd145, %fd144;
add.f64 %fd262, %fd140, %fd146;
// %fd262 holds the log value (or a special-case value) here.
BB32_18:
// %fd148 = approximate 1/sqrt(-%fd262) (no refinement step on this path).
neg.f64 %fd147, %fd262;
rsqrt.approx.f64 %fd148, %fd147;
// Numerator polynomial in %fd148 -> %fd157.
mov.f64 %fd149, 0d3FFA2013964E259C;
mov.f64 %fd150, 0d3FE8E2101C71B0BF;
.loc 2 394 10
fma.rn.f64 %fd151, %fd150, %fd148, %fd149;
mov.f64 %fd152, 0d3FDABFE90921BE68;
.loc 2 394 10
fma.rn.f64 %fd153, %fd151, %fd148, %fd152;
mov.f64 %fd154, 0d3F97E41314DE00D4;
.loc 2 394 10
fma.rn.f64 %fd155, %fd153, %fd148, %fd154;
mov.f64 %fd156, 0d3F311BD487102E94;
.loc 2 394 10
fma.rn.f64 %fd157, %fd155, %fd148, %fd156;
// Denominator polynomial in %fd148 -> %fd166, then multiplied by %fd148.
add.f64 %fd158, %fd148, 0d3FF59895C30BAA54;
mov.f64 %fd159, 0d3FFAE8E5956A143F;
.loc 2 394 10
fma.rn.f64 %fd160, %fd158, %fd148, %fd159;
mov.f64 %fd161, 0d3FDACCE85FF7383D;
.loc 2 394 10
fma.rn.f64 %fd162, %fd160, %fd148, %fd161;
mov.f64 %fd163, 0d3F97E43B6CAC34FE;
.loc 2 394 10
fma.rn.f64 %fd164, %fd162, %fd148, %fd163;
mov.f64 %fd165, 0d3F311BD08289EB12;
.loc 2 394 10
fma.rn.f64 %fd166, %fd164, %fd148, %fd165;
mul.f64 %fd167, %fd166, %fd148;
div.rn.f64 %fd263, %fd157, %fd167;
// Both tail paths join here: negate the value when a > 1 (%p6, set above).
BB32_19:
neg.f64 %fd168, %fd263;
selp.f64 %fd264, %fd168, %fd263, %p6;
bra.uni BB32_21;
// Central range.
BB32_20:
.loc 2 394 10
// %fd171 = b * a = (2 - 2p) * 2p; %r51 / %r52 = its high / low words.
mul.rn.f64 %fd171, %fd3, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd171;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd171;
}
// Same fast logarithm-style reduction as in the non-tiny tail path:
// %fd172 = exponent-derived integer, %fd173 = rebuilt mantissa value.
shr.u32 %r53, %r51, 20;
and.b32 %r54, %r53, 2046;
add.s32 %r55, %r54, -1022;
cvt.rn.f64.s32 %fd172, %r55;
and.b32 %r56, %r51, -2145386497;
add.s32 %r57, %r56, 1071644672;
mov.b64 %fd173, {%r52, %r57};
add.f64 %fd174, %fd173, 0dBFF0000000000000;
add.f64 %fd169, %fd173, 0d3FF0000000000000;
mov.f64 %fd175, 0d3FF0000000000000;
.loc 2 394 10
// Single-precision reciprocal estimate of (m + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f13,%fd169;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f14,%f13;
// inline asm
// inline asm
cvt.f64.f32 %fd170,%f14;
// inline asm
// Refine the reciprocal (%fd179) and compute %fd185 = (m-1)/(m+1) with a
// residual correction; %fd186 = its square.
neg.f64 %fd176, %fd169;
fma.rn.f64 %fd177, %fd176, %fd170, %fd175;
fma.rn.f64 %fd178, %fd177, %fd177, %fd177;
fma.rn.f64 %fd179, %fd178, %fd170, %fd170;
mul.f64 %fd180, %fd174, %fd179;
mov.f64 %fd181, 0dC000000000000000;
.loc 2 394 10
fma.rn.f64 %fd182, %fd181, %fd180, %fd174;
neg.f64 %fd183, %fd180;
fma.rn.f64 %fd184, %fd183, %fd174, %fd182;
fma.rn.f64 %fd185, %fd184, %fd179, %fd180;
mul.f64 %fd186, %fd185, %fd185;
// Horner polynomial in %fd186 (same coefficients as the tail fast path).
mov.f64 %fd187, 0d3FA55CF59CDC5D89;
mov.f64 %fd188, 0d3FB5C5C218C775C9;
.loc 2 394 10
fma.rn.f64 %fd189, %fd188, %fd186, %fd187;
mov.f64 %fd190, 0d3FAEFD18CF6EBB9C;
.loc 2 394 10
fma.rn.f64 %fd191, %fd189, %fd186, %fd190;
mov.f64 %fd192, 0d3FB10682EDCB8D1B;
.loc 2 394 10
fma.rn.f64 %fd193, %fd191, %fd186, %fd192;
mov.f64 %fd194, 0d3FB3B1DD3AC7FC96;
.loc 2 394 10
fma.rn.f64 %fd195, %fd193, %fd186, %fd194;
mov.f64 %fd196, 0d3FB745CB459B54A6;
.loc 2 394 10
fma.rn.f64 %fd197, %fd195, %fd186, %fd196;
mov.f64 %fd198, 0d3FBC71C741A0669F;
.loc 2 394 10
fma.rn.f64 %fd199, %fd197, %fd186, %fd198;
mov.f64 %fd200, 0d3FC249249209112E;
.loc 2 394 10
fma.rn.f64 %fd201, %fd199, %fd186, %fd200;
mov.f64 %fd202, 0d3FC99999999A06C1;
.loc 2 394 10
fma.rn.f64 %fd203, %fd201, %fd186, %fd202;
mov.f64 %fd204, 0d3FD5555555555535;
.loc 2 394 10
fma.rn.f64 %fd205, %fd203, %fd186, %fd204;
// %fd210 = %fd172 * 0d3FE62E42FEFA39EF + 2 * (series value), the log-style
// value of a*b.
mul.f64 %fd206, %fd205, %fd186;
fma.rn.f64 %fd207, %fd206, %fd185, %fd185;
add.f64 %fd208, %fd207, %fd207;
mov.f64 %fd209, 0d3FE62E42FEFA39EF;
.loc 2 394 10
fma.rn.f64 %fd210, %fd172, %fd209, %fd208;
// %fd212 = 0dC009000000000000 (-3.125) - %fd210: the polynomial variable.
mov.f64 %fd211, 0dC009000000000000;
.loc 2 394 10
sub.f64 %fd212, %fd211, %fd210;
// Long Horner polynomial in %fd212 (coefficients from file 2) -> %fd257.
mov.f64 %fd213, 0dBC08DDF93324D327;
mov.f64 %fd214, 0dBBB135D2E746E627;
.loc 2 394 10
fma.rn.f64 %fd215, %fd214, %fd212, %fd213;
mov.f64 %fd216, 0d3C37B83EEF0B7C9F;
.loc 2 394 10
fma.rn.f64 %fd217, %fd215, %fd212, %fd216;
mov.f64 %fd218, 0d3C69BA72CD589B91;
.loc 2 394 10
fma.rn.f64 %fd219, %fd217, %fd212, %fd218;
mov.f64 %fd220, 0dBCA33689090A6B96;
.loc 2 394 10
fma.rn.f64 %fd221, %fd219, %fd212, %fd220;
mov.f64 %fd222, 0d3C782E11898132E0;
.loc 2 394 10
fma.rn.f64 %fd223, %fd221, %fd212, %fd222;
mov.f64 %fd224, 0d3CFDE4ACFD9E26BA;
.loc 2 394 10
fma.rn.f64 %fd225, %fd223, %fd212, %fd224;
mov.f64 %fd226, 0dBD26D33EED66C487;
.loc 2 394 10
fma.rn.f64 %fd227, %fd225, %fd212, %fd226;
mov.f64 %fd228, 0dBD36F2167040D8E2;
.loc 2 394 10
fma.rn.f64 %fd229, %fd227, %fd212, %fd228;
mov.f64 %fd230, 0d3D872A22C2D77E20;
.loc 2 394 10
fma.rn.f64 %fd231, %fd229, %fd212, %fd230;
mov.f64 %fd232, 0dBDAC8859C4E5C0AF;
.loc 2 394 10
fma.rn.f64 %fd233, %fd231, %fd212, %fd232;
mov.f64 %fd234, 0dBDCDC583D118A561;
.loc 2 394 10
fma.rn.f64 %fd235, %fd233, %fd212, %fd234;
mov.f64 %fd236, 0d3E120F47CCF46B3C;
.loc 2 394 10
fma.rn.f64 %fd237, %fd235, %fd212, %fd236;
mov.f64 %fd238, 0dBE31A9E38DC84D60;
.loc 2 394 10
fma.rn.f64 %fd239, %fd237, %fd212, %fd238;
mov.f64 %fd240, 0dBE5F36CD6D3D46A9;
.loc 2 394 10
fma.rn.f64 %fd241, %fd239, %fd212, %fd240;
mov.f64 %fd242, 0d3E9C6B4F5D03B787;
.loc 2 394 10
fma.rn.f64 %fd243, %fd241, %fd212, %fd242;
mov.f64 %fd244, 0dBEB6E8A5434AE8A2;
.loc 2 394 10
fma.rn.f64 %fd245, %fd243, %fd212, %fd244;
mov.f64 %fd246, 0dBEED1D1F7B8736F6;
.loc 2 394 10
fma.rn.f64 %fd247, %fd245, %fd212, %fd246;
mov.f64 %fd248, 0d3F2879C2A212F024;
.loc 2 394 10
fma.rn.f64 %fd249, %fd247, %fd212, %fd248;
mov.f64 %fd250, 0dBF4845769484FCA8;
.loc 2 394 10
fma.rn.f64 %fd251, %fd249, %fd212, %fd250;
mov.f64 %fd252, 0dBF78B6C33114F909;
.loc 2 394 10
fma.rn.f64 %fd253, %fd251, %fd212, %fd252;
mov.f64 %fd254, 0d3FCEBD80D9B13E28;
.loc 2 394 10
fma.rn.f64 %fd255, %fd253, %fd212, %fd254;
mov.f64 %fd256, 0d3FFA755E7C99AE86;
.loc 2 394 10
fma.rn.f64 %fd257, %fd255, %fd212, %fd256;
// %fd264 = poly * (-a) + poly = poly * (1 - 2p).
fma.rn.f64 %fd264, %fd257, %fd2, %fd257;
// All paths join here with the pre-scaled value in %fd264.
BB32_21:
.loc 1 49 42
mul.lo.s32 %r61, %r63, %r22;
.loc 2 394 10
// Multiply by the constant pair 0dBFF6A09E667F3BCC (about -sqrt(2), high part)
// and 0dBCA21165F626CDD5 (low part): %fd260 = hi*v + lo*v.
mul.f64 %fd258, %fd264, 0dBCA21165F626CDD5;
mov.f64 %fd259, 0dBFF6A09E667F3BCC;
.loc 2 394 10
fma.rn.f64 %fd260, %fd259, %fd264, %fd258;
.loc 1 49 42
// Store to dst[x * dst stride + y].
add.s32 %r58, %r64, %r61;
mul.wide.s32 %rd7, %r58, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 49 42
st.global.f64 [%rd8], %fd260;
.loc 1 49 22
// Advance y by the y step and repeat while it is below the y bound.
add.s32 %r64, %r2, %r64;
.loc 1 49 1
setp.lt.s32 %p17, %r64, %r20;
@%p17 bra BB32_3;
BB32_22:
.loc 1 49 22
// Advance x by nctaid.x * ntid.x and repeat while it is below the x bound.
mov.u32 %r59, %nctaid.x;
mad.lo.s32 %r63, %r59, %r24, %r63;
.loc 1 49 1
setp.lt.s32 %p18, %r63, %r21;
@%p18 bra BB32_2;
BB32_23:
.loc 1 49 2
ret;
}
// map_rcbrt_double: element-wise reciprocal cube root over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride). Both loops advance by
// nctaid * ntid in their dimension, so the launch grid does not have to match the bounds.
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_rcbrt_double(
.param .u32 map_rcbrt_double_param_0,
.param .u32 map_rcbrt_double_param_1,
.param .u64 map_rcbrt_double_param_2,
.param .u32 map_rcbrt_double_param_3,
.param .u64 map_rcbrt_double_param_4,
.param .u32 map_rcbrt_double_param_5
)
{
.reg .pred %p<12>;
.reg .f32 %f<7>;
.reg .s32 %r<59>;
.reg .s64 %rd<9>;
.reg .f64 %fd<27>;
ld.param.u32 %r20, [map_rcbrt_double_param_0];
ld.param.u32 %r21, [map_rcbrt_double_param_1];
ld.param.u64 %rd3, [map_rcbrt_double_param_2];
ld.param.u32 %r22, [map_rcbrt_double_param_3];
ld.param.u64 %rd4, [map_rcbrt_double_param_4];
ld.param.u32 %r23, [map_rcbrt_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 50 1
// %r53 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r53, %r24, %r25, %r26;
.loc 1 50 1
setp.ge.s32 %p1, %r53, %r21;
@%p1 bra BB33_12;
.loc 1 50 1
// %r2 = first inner index for this thread; %r3 = inner-loop step (nctaid.y * ntid.y).
mov.u32 %r27, %tid.y;
mov.u32 %r28, %ntid.y;
mov.u32 %r29, %ctaid.y;
mad.lo.s32 %r2, %r28, %r29, %r27;
.loc 1 50 22
mov.u32 %r30, %nctaid.y;
mul.lo.s32 %r3, %r30, %r28;
// Outer loop head.
BB33_2:
.loc 1 50 1
setp.ge.s32 %p2, %r2, %r20;
@%p2 bra BB33_11;
.loc 1 50 1
// Pre-multiply the outer index by the source (%r5) and destination (%r6) strides.
mul.lo.s32 %r5, %r53, %r23;
.loc 1 50 42
mul.lo.s32 %r6, %r53, %r22;
mov.u32 %r54, %r2;
// Inner loop head: %r7 is the current inner index.
BB33_4:
.loc 1 50 1
mov.u32 %r7, %r54;
add.s32 %r31, %r7, %r5;
mul.wide.s32 %rd5, %r31, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 50 1
ld.global.f64 %fd1, [%rd6];
.loc 2 323 10
// Special inputs (zero, +/-infinity, NaN) are handled separately at BB33_9.
setp.eq.f64 %p3, %fd1, 0d0000000000000000;
abs.f64 %fd2, %fd1;
setp.geu.f64 %p4, %fd2, 0d7FF0000000000000;
or.pred %p5, %p3, %p4;
@%p5 bra BB33_9;
// Split |x| into its low (%r55) and high (%r56) 32-bit words.
{
.reg .b32 %temp;
mov.b64 {%r55, %temp}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r56}, %fd2;
}
// %r57 = biased exponent field; zero means the input is subnormal.
shr.u32 %r32, %r56, 20;
and.b32 %r57, %r32, 2047;
setp.eq.s32 %p6, %r57, 0;
@%p6 bra BB33_7;
mov.u32 %r58, 0;
bra.uni BB33_8;
BB33_7:
.loc 2 323 10
// Subnormal input: scale by 2^54 to normalise and remember 18 (= 54 / 3) in %r58 so the
// result exponent can be corrected after the cube root is taken.
mul.f64 %fd6, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%r55, %temp}, %fd6;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r56}, %fd6;
}
shr.u32 %r35, %r56, 20;
and.b32 %r57, %r35, 2047;
mov.u32 %r58, 18;
BB33_8:
.loc 2 323 10
// %r37 = round((exponent - 1022) / 3); removing 3 * %r37 from the exponent field
// (3145728 = 3 << 20) leaves the reduced argument %fd7.
add.s32 %r36, %r57, -1022;
cvt.rn.f32.s32 %f1, %r36;
mul.f32 %f2, %f1, 0f3EAAAAAB;
cvt.rni.s32.f32 %r37, %f2;
mad.lo.s32 %r38, %r37, -3145728, %r56;
mov.b64 %fd7, {%r55, %r38};
// Single-precision seed: y0 = 2^(-log2(a) / 3), i.e. approximately a^(-1/3).
cvt.rn.f32.f64 %f3, %fd7;
lg2.approx.f32 %f4, %f3;
mul.f32 %f5, %f4, 0fBEAAAAAB;
ex2.approx.f32 %f6, %f5;
cvt.f64.f32 %fd8, %f6;
// First refinement in double: y1 = y0 + (y0 / 3) * (1 - a * y0^3).
mul.f64 %fd9, %fd8, %fd8;
neg.f64 %fd10, %fd7;
mul.f64 %fd11, %fd8, %fd10;
mov.f64 %fd12, 0d3FF0000000000000;
.loc 2 323 10
fma.rn.f64 %fd13, %fd9, %fd11, %fd12;
mul.f64 %fd14, %fd8, 0d3FD5555555555555;
fma.rn.f64 %fd15, %fd13, %fd14, %fd8;
// Second refinement with the same update formula.
mul.f64 %fd16, %fd15, %fd15;
mul.f64 %fd17, %fd15, %fd10;
fma.rn.f64 %fd18, %fd16, %fd17, %fd12;
mul.f64 %fd19, %fd15, 0d3FD5555555555555;
fma.rn.f64 %fd20, %fd18, %fd19, %fd15;
{
.reg .b32 %temp;
mov.b64 {%r39, %temp}, %fd20;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd20;
}
// Re-apply the exponent: add (%r58 - %r37) directly to the exponent field of the result.
sub.s32 %r41, %r58, %r37;
shl.b32 %r42, %r41, 20;
add.s32 %r43, %r40, %r42;
mov.b64 %fd21, {%r39, %r43};
// The magnitude was computed from |x|; negate when the high word of x has its sign bit set.
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd1;
}
setp.lt.s32 %p7, %r44, 0;
neg.f64 %fd22, %fd21;
selp.f64 %fd26, %fd22, %fd21, %p7;
bra.uni BB33_10;
BB33_9:
.loc 2 323 10
// Special cases: zero input gives infinity, infinite input gives zero, and NaN
// (|x| unordered-greater than infinity) gives x + x. The sign bit of x is then OR-ed in.
selp.f64 %fd23, 0d7FF0000000000000, 0d0000000000000000, %p3;
add.f64 %fd24, %fd1, %fd1;
setp.gtu.f64 %p9, %fd2, 0d7FF0000000000000;
selp.f64 %fd25, %fd24, %fd23, %p9;
{
.reg .b32 %temp;
mov.b64 {%r45, %temp}, %fd25;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r46}, %fd25;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd1;
}
and.b32 %r48, %r47, -2147483648;
or.b32 %r49, %r46, %r48;
mov.b64 %fd26, {%r45, %r49};
BB33_10:
.loc 1 50 42
// Store the result at the destination offset and advance the inner index.
add.s32 %r50, %r7, %r6;
mul.wide.s32 %rd7, %r50, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 50 42
st.global.f64 [%rd8], %fd26;
.loc 1 50 22
add.s32 %r18, %r3, %r7;
.loc 1 50 1
setp.lt.s32 %p10, %r18, %r20;
mov.u32 %r54, %r18;
@%p10 bra BB33_4;
BB33_11:
.loc 1 50 22
// Advance the outer index by nctaid.x * ntid.x.
mov.u32 %r51, %nctaid.x;
mad.lo.s32 %r53, %r51, %r24, %r53;
.loc 1 50 1
setp.lt.s32 %p11, %r53, %r21;
@%p11 bra BB33_2;
BB33_12:
.loc 1 50 2
ret;
}
// map_rint_double: element-wise round-to-nearest-integer (ties to even, via cvt.rni)
// over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_rint_double(
.param .u32 map_rint_double_param_0,
.param .u32 map_rint_double_param_1,
.param .u64 map_rint_double_param_2,
.param .u32 map_rint_double_param_3,
.param .u64 map_rint_double_param_4,
.param .u32 map_rint_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_rint_double_param_0];
ld.param.u32 %r13, [map_rint_double_param_1];
ld.param.u64 %rd3, [map_rint_double_param_2];
ld.param.u32 %r14, [map_rint_double_param_3];
ld.param.u64 %rd4, [map_rint_double_param_4];
ld.param.u32 %r15, [map_rint_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 51 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 51 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB34_6;
.loc 1 51 1
// %r3 = first inner index for this thread.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 51 22
// Loop steps: %r4 = nctaid.x * ntid.x (outer), %r5 = nctaid.y * ntid.y (inner).
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 51 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head.
BB34_2:
.loc 1 51 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB34_5;
.loc 1 51 1
// Pre-multiply the outer index by the source (%r7) and destination (%r8) strides.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 51 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop head: %r9 is the current inner index.
BB34_4:
.loc 1 51 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 51 1
ld.global.f64 %fd1, [%rd6];
.loc 2 67 10
// Round to an integral value in f64; .rni picks the even integer on ties.
cvt.rni.f64.f64 %fd2, %fd1;
.loc 1 51 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 51 42
st.global.f64 [%rd8], %fd2;
.loc 1 51 22
add.s32 %r10, %r5, %r9;
.loc 1 51 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB34_4;
BB34_5:
.loc 1 51 22
// Advance the outer index.
add.s32 %r25, %r4, %r25;
.loc 1 51 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB34_2;
BB34_6:
.loc 1 51 2
ret;
}
// map_round_double: element-wise rounding to the nearest integer with halfway cases
// rounded away from zero, over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_round_double(
.param .u32 map_round_double_param_0,
.param .u32 map_round_double_param_1,
.param .u64 map_round_double_param_2,
.param .u32 map_round_double_param_3,
.param .u64 map_round_double_param_4,
.param .u32 map_round_double_param_5
)
{
.reg .pred %p<7>;
.reg .s32 %r<32>;
.reg .s64 %rd<9>;
.reg .f64 %fd<9>;
ld.param.u32 %r10, [map_round_double_param_0];
ld.param.u32 %r11, [map_round_double_param_1];
ld.param.u64 %rd3, [map_round_double_param_2];
ld.param.u32 %r12, [map_round_double_param_3];
ld.param.u64 %rd4, [map_round_double_param_4];
ld.param.u32 %r13, [map_round_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 52 1
// %r30 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r30, %r14, %r15, %r16;
.loc 1 52 1
setp.ge.s32 %p1, %r30, %r11;
@%p1 bra BB35_8;
.loc 1 52 1
// %r2 = first inner index for this thread; %r3 = inner-loop step (nctaid.y * ntid.y).
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 1 52 22
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop head.
BB35_2:
.loc 1 52 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB35_7;
.loc 1 52 1
// Pre-multiply the outer index by the source (%r5) and destination (%r6) strides.
mul.lo.s32 %r5, %r30, %r13;
.loc 1 52 42
mul.lo.s32 %r6, %r30, %r12;
mov.u32 %r31, %r2;
// Inner loop head: %r7 is the current inner index.
BB35_4:
.loc 1 52 1
mov.u32 %r7, %r31;
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 52 1
ld.global.f64 %fd8, [%rd6];
.loc 2 483 10
// Values with |x| >= 2^52 are stored unchanged (the ordered compare is false for NaN,
// so NaN takes the arithmetic path below instead).
abs.f64 %fd2, %fd8;
setp.ge.f64 %p3, %fd2, 0d4330000000000000;
@%p3 bra BB35_6;
// trunc(|x| + 0.5), forced to 0 when |x| < 0.5 so that a sum which rounds up to 1.0
// cannot produce a non-zero result.
add.f64 %fd5, %fd2, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd6, %fd5;
setp.lt.f64 %p4, %fd2, 0d3FE0000000000000;
selp.f64 %fd7, 0d0000000000000000, %fd6, %p4;
{
.reg .b32 %temp;
mov.b64 {%r22, %temp}, %fd7;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd7;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r24}, %fd8;
}
// OR the sign bit of the input into the rounded magnitude.
and.b32 %r25, %r24, -2147483648;
or.b32 %r26, %r23, %r25;
mov.b64 %fd8, {%r22, %r26};
BB35_6:
.loc 1 52 42
// Store the result at the destination offset and advance the inner index.
add.s32 %r27, %r7, %r6;
mul.wide.s32 %rd7, %r27, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 52 42
st.global.f64 [%rd8], %fd8;
.loc 1 52 22
add.s32 %r8, %r3, %r7;
.loc 1 52 1
setp.lt.s32 %p5, %r8, %r10;
mov.u32 %r31, %r8;
@%p5 bra BB35_4;
BB35_7:
.loc 1 52 22
// Advance the outer index by nctaid.x * ntid.x.
mov.u32 %r28, %nctaid.x;
mad.lo.s32 %r30, %r28, %r14, %r30;
.loc 1 52 1
setp.lt.s32 %p6, %r30, %r11;
@%p6 bra BB35_2;
BB35_8:
.loc 1 52 2
ret;
}
// map_rsqrt_double: element-wise reciprocal square root over a strided 2-D array of f64,
// computed with the approximate rsqrt.approx.f64 instruction.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_rsqrt_double(
.param .u32 map_rsqrt_double_param_0,
.param .u32 map_rsqrt_double_param_1,
.param .u64 map_rsqrt_double_param_2,
.param .u32 map_rsqrt_double_param_3,
.param .u64 map_rsqrt_double_param_4,
.param .u32 map_rsqrt_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_rsqrt_double_param_0];
ld.param.u32 %r13, [map_rsqrt_double_param_1];
ld.param.u64 %rd3, [map_rsqrt_double_param_2];
ld.param.u32 %r14, [map_rsqrt_double_param_3];
ld.param.u64 %rd4, [map_rsqrt_double_param_4];
ld.param.u32 %r15, [map_rsqrt_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 53 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 53 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB36_6;
.loc 1 53 1
// %r3 = first inner index for this thread.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 53 22
// Loop steps: %r4 = nctaid.x * ntid.x (outer), %r5 = nctaid.y * ntid.y (inner).
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 53 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head.
BB36_2:
.loc 1 53 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB36_5;
.loc 1 53 1
// Pre-multiply the outer index by the source (%r7) and destination (%r8) strides.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 53 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop head: %r9 is the current inner index.
BB36_4:
.loc 1 53 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 53 1
ld.global.f64 %fd1, [%rd6];
.loc 3 2790 10
// Approximate 1 / sqrt(x); no refinement step follows in this kernel.
rsqrt.approx.f64 %fd2, %fd1;
.loc 1 53 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 53 42
st.global.f64 [%rd8], %fd2;
.loc 1 53 22
add.s32 %r10, %r5, %r9;
.loc 1 53 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB36_4;
BB36_5:
.loc 1 53 22
// Advance the outer index.
add.s32 %r25, %r4, %r25;
.loc 1 53 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB36_2;
BB36_6:
.loc 1 53 2
ret;
}
// map_sin_double: element-wise sine over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// A 4-byte local slot (__local_depot37) holds the quadrant index so that its address can
// be passed to __internal_trig_reduction_slowpathd for large arguments.
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_sin_double(
.param .u32 map_sin_double_param_0,
.param .u32 map_sin_double_param_1,
.param .u64 map_sin_double_param_2,
.param .u32 map_sin_double_param_3,
.param .u64 map_sin_double_param_4,
.param .u32 map_sin_double_param_5
)
{
.local .align 4 .b8 __local_depot37[4];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<10>;
.reg .s32 %r<40>;
.reg .s64 %rd<16>;
.reg .f64 %fd<43>;
mov.u64 %SPL, __local_depot37;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r13, [map_sin_double_param_0];
ld.param.u32 %r14, [map_sin_double_param_1];
ld.param.u64 %rd1, [map_sin_double_param_2];
ld.param.u32 %r15, [map_sin_double_param_3];
ld.param.u64 %rd2, [map_sin_double_param_4];
ld.param.u32 %r16, [map_sin_double_param_5];
.loc 1 54 1
// %r37 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r17, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r37, %r17, %r18, %r19;
.loc 1 54 1
setp.ge.s32 %p1, %r37, %r14;
@%p1 bra BB37_14;
.loc 1 54 1
mov.u32 %r20, %ntid.y;
.loc 1 54 22
// %r2 = inner-loop step (nctaid.y * ntid.y); %rd3 = source base, %rd13 = destination base.
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r2, %r21, %r20;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd13, %rd1;
// Outer loop head: the starting inner index %r38 is recomputed on every outer iteration.
BB37_2:
.loc 1 54 1
mov.u32 %r22, %ctaid.y;
mov.u32 %r24, %tid.y;
mad.lo.s32 %r38, %r20, %r22, %r24;
.loc 1 54 1
setp.ge.s32 %p2, %r38, %r13;
@%p2 bra BB37_13;
.loc 1 54 1
// Pre-multiply the outer index by the source (%r4) and destination (%r5) strides.
mul.lo.s32 %r4, %r37, %r16;
.loc 1 54 42
mul.lo.s32 %r5, %r37, %r15;
// Inner loop head.
BB37_4:
.loc 1 54 1
add.s32 %r29, %r38, %r4;
mul.wide.s32 %rd4, %r29, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd40, [%rd5];
.loc 2 198 10
// An infinite input is replaced by x * 0 (NaN), which then flows through the fast path.
abs.f64 %fd14, %fd40;
setp.neu.f64 %p3, %fd14, 0d7FF0000000000000;
@%p3 bra BB37_6;
mov.f64 %fd15, 0d0000000000000000;
.loc 2 198 10
mul.rn.f64 %fd40, %fd40, %fd15;
BB37_6:
add.u64 %rd6, %SP, 0;
.loc 2 198 10
// Fast-path reduction: q = round(x * 2/pi), saved in the local slot, then
// r = x - q * (pi/2), with pi/2 supplied as three constants applied by successive FMAs.
mul.f64 %fd16, %fd40, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r39, %fd16;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 198 10
st.local.u32 [%rd7], %r39;
cvt.rn.f64.s32 %fd17, %r39;
neg.f64 %fd18, %fd17;
mov.f64 %fd19, 0d3FF921FB54442D18;
.loc 2 198 10
fma.rn.f64 %fd20, %fd18, %fd19, %fd40;
mov.f64 %fd21, 0d3C91A62633145C00;
.loc 2 198 10
fma.rn.f64 %fd22, %fd18, %fd21, %fd20;
mov.f64 %fd23, 0d397B839A252049C0;
.loc 2 198 10
fma.rn.f64 %fd41, %fd18, %fd23, %fd22;
// The fast-path result is kept only while |x| <= 2^31; larger arguments call the slow
// path, which returns the reduced argument and writes the quadrant through the pointer.
abs.f64 %fd24, %fd40;
setp.leu.f64 %p4, %fd24, 0d41E0000000000000;
@%p4 bra BB37_8;
// Callseq Start 5
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd40;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 198 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd41, [retval0+0];
}
// Callseq End 5
ld.local.u32 %r39, [%rd7];
BB37_8:
// Bit 0 of the quadrant selects one of the two 64-byte halves of __cudart_sin_cos_coeffs
// (byte offset 0 or 64) together with the matching leading coefficient in %fd25.
shl.b32 %r30, %r39, 3;
and.b32 %r31, %r30, 8;
and.b32 %r32, %r39, 1;
setp.eq.b32 %p5, %r32, 1;
not.pred %p6, %p5;
selp.f64 %fd25, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p6;
mul.wide.u32 %rd10, %r31, 8;
mov.u64 %rd11, __cudart_sin_cos_coeffs;
add.s64 %rd12, %rd11, %rd10;
.loc 2 198 10
// Horner evaluation in r^2 (%fd7) using the selected coefficient set.
ld.const.f64 %fd26, [%rd12+8];
mul.rn.f64 %fd7, %fd41, %fd41;
fma.rn.f64 %fd27, %fd25, %fd7, %fd26;
ld.const.f64 %fd28, [%rd12+16];
fma.rn.f64 %fd29, %fd27, %fd7, %fd28;
ld.const.f64 %fd30, [%rd12+24];
fma.rn.f64 %fd31, %fd29, %fd7, %fd30;
ld.const.f64 %fd32, [%rd12+32];
fma.rn.f64 %fd33, %fd31, %fd7, %fd32;
ld.const.f64 %fd34, [%rd12+40];
fma.rn.f64 %fd35, %fd33, %fd7, %fd34;
ld.const.f64 %fd36, [%rd12+48];
fma.rn.f64 %fd8, %fd35, %fd7, %fd36;
// Even quadrant: result = r + r * poly. Odd quadrant: result = 1 + r^2 * poly.
fma.rn.f64 %fd42, %fd8, %fd41, %fd41;
@%p6 bra BB37_10;
mov.f64 %fd37, 0d3FF0000000000000;
.loc 2 198 10
fma.rn.f64 %fd42, %fd8, %fd7, %fd37;
BB37_10:
// Bit 1 of the quadrant negates the result (computed as result * -1 + 0).
and.b32 %r33, %r39, 2;
setp.eq.s32 %p7, %r33, 0;
@%p7 bra BB37_12;
mov.f64 %fd38, 0d0000000000000000;
mov.f64 %fd39, 0dBFF0000000000000;
.loc 2 198 10
fma.rn.f64 %fd42, %fd42, %fd39, %fd38;
BB37_12:
.loc 1 54 42
// Store the result at the destination offset and advance the inner index.
add.s32 %r34, %r38, %r5;
mul.wide.s32 %rd14, %r34, 8;
add.s64 %rd15, %rd13, %rd14;
st.global.f64 [%rd15], %fd42;
.loc 1 54 22
add.s32 %r38, %r2, %r38;
.loc 1 54 1
setp.lt.s32 %p8, %r38, %r13;
@%p8 bra BB37_4;
BB37_13:
.loc 1 54 22
// Advance the outer index by nctaid.x * ntid.x.
mov.u32 %r35, %nctaid.x;
mad.lo.s32 %r37, %r35, %r17, %r37;
.loc 1 54 1
setp.lt.s32 %p9, %r37, %r14;
@%p9 bra BB37_2;
BB37_14:
.loc 1 54 2
ret;
}
// map_sinh_double: element-wise hyperbolic sine over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// The magnitude is computed from |x| and the sign bit of x is OR-ed back in at the end.
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_sinh_double(
.param .u32 map_sinh_double_param_0,
.param .u32 map_sinh_double_param_1,
.param .u64 map_sinh_double_param_2,
.param .u32 map_sinh_double_param_3,
.param .u64 map_sinh_double_param_4,
.param .u32 map_sinh_double_param_5
)
{
.reg .pred %p<10>;
.reg .s32 %r<45>;
.reg .s64 %rd<9>;
.reg .f64 %fd<65>;
ld.param.u32 %r11, [map_sinh_double_param_0];
ld.param.u32 %r12, [map_sinh_double_param_1];
ld.param.u64 %rd3, [map_sinh_double_param_2];
ld.param.u32 %r13, [map_sinh_double_param_3];
ld.param.u64 %rd4, [map_sinh_double_param_4];
ld.param.u32 %r14, [map_sinh_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 55 1
// %r43 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r43, %r15, %r16, %r17;
.loc 1 55 1
setp.ge.s32 %p1, %r43, %r12;
@%p1 bra BB38_9;
.loc 1 55 1
// %r2 = first inner index for this thread; %r3 = inner-loop step (nctaid.y * ntid.y).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r2, %r19, %r20, %r18;
.loc 1 55 22
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r3, %r21, %r19;
// Outer loop head.
BB38_2:
.loc 1 55 1
setp.ge.s32 %p2, %r2, %r11;
@%p2 bra BB38_8;
.loc 1 55 1
// Pre-multiply the outer index by the source (%r5) and destination (%r6) strides.
mul.lo.s32 %r5, %r43, %r14;
.loc 1 55 42
mul.lo.s32 %r6, %r43, %r13;
mov.u32 %r44, %r2;
// Inner loop head: %r7 is the current inner index.
BB38_4:
.loc 1 55 1
mov.u32 %r7, %r44;
add.s32 %r22, %r7, %r5;
mul.wide.s32 %rd5, %r22, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 55 1
ld.global.f64 %fd5, [%rd6];
.loc 2 268 10
// %r8 = high word of x (kept for the final sign restore); %fd1 = |x| built by clearing
// the sign bit of the high word.
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd5;
}
and.b32 %r23, %r8, 2147483647;
{
.reg .b32 %temp;
mov.b64 {%r24, %temp}, %fd5;
}
mov.b64 %fd1, {%r24, %r23};
// High word below 0x3FF00000 means |x| < 1.0: use the polynomial at BB38_6.
setp.lt.u32 %p3, %r23, 1072693248;
@%p3 bra BB38_6;
{
.reg .b32 %temp;
mov.b64 {%temp, %r25}, %fd1;
}
// |x| >= 1: i = round(|x| * log2(e)); reduced argument t = |x| - i * ln(2), with ln(2)
// applied as a main constant followed by a low-order correction.
mul.f64 %fd6, %fd1, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd7, %fd6;
cvt.rzi.s32.f64 %r26, %fd7;
add.s32 %r27, %r26, -1;
mov.f64 %fd8, 0dBFE62E42FEFA39EF;
.loc 2 268 10
fma.rn.f64 %fd9, %fd7, %fd8, %fd1;
mov.f64 %fd10, 0dBC7ABC9E3B39803F;
.loc 2 268 10
fma.rn.f64 %fd11, %fd7, %fd10, %fd9;
// For a small enough high word the reduction is bypassed (scale exponent 0, t = |x|);
// otherwise the scale exponent is i - 1, which folds a factor of 1/2 into the result.
shl.b32 %r28, %r25, 1;
setp.lt.u32 %p4, %r28, 2142496327;
selp.b32 %r29, 0, %r27, %p4;
mov.u32 %r30, 0;
.loc 2 268 10
selp.f64 %fd12, %fd1, %fd11, %p4;
// Horner polynomial in t; %fd35 = t + t^2 * poly(t).
mov.f64 %fd13, 0d3E5AF86D8EBD13CD;
mov.f64 %fd14, 0d3E21F4076ACD15B6;
.loc 2 268 10
fma.rn.f64 %fd15, %fd14, %fd12, %fd13;
mov.f64 %fd16, 0d3E927E5092BA033D;
.loc 2 268 10
fma.rn.f64 %fd17, %fd15, %fd12, %fd16;
mov.f64 %fd18, 0d3EC71DDE6C5F9DA1;
.loc 2 268 10
fma.rn.f64 %fd19, %fd17, %fd12, %fd18;
mov.f64 %fd20, 0d3EFA01A018D034E6;
.loc 2 268 10
fma.rn.f64 %fd21, %fd19, %fd12, %fd20;
mov.f64 %fd22, 0d3F2A01A01B3B6940;
.loc 2 268 10
fma.rn.f64 %fd23, %fd21, %fd12, %fd22;
mov.f64 %fd24, 0d3F56C16C16C1B5DD;
.loc 2 268 10
fma.rn.f64 %fd25, %fd23, %fd12, %fd24;
mov.f64 %fd26, 0d3F8111111110F74D;
.loc 2 268 10
fma.rn.f64 %fd27, %fd25, %fd12, %fd26;
mov.f64 %fd28, 0d3FA555555555554D;
.loc 2 268 10
fma.rn.f64 %fd29, %fd27, %fd12, %fd28;
mov.f64 %fd30, 0d3FC5555555555557;
.loc 2 268 10
fma.rn.f64 %fd31, %fd29, %fd12, %fd30;
mov.f64 %fd32, 0d3FE0000000000000;
.loc 2 268 10
fma.rn.f64 %fd33, %fd31, %fd12, %fd32;
mul.f64 %fd34, %fd33, %fd12;
fma.rn.f64 %fd35, %fd34, %fd12, %fd12;
// Build 2^n directly in the exponent field (%fd36). When n == 1024 the exponent is
// lowered by one and the result doubled afterwards, keeping the scale factor finite.
setp.eq.s32 %p5, %r29, 1024;
selp.b32 %r31, -1, 0, %p5;
add.s32 %r32, %r31, %r29;
shl.b32 %r33, %r32, 20;
add.s32 %r34, %r33, 1072693248;
mov.b64 %fd36, {%r30, %r34};
mov.u32 %r35, 1071644672;
.loc 2 268 10
// %fd37 = 0.5; %fd39 = %fd35 * 2^n + (2^n - 0.5).
mov.b64 %fd37, {%r30, %r35};
sub.f64 %fd38, %fd36, %fd37;
fma.rn.f64 %fd39, %fd35, %fd36, %fd38;
add.f64 %fd40, %fd39, %fd39;
selp.f64 %fd41, %fd40, %fd39, %p5;
setp.eq.s32 %p6, %r28, 0;
selp.f64 %fd42, %fd12, %fd41, %p6;
// With u = %fd42: result = u + u / (2u + 1).
mov.f64 %fd43, 0d3FF0000000000000;
mov.f64 %fd44, 0d4000000000000000;
.loc 2 268 10
fma.rn.f64 %fd45, %fd44, %fd42, %fd43;
div.rn.f64 %fd46, %fd42, %fd45;
add.f64 %fd47, %fd42, %fd46;
// At or above the threshold (about 710.48) the result is +infinity; the unordered
// compare lets NaN keep the computed value instead.
setp.ltu.f64 %p7, %fd1, 0d408633CE8FB9F87E;
selp.f64 %fd64, %fd47, 0d7FF0000000000000, %p7;
bra.uni BB38_7;
BB38_6:
.loc 2 268 10
// |x| < 1: odd polynomial, result = |x| + |x| * (x^2 * poly(x^2)).
mul.f64 %fd48, %fd1, %fd1;
mov.f64 %fd49, 0d3DE611A561D87DEF;
mov.f64 %fd50, 0d3D6B4C75AB274C53;
.loc 2 268 10
fma.rn.f64 %fd51, %fd50, %fd48, %fd49;
mov.f64 %fd52, 0d3E5AE64671B18F5C;
.loc 2 268 10
fma.rn.f64 %fd53, %fd51, %fd48, %fd52;
mov.f64 %fd54, 0d3EC71DE3A465B1E4;
.loc 2 268 10
fma.rn.f64 %fd55, %fd53, %fd48, %fd54;
mov.f64 %fd56, 0d3F2A01A01A02899D;
.loc 2 268 10
fma.rn.f64 %fd57, %fd55, %fd48, %fd56;
mov.f64 %fd58, 0d3F811111111110A6;
.loc 2 268 10
fma.rn.f64 %fd59, %fd57, %fd48, %fd58;
mov.f64 %fd60, 0d3FC5555555555556;
.loc 2 268 10
fma.rn.f64 %fd61, %fd59, %fd48, %fd60;
mul.f64 %fd62, %fd61, %fd48;
fma.rn.f64 %fd64, %fd62, %fd1, %fd1;
BB38_7:
// OR the sign bit of the original input into the high word of the result.
and.b32 %r36, %r8, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%temp, %r37}, %fd64;
}
or.b32 %r38, %r37, %r36;
{
.reg .b32 %temp;
mov.b64 {%r39, %temp}, %fd64;
}
mov.b64 %fd63, {%r39, %r38};
.loc 1 55 42
// Store the result at the destination offset and advance the inner index.
add.s32 %r40, %r7, %r6;
mul.wide.s32 %rd7, %r40, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 55 42
st.global.f64 [%rd8], %fd63;
.loc 1 55 22
add.s32 %r9, %r3, %r7;
.loc 1 55 1
setp.lt.s32 %p8, %r9, %r11;
mov.u32 %r44, %r9;
@%p8 bra BB38_4;
BB38_8:
.loc 1 55 22
// Advance the outer index by nctaid.x * ntid.x.
mov.u32 %r41, %nctaid.x;
mad.lo.s32 %r43, %r41, %r15, %r43;
.loc 1 55 1
setp.lt.s32 %p9, %r43, %r12;
@%p9 bra BB38_2;
BB38_9:
.loc 1 55 2
ret;
}
// map_sinpi_double: element-wise sin(pi * x) over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// The argument is reduced in units of 1/2 before multiplying by pi, so no call to the
// slow-path trig reduction is needed.
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_sinpi_double(
.param .u32 map_sinpi_double_param_0,
.param .u32 map_sinpi_double_param_1,
.param .u64 map_sinpi_double_param_2,
.param .u32 map_sinpi_double_param_3,
.param .u64 map_sinpi_double_param_4,
.param .u32 map_sinpi_double_param_5
)
{
.reg .pred %p<9>;
.reg .s32 %r<38>;
.reg .s64 %rd<14>;
.reg .f64 %fd<37>;
ld.param.u32 %r10, [map_sinpi_double_param_0];
ld.param.u32 %r11, [map_sinpi_double_param_1];
ld.param.u64 %rd1, [map_sinpi_double_param_2];
ld.param.u32 %r12, [map_sinpi_double_param_3];
ld.param.u64 %rd2, [map_sinpi_double_param_4];
ld.param.u32 %r13, [map_sinpi_double_param_5];
.loc 1 56 1
// %r36 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r36, %r14, %r15, %r16;
.loc 1 56 1
setp.ge.s32 %p1, %r36, %r11;
@%p1 bra BB39_12;
.loc 1 56 1
mov.u32 %r17, %ntid.y;
.loc 1 56 22
// %r2 = inner-loop step (nctaid.y * ntid.y); %rd3 = source base, %rd11 = destination base.
mov.u32 %r18, %nctaid.y;
mul.lo.s32 %r2, %r18, %r17;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd11, %rd1;
// Outer loop head: the starting inner index %r37 is recomputed on every outer iteration.
BB39_2:
.loc 1 56 1
mov.u32 %r19, %ctaid.y;
mov.u32 %r21, %tid.y;
mad.lo.s32 %r37, %r17, %r19, %r21;
.loc 1 56 1
setp.ge.s32 %p2, %r37, %r10;
@%p2 bra BB39_11;
.loc 1 56 1
// Pre-multiply the outer index by the source stride.
mul.lo.s32 %r4, %r36, %r13;
// Inner loop head.
BB39_4:
add.s32 %r26, %r37, %r4;
mul.wide.s32 %rd4, %r26, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 1 56 1
ld.global.f64 %fd1, [%rd5];
.loc 2 208 10
// Adding 1 << 20 to the high word bumps the exponent field by one, giving 2x (%fd11)
// without a floating-point multiply.
{
.reg .b32 %temp;
mov.b64 {%r27, %temp}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd1;
}
add.s32 %r29, %r28, 1048576;
mov.b64 %fd11, {%r27, %r29};
// q = round(2x), kept as f64 (%fd12), s64 (%rd6) and its low 32 bits (%r7).
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s64.f64 %rd6, %fd12;
cvt.u32.u64 %r7, %rd6;
// Reduced argument: (x - q / 2) * pi, with pi applied as a main constant plus a
// low-order correction term.
neg.f64 %fd13, %fd12;
mov.f64 %fd14, 0d3FE0000000000000;
.loc 2 208 10
fma.rn.f64 %fd15, %fd13, %fd14, %fd1;
mul.f64 %fd16, %fd15, 0d3CA1A62633145C07;
mov.f64 %fd17, 0d400921FB54442D18;
.loc 2 208 10
fma.rn.f64 %fd18, %fd15, %fd17, %fd16;
// Bit 0 of q selects one of the two 64-byte halves of __cudart_sin_cos_coeffs
// (byte offset 0 or 64) together with the matching leading coefficient in %fd19.
shl.b32 %r30, %r7, 3;
and.b32 %r31, %r30, 8;
mul.rn.f64 %fd2, %fd18, %fd18;
and.b64 %rd7, %rd6, 1;
setp.eq.b64 %p3, %rd7, 1;
not.pred %p4, %p3;
selp.f64 %fd19, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p4;
mul.wide.u32 %rd8, %r31, 8;
mov.u64 %rd9, __cudart_sin_cos_coeffs;
add.s64 %rd10, %rd9, %rd8;
.loc 2 208 10
// Horner evaluation in r^2 (%fd2) using the selected coefficient set.
ld.const.f64 %fd20, [%rd10+8];
fma.rn.f64 %fd21, %fd19, %fd2, %fd20;
ld.const.f64 %fd22, [%rd10+16];
fma.rn.f64 %fd23, %fd21, %fd2, %fd22;
ld.const.f64 %fd24, [%rd10+24];
fma.rn.f64 %fd25, %fd23, %fd2, %fd24;
ld.const.f64 %fd26, [%rd10+32];
fma.rn.f64 %fd27, %fd25, %fd2, %fd26;
ld.const.f64 %fd28, [%rd10+40];
fma.rn.f64 %fd29, %fd27, %fd2, %fd28;
ld.const.f64 %fd30, [%rd10+48];
fma.rn.f64 %fd3, %fd29, %fd2, %fd30;
// Even q: result = r + r * poly. Odd q: result = 1 + r^2 * poly.
fma.rn.f64 %fd36, %fd3, %fd18, %fd18;
@%p4 bra BB39_6;
mov.f64 %fd31, 0d3FF0000000000000;
.loc 2 208 10
fma.rn.f64 %fd36, %fd3, %fd2, %fd31;
BB39_6:
// Bit 1 of q negates the result (computed as result * -1 + 0).
and.b32 %r32, %r7, 2;
setp.eq.s32 %p5, %r32, 0;
@%p5 bra BB39_8;
mov.f64 %fd32, 0d0000000000000000;
mov.f64 %fd33, 0dBFF0000000000000;
.loc 2 208 10
fma.rn.f64 %fd36, %fd36, %fd33, %fd32;
BB39_8:
// When x equals trunc(x) the result is replaced by x * 0: a zero carrying the sign of x
// for finite integers, and NaN for infinities.
cvt.rzi.f64.f64 %fd34, %fd1;
setp.neu.f64 %p6, %fd1, %fd34;
@%p6 bra BB39_10;
mov.f64 %fd35, 0d0000000000000000;
.loc 2 208 10
mul.rn.f64 %fd36, %fd1, %fd35;
BB39_10:
.loc 1 56 42
// Destination offset = outer * param_3 + inner, formed with a single mad here.
mad.lo.s32 %r33, %r36, %r12, %r37;
mul.wide.s32 %rd12, %r33, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 1 56 42
st.global.f64 [%rd13], %fd36;
.loc 1 56 22
add.s32 %r37, %r2, %r37;
.loc 1 56 1
setp.lt.s32 %p7, %r37, %r10;
@%p7 bra BB39_4;
BB39_11:
.loc 1 56 22
// Advance the outer index by nctaid.x * ntid.x.
mov.u32 %r34, %nctaid.x;
mad.lo.s32 %r36, %r34, %r14, %r36;
.loc 1 56 1
setp.lt.s32 %p8, %r36, %r11;
@%p8 bra BB39_2;
BB39_12:
.loc 1 56 2
ret;
}
// map_sqrt_double: element-wise square root (sqrt.rn.f64, round-to-nearest) over a
// strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_sqrt_double(
.param .u32 map_sqrt_double_param_0,
.param .u32 map_sqrt_double_param_1,
.param .u64 map_sqrt_double_param_2,
.param .u32 map_sqrt_double_param_3,
.param .u64 map_sqrt_double_param_4,
.param .u32 map_sqrt_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_sqrt_double_param_0];
ld.param.u32 %r13, [map_sqrt_double_param_1];
ld.param.u64 %rd3, [map_sqrt_double_param_2];
ld.param.u32 %r14, [map_sqrt_double_param_3];
ld.param.u64 %rd4, [map_sqrt_double_param_4];
ld.param.u32 %r15, [map_sqrt_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 57 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 57 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB40_6;
.loc 1 57 1
// %r3 = first inner index for this thread.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 57 22
// Loop steps: %r4 = nctaid.x * ntid.x (outer), %r5 = nctaid.y * ntid.y (inner).
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 57 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head.
BB40_2:
.loc 1 57 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB40_5;
.loc 1 57 1
// Pre-multiply the outer index by the source (%r7) and destination (%r8) strides.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 57 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop head: %r9 is the current inner index.
BB40_4:
.loc 1 57 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 57 1
ld.global.f64 %fd1, [%rd6];
.loc 3 3060 10
// Square root with round-to-nearest-even rounding.
sqrt.rn.f64 %fd2, %fd1;
.loc 1 57 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 57 42
st.global.f64 [%rd8], %fd2;
.loc 1 57 22
add.s32 %r10, %r5, %r9;
.loc 1 57 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB40_4;
BB40_5:
.loc 1 57 22
// Advance the outer index.
add.s32 %r25, %r4, %r25;
.loc 1 57 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB40_2;
BB40_6:
.loc 1 57 2
ret;
}
// map_tan_double: element-wise tangent over a strided 2-D array of f64.
//   param_0 (u32): exclusive bound for the inner index (derived from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (derived from the x thread/block ids)
//   param_2 (u64): destination base address (generic, converted to global)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source base address (generic, converted to global)
//   param_5 (u32): source stride, multiplied by the outer index
// Each element is addressed as base + 8 * (inner + outer * stride).
// A 4-byte local slot (__local_depot41) holds the quadrant index so that its address can
// be passed to __internal_trig_reduction_slowpathd for large arguments.
// NOTE(review): element offsets are formed with 32-bit signed arithmetic before widening.
.visible .entry map_tan_double(
.param .u32 map_tan_double_param_0,
.param .u32 map_tan_double_param_1,
.param .u64 map_tan_double_param_2,
.param .u32 map_tan_double_param_3,
.param .u64 map_tan_double_param_4,
.param .u32 map_tan_double_param_5
)
{
.local .align 4 .b8 __local_depot41[4];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<8>;
.reg .f32 %f<5>;
.reg .s32 %r<37>;
.reg .s64 %rd<13>;
.reg .f64 %fd<68>;
mov.u64 %SPL, __local_depot41;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r13, [map_tan_double_param_0];
ld.param.u32 %r14, [map_tan_double_param_1];
ld.param.u64 %rd2, [map_tan_double_param_2];
ld.param.u32 %r15, [map_tan_double_param_3];
ld.param.u64 %rd3, [map_tan_double_param_4];
ld.param.u32 %r16, [map_tan_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
.loc 1 58 1
// %r34 = outer index = ntid.x * ctaid.x + tid.x; exit early if it is already out of range.
mov.u32 %r17, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r34, %r17, %r18, %r19;
.loc 1 58 1
setp.ge.s32 %p1, %r34, %r14;
@%p1 bra BB41_12;
.loc 1 58 1
mov.u32 %r20, %ntid.y;
.loc 1 58 22
// %r2 = inner-loop step (nctaid.y * ntid.y); %rd10 = destination base.
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r2, %r21, %r20;
cvta.to.global.u64 %rd10, %rd2;
// Outer loop head: the starting inner index %r35 is recomputed on every outer iteration.
BB41_2:
.loc 1 58 1
mov.u32 %r22, %ctaid.y;
mov.u32 %r24, %tid.y;
mad.lo.s32 %r35, %r20, %r22, %r24;
.loc 1 58 1
setp.ge.s32 %p2, %r35, %r13;
@%p2 bra BB41_11;
.loc 1 58 1
// Pre-multiply the outer index by the source (%r4) and destination (%r5) strides.
mul.lo.s32 %r4, %r34, %r16;
.loc 1 58 42
mul.lo.s32 %r5, %r34, %r15;
// Inner loop head.
BB41_4:
.loc 1 58 1
add.s32 %r29, %r35, %r4;
mul.wide.s32 %rd4, %r29, 8;
add.s64 %rd5, %rd1, %rd4;
ld.global.f64 %fd65, [%rd5];
.loc 2 218 10
// An infinite input is replaced by x * 0 (NaN), which then flows through the fast path.
abs.f64 %fd11, %fd65;
setp.neu.f64 %p3, %fd11, 0d7FF0000000000000;
@%p3 bra BB41_6;
mov.f64 %fd12, 0d0000000000000000;
.loc 2 218 10
mul.rn.f64 %fd65, %fd65, %fd12;
BB41_6:
add.u64 %rd6, %SP, 0;
.loc 2 218 10
// Fast-path reduction: q = round(x * 2/pi), saved in the local slot, then
// r = x - q * (pi/2), with pi/2 supplied as three constants applied by successive FMAs.
mul.f64 %fd13, %fd65, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r36, %fd13;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 218 10
st.local.u32 [%rd7], %r36;
cvt.rn.f64.s32 %fd14, %r36;
neg.f64 %fd15, %fd14;
mov.f64 %fd16, 0d3FF921FB54442D18;
.loc 2 218 10
fma.rn.f64 %fd17, %fd15, %fd16, %fd65;
mov.f64 %fd18, 0d3C91A62633145C00;
.loc 2 218 10
fma.rn.f64 %fd19, %fd15, %fd18, %fd17;
mov.f64 %fd20, 0d397B839A252049C0;
.loc 2 218 10
fma.rn.f64 %fd66, %fd15, %fd20, %fd19;
// The fast-path result is kept only while |x| <= 2^31; larger arguments call the slow
// path, which returns the reduced argument and writes the quadrant through the pointer.
abs.f64 %fd21, %fd65;
setp.leu.f64 %p4, %fd21, 0d41E0000000000000;
@%p4 bra BB41_8;
// Callseq Start 6
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd65;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 218 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd66, [retval0+0];
}
// Callseq End 6
ld.local.u32 %r36, [%rd7];
BB41_8:
// Horner polynomial in r^2 (%fd22); %fd7 = r^2 * poly(r^2) and t = r + r * %fd7 (%fd67).
mul.f64 %fd22, %fd66, %fd66;
mov.f64 %fd23, 0dBEF9757C5B27EBB1;
mov.f64 %fd24, 0d3EE48DAC2799BCB9;
.loc 2 218 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3F0980E90FD91E04;
.loc 2 218 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0dBEFAE2B0417D7E1D;
.loc 2 218 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F119F5341BFBA57;
.loc 2 218 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F15E791A00F6919;
.loc 2 218 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F2FF2E7FADEC73A;
.loc 2 218 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3F434BC1B206DA62;
.loc 2 218 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
mov.f64 %fd38, 0d3F57DB18EF2F83F9;
.loc 2 218 10
fma.rn.f64 %fd39, %fd37, %fd22, %fd38;
mov.f64 %fd40, 0d3F6D6D2E7AE49FBC;
.loc 2 218 10
fma.rn.f64 %fd41, %fd39, %fd22, %fd40;
mov.f64 %fd42, 0d3F8226E3A816A776;
.loc 2 218 10
fma.rn.f64 %fd43, %fd41, %fd22, %fd42;
mov.f64 %fd44, 0d3F9664F485D25660;
.loc 2 218 10
fma.rn.f64 %fd45, %fd43, %fd22, %fd44;
mov.f64 %fd46, 0d3FABA1BA1BABF31D;
.loc 2 218 10
fma.rn.f64 %fd47, %fd45, %fd22, %fd46;
mov.f64 %fd48, 0d3FC11111111105D2;
.loc 2 218 10
fma.rn.f64 %fd49, %fd47, %fd22, %fd48;
mov.f64 %fd50, 0d3FD555555555555E;
.loc 2 218 10
fma.rn.f64 %fd51, %fd49, %fd22, %fd50;
mul.f64 %fd7, %fd51, %fd22;
fma.rn.f64 %fd67, %fd7, %fd66, %fd66;
// An even quadrant stores t as is; an odd quadrant falls into BB41_9, which replaces it
// with approximately -1 / t.
and.b32 %r30, %r36, 1;
setp.eq.b32 %p5, %r30, 1;
@!%p5 bra BB41_10;
bra.uni BB41_9;
BB41_9:
// %fd56 = %fd7 * r - (t - r): the part of t lost when the FMA result was rounded.
sub.f64 %fd54, %fd67, %fd66;
neg.f64 %fd55, %fd54;
fma.rn.f64 %fd56, %fd7, %fd66, %fd55;
// Single-precision reciprocal of t as the seed for refinement in double.
// inline asm
cvt.rn.f32.f64 %f1,%fd67;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd53,%f2;
// inline asm
// Refine the seed: e = 1 - t * y, then y' = y + y * (e + e^2).
neg.f64 %fd57, %fd67;
mov.f64 %fd58, 0d3FF0000000000000;
.loc 2 218 10
fma.rn.f64 %fd59, %fd57, %fd53, %fd58;
fma.rn.f64 %fd60, %fd59, %fd59, %fd59;
fma.rn.f64 %fd61, %fd60, %fd53, %fd53;
// Final correction step on -y', also folding in the rounding term %fd56.
neg.f64 %fd62, %fd61;
fma.rn.f64 %fd63, %fd67, %fd62, %fd58;
fma.rn.f64 %fd64, %fd62, %fd56, %fd63;
fma.rn.f64 %fd67, %fd64, %fd62, %fd62;
BB41_10:
.loc 1 58 42
// Store the result at the destination offset and advance the inner index.
add.s32 %r31, %r35, %r5;
mul.wide.s32 %rd11, %r31, 8;
add.s64 %rd12, %rd10, %rd11;
st.global.f64 [%rd12], %fd67;
.loc 1 58 22
add.s32 %r35, %r2, %r35;
.loc 1 58 1
setp.lt.s32 %p6, %r35, %r13;
@%p6 bra BB41_4;
BB41_11:
.loc 1 58 22
// Advance the outer index by nctaid.x * ntid.x.
mov.u32 %r32, %nctaid.x;
mad.lo.s32 %r34, %r32, %r17, %r34;
.loc 1 58 1
setp.lt.s32 %p7, %r34, %r14;
@%p7 bra BB41_2;
BB41_12:
.loc 1 58 2
ret;
}
.visible .entry map_tanh_double(
.param .u32 map_tanh_double_param_0,
.param .u32 map_tanh_double_param_1,
.param .u64 map_tanh_double_param_2,
.param .u32 map_tanh_double_param_3,
.param .u64 map_tanh_double_param_4,
.param .u32 map_tanh_double_param_5
)
{
.reg .pred %p<9>;
.reg .f32 %f<5>;
.reg .s32 %r<53>;
.reg .s64 %rd<9>;
.reg .f64 %fd<78>;
ld.param.u32 %r12, [map_tanh_double_param_0];
ld.param.u32 %r13, [map_tanh_double_param_1];
ld.param.u64 %rd1, [map_tanh_double_param_2];
ld.param.u32 %r14, [map_tanh_double_param_3];
ld.param.u64 %rd2, [map_tanh_double_param_4];
ld.param.u32 %r15, [map_tanh_double_param_5];
.loc 1 59 1
mov.u32 %r16, %ntid.x;
mov.u32 %r17, %ctaid.x;
mov.u32 %r18, %tid.x;
mad.lo.s32 %r51, %r16, %r17, %r18;
.loc 1 59 1
setp.ge.s32 %p1, %r51, %r13;
@%p1 bra BB42_12;
.loc 1 59 1
mov.u32 %r19, %ntid.y;
.loc 1 59 22
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r2, %r20, %r19;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
BB42_2:
.loc 1 59 1
mov.u32 %r21, %ctaid.y;
mov.u32 %r23, %tid.y;
mad.lo.s32 %r52, %r19, %r21, %r23;
.loc 1 59 1
setp.ge.s32 %p2, %r52, %r12;
@%p2 bra BB42_11;
.loc 1 59 1
mul.lo.s32 %r4, %r51, %r15;
BB42_4:
add.s32 %r28, %r52, %r4;
mul.wide.s32 %rd4, %r28, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 1 59 1
ld.global.f64 %fd1, [%rd5];
.loc 2 273 10
abs.f64 %fd2, %fd1;
setp.ltu.f64 %p3, %fd2, 0d3FE199999999999A;
@%p3 bra BB42_9;
add.f64 %fd10, %fd2, %fd2;
mul.f64 %fd11, %fd10, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s32.f64 %r7, %fd12;
mov.f64 %fd13, 0dBFE62E42FEFA39EF;
.loc 2 273 10
fma.rn.f64 %fd14, %fd12, %fd13, %fd10;
mov.f64 %fd15, 0dBC7ABC9E3B39803F;
.loc 2 273 10
fma.rn.f64 %fd16, %fd12, %fd15, %fd14;
mov.f64 %fd17, 0d3E5AF86D8EBD13CD;
mov.f64 %fd18, 0d3E21F4076ACD15B6;
.loc 2 273 10
fma.rn.f64 %fd19, %fd18, %fd16, %fd17;
mov.f64 %fd20, 0d3E927E5092BA033D;
.loc 2 273 10
fma.rn.f64 %fd21, %fd19, %fd16, %fd20;
mov.f64 %fd22, 0d3EC71DDE6C5F9DA1;
.loc 2 273 10
fma.rn.f64 %fd23, %fd21, %fd16, %fd22;
mov.f64 %fd24, 0d3EFA01A018D034E6;
.loc 2 273 10
fma.rn.f64 %fd25, %fd23, %fd16, %fd24;
mov.f64 %fd26, 0d3F2A01A01B3B6940;
.loc 2 273 10
fma.rn.f64 %fd27, %fd25, %fd16, %fd26;
mov.f64 %fd28, 0d3F56C16C16C1B5DD;
.loc 2 273 10
fma.rn.f64 %fd29, %fd27, %fd16, %fd28;
mov.f64 %fd30, 0d3F8111111110F74D;
.loc 2 273 10
fma.rn.f64 %fd31, %fd29, %fd16, %fd30;
mov.f64 %fd32, 0d3FA555555555554D;
.loc 2 273 10
fma.rn.f64 %fd33, %fd31, %fd16, %fd32;
mov.f64 %fd34, 0d3FC5555555555557;
.loc 2 273 10
fma.rn.f64 %fd35, %fd33, %fd16, %fd34;
mov.f64 %fd36, 0d3FE0000000000000;
.loc 2 273 10
fma.rn.f64 %fd37, %fd35, %fd16, %fd36;
mul.f64 %fd38, %fd37, %fd16;
fma.rn.f64 %fd3, %fd38, %fd16, %fd16;
shl.b32 %r8, %r7, 20;
add.s32 %r9, %r8, 1072693248;
abs.s32 %r29, %r7;
setp.lt.s32 %p4, %r29, 1021;
@%p4 bra BB42_7;
add.s32 %r30, %r8, 1130364928;
setp.lt.s32 %p5, %r7, 0;
mov.u32 %r31, 0;
.loc 2 273 10
selp.b32 %r32, %r30, %r9, %p5;
shr.s32 %r33, %r7, 31;
add.s32 %r34, %r33, 1073741824;
and.b32 %r35, %r34, -57671680;
add.s32 %r36, %r32, -1048576;
mov.b64 %fd39, {%r31, %r35};
fma.rn.f64 %fd40, %fd3, %fd39, %fd39;
mov.b64 %fd41, {%r31, %r36};
mul.f64 %fd76, %fd40, %fd41;
bra.uni BB42_8;
BB42_7:
mov.u32 %r37, 0;
.loc 2 273 10
mov.b64 %fd42, {%r37, %r9};
fma.rn.f64 %fd76, %fd3, %fd42, %fd42;
BB42_8:
add.f64 %fd43, %fd76, 0d3FF0000000000000;
mov.f64 %fd45, 0d3FF0000000000000;
.loc 2 273 10
// inline asm
cvt.rn.f32.f64 %f1,%fd43;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd44,%f2;
// inline asm
neg.f64 %fd46, %fd43;
fma.rn.f64 %fd47, %fd46, %fd44, %fd45;
fma.rn.f64 %fd48, %fd47, %fd47, %fd47;
fma.rn.f64 %fd49, %fd48, %fd44, %fd44;
neg.f64 %fd50, %fd49;
mov.f64 %fd51, 0d4000000000000000;
.loc 2 273 10
fma.rn.f64 %fd52, %fd51, %fd50, %fd45;
setp.gt.f64 %p6, %fd2, 0d4075E00000000000;
selp.f64 %fd53, 0d3FF0000000000000, %fd52, %p6;
{
.reg .b32 %temp;
mov.b64 {%r38, %temp}, %fd53;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r39}, %fd53;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd1;
}
and.b32 %r41, %r40, -2147483648;
or.b32 %r42, %r39, %r41;
mov.b64 %fd77, {%r38, %r42};
bra.uni BB42_10;
BB42_9:
.loc 2 273 10
mul.f64 %fd54, %fd1, %fd1;
mov.f64 %fd55, 0dBF2B9093D89F0E23;
mov.f64 %fd56, 0d3F0ABFFC9B5786C4;
.loc 2 273 10
fma.rn.f64 %fd57, %fd56, %fd54, %fd55;
mov.f64 %fd58, 0d3F42FA2744C30B61;
.loc 2 273 10
fma.rn.f64 %fd59, %fd57, %fd54, %fd58;
mov.f64 %fd60, 0dBF57CF3B9C1E491D;
.loc 2 273 10
fma.rn.f64 %fd61, %fd59, %fd54, %fd60;
mov.f64 %fd62, 0d3F6D6C61D450119A;
.loc 2 273 10
fma.rn.f64 %fd63, %fd61, %fd54, %fd62;
mov.f64 %fd64, 0dBF8226DDD44294F5;
.loc 2 273 10
fma.rn.f64 %fd65, %fd63, %fd54, %fd64;
mov.f64 %fd66, 0d3F9664F45C2B04A6;
.loc 2 273 10
fma.rn.f64 %fd67, %fd65, %fd54, %fd66;
mov.f64 %fd68, 0dBFABA1BA1AD70754;
.loc 2 273 10
fma.rn.f64 %fd69, %fd67, %fd54, %fd68;
mov.f64 %fd70, 0d3FC111111110295E;
.loc 2 273 10
fma.rn.f64 %fd71, %fd69, %fd54, %fd70;
mov.f64 %fd72, 0dBFD555555555549F;
.loc 2 273 10
fma.rn.f64 %fd73, %fd71, %fd54, %fd72;
mul.f64 %fd74, %fd73, %fd54;
fma.rn.f64 %fd75, %fd74, %fd1, %fd1;
{
.reg .b32 %temp;
mov.b64 {%r43, %temp}, %fd75;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd75;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r45}, %fd1;
}
and.b32 %r46, %r45, -2147483648;
or.b32 %r47, %r44, %r46;
mov.b64 %fd77, {%r43, %r47};
BB42_10:
.loc 1 59 42
mad.lo.s32 %r48, %r51, %r14, %r52;
mul.wide.s32 %rd7, %r48, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 1 59 42
st.global.f64 [%rd8], %fd77;
.loc 1 59 22
add.s32 %r52, %r2, %r52;
.loc 1 59 1
setp.lt.s32 %p7, %r52, %r12;
@%p7 bra BB42_4;
BB42_11:
.loc 1 59 22
mov.u32 %r49, %nctaid.x;
mad.lo.s32 %r51, %r49, %r16, %r51;
.loc 1 59 1
setp.lt.s32 %p8, %r51, %r13;
@%p8 bra BB42_2;
BB42_12:
.loc 1 59 2
ret;
}
// map_tgamma_double: element-wise kernel over a 2-D index space.
// For every (x-index, y-index) pair it loads one f64 from the buffer given by
// param_4, runs the inlined math code that .loc attributes to file 2 line 418
// (presumably the double-precision tgamma, per the kernel name), and stores the
// f64 result into the buffer given by param_2.
//   param_0: exclusive upper bound of the y-derived (inner) index
//   param_1: exclusive upper bound of the x-derived (outer) index
//   param_2: destination base address (converted to a global address)
//   param_3: destination stride, multiplied by the outer index (in elements)
//   param_4: source base address (converted to a global address)
//   param_5: source stride, multiplied by the outer index (in elements)
// Element offsets are computed in 32-bit signed arithmetic and then widened
// (mul.wide.s32 by 8 bytes).
.visible .entry map_tgamma_double(
.param .u32 map_tgamma_double_param_0,
.param .u32 map_tgamma_double_param_1,
.param .u64 map_tgamma_double_param_2,
.param .u32 map_tgamma_double_param_3,
.param .u64 map_tgamma_double_param_4,
.param .u32 map_tgamma_double_param_5
)
{
.reg .pred %p<35>;
.reg .f32 %f<13>;
.reg .s32 %r<91>;
.reg .s64 %rd<14>;
.reg .f64 %fd<401>;
ld.param.u32 %r30, [map_tgamma_double_param_0];
ld.param.u32 %r31, [map_tgamma_double_param_1];
ld.param.u64 %rd1, [map_tgamma_double_param_2];
ld.param.u32 %r32, [map_tgamma_double_param_3];
ld.param.u64 %rd2, [map_tgamma_double_param_4];
ld.param.u32 %r33, [map_tgamma_double_param_5];
.loc 1 60 1
// Outer index %r83 = ntid.x * ctaid.x + tid.x; exit immediately if it is
// already >= param_1.
mov.u32 %r34, %ntid.x;
mov.u32 %r35, %ctaid.x;
mov.u32 %r36, %tid.x;
mad.lo.s32 %r83, %r34, %r35, %r36;
.loc 1 60 1
setp.ge.s32 %p1, %r83, %r31;
@%p1 bra BB43_49;
.loc 1 60 1
mov.u32 %r37, %ntid.y;
.loc 1 60 22
// %r2 = nctaid.y * ntid.y: step of the inner index between iterations.
mov.u32 %r38, %nctaid.y;
mul.lo.s32 %r2, %r38, %r37;
// %rd3 = source base (param_4), %rd11 = destination base (param_2).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd11, %rd1;
// ---- Outer loop head: restart the inner index for the current outer index.
BB43_2:
.loc 1 60 1
mov.u32 %r39, %ctaid.y;
mov.u32 %r41, %tid.y;
mad.lo.s32 %r84, %r37, %r39, %r41;
.loc 1 60 1
setp.ge.s32 %p2, %r84, %r30;
@%p2 bra BB43_48;
// ---- Inner loop body: load x = src[outer * param_5 + inner] into %fd397.
BB43_3:
.loc 1 60 1
mad.lo.s32 %r46, %r83, %r33, %r84;
mul.wide.s32 %rd4, %r46, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd397, [%rd5];
.loc 2 418 10
// %fd2 = |x|. The gtu compare against +infinity can only be true for an
// unordered operand, i.e. NaN input -> BB43_46.
abs.f64 %fd2, %fd397;
setp.gtu.f64 %p3, %fd2, 0d7FF0000000000000;
@%p3 bra BB43_46;
// |x| < 15.0 -> small-argument path (BB43_35).
setp.lt.f64 %p4, %fd2, 0d402E000000000000;
@%p4 bra BB43_35;
// |x| >= 15 and x negative -> BB43_21.
setp.ltu.f64 %p5, %fd397, 0d0000000000000000;
.loc 2 418 10
@%p5 bra BB43_21;
// Large positive x: below ~171.62 (0d406573FAE561F648) evaluate at BB43_8,
// otherwise the result is +infinity.
setp.lt.f64 %p6, %fd397, 0d406573FAE561F648;
@%p6 bra BB43_8;
mov.f64 %fd400, 0d7FF0000000000000;
bra.uni BB43_47;
// ---- Large positive path (15 <= x < ~171.62).
BB43_8:
.loc 2 418 10
// %fd58 ~= 1/x: single-precision approximate reciprocal refined by FMA steps.
// inline asm
cvt.rn.f32.f64 %f1,%fd397;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd53,%f2;
// inline asm
neg.f64 %fd54, %fd397;
mov.f64 %fd55, 0d3FF0000000000000;
.loc 2 418 10
fma.rn.f64 %fd56, %fd54, %fd53, %fd55;
fma.rn.f64 %fd57, %fd56, %fd56, %fd56;
fma.rn.f64 %fd58, %fd57, %fd53, %fd53;
// Horner polynomial in 1/x with constant term 1.0 -> %fd3 (looks like an
// asymptotic correction series; the leading term of the chain is multiplied
// by 0.0 as emitted by the compiler).
mov.f64 %fd59, 0d3F4B8239C670E690;
mov.f64 %fd60, 0d0000000000000000;
.loc 2 418 10
fma.rn.f64 %fd61, %fd60, %fd58, %fd59;
mov.f64 %fd62, 0dBF0B1D75D3346711;
.loc 2 418 10
fma.rn.f64 %fd63, %fd61, %fd58, %fd62;
mov.f64 %fd64, 0dBF436773BDB97B48;
.loc 2 418 10
fma.rn.f64 %fd65, %fd63, %fd58, %fd64;
mov.f64 %fd66, 0d3F1247604839C038;
.loc 2 418 10
fma.rn.f64 %fd67, %fd65, %fd58, %fd66;
mov.f64 %fd68, 0d3F49B0FF6874F2C4;
.loc 2 418 10
fma.rn.f64 %fd69, %fd67, %fd58, %fd68;
mov.f64 %fd70, 0dBF2E13CE465FA859;
.loc 2 418 10
fma.rn.f64 %fd71, %fd69, %fd58, %fd70;
mov.f64 %fd72, 0dBF65F7268EDAB4C8;
.loc 2 418 10
fma.rn.f64 %fd73, %fd71, %fd58, %fd72;
mov.f64 %fd74, 0d3F6C71C71C71C71C;
.loc 2 418 10
fma.rn.f64 %fd75, %fd73, %fd58, %fd74;
mov.f64 %fd76, 0d3FB5555555555555;
.loc 2 418 10
fma.rn.f64 %fd77, %fd75, %fd58, %fd76;
fma.rn.f64 %fd3, %fd77, %fd58, %fd55;
// %fd4 = x - 0.5.
add.f64 %fd4, %fd397, 0dBFE0000000000000;
// Split x into low word %r85 / high word %r86 and take the 11-bit biased
// exponent field into %r87.
{
.reg .b32 %temp;
mov.b64 {%r85, %temp}, %fd397;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r86}, %fd397;
}
shr.u32 %r47, %r86, 20;
and.b32 %r87, %r47, 2047;
setp.ne.s32 %p7, %r87, 0;
@%p7 bra BB43_10;
// Exponent field is zero: rescale by 2^54 (0d4350000000000000) and subtract
// 54 from the extracted exponent to compensate.
mul.f64 %fd78, %fd397, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r86}, %fd78;
}
{
.reg .b32 %temp;
mov.b64 {%r85, %temp}, %fd78;
}
shr.u32 %r48, %r86, 20;
and.b32 %r49, %r48, 2047;
add.s32 %r87, %r49, -54;
BB43_10:
// %r88 = unbiased exponent; %fd379 = mantissa with the exponent field forced
// to 0x3FF (value in [1, 2)).
add.s32 %r88, %r87, -1023;
and.b32 %r50, %r86, -2146435073;
or.b32 %r51, %r50, 1072693248;
mov.b64 %fd379, {%r85, %r51};
// If the high word is >= 0x3FF6A09F (mantissa roughly >= sqrt(2)), halve the
// mantissa (high word -= 0x00100000) and use exponent + 1 instead.
setp.lt.u32 %p8, %r51, 1073127583;
@%p8 bra BB43_12;
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd379;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd379;
}
add.s32 %r54, %r53, -1048576;
mov.b64 %fd379, {%r52, %r54};
add.s32 %r88, %r87, -1022;
BB43_12:
// %fd85 ~= 1/(m + 1) and %fd88 is built from (m - 1)/(m + 1), where m = %fd379;
// %fd89 is its square and feeds the polynomial below. This appears to be an
// extended-precision (head/tail) natural-log evaluation -- NOTE(review):
// interpretation inferred from the instruction pattern, confirm against the
// CUDA math header.
add.f64 %fd79, %fd379, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f5,%fd79;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd80,%f6;
// inline asm
neg.f64 %fd82, %fd79;
fma.rn.f64 %fd83, %fd82, %fd80, %fd55;
fma.rn.f64 %fd84, %fd83, %fd83, %fd83;
fma.rn.f64 %fd85, %fd84, %fd80, %fd80;
add.f64 %fd86, %fd379, 0dBFF0000000000000;
mul.f64 %fd87, %fd86, %fd85;
fma.rn.f64 %fd88, %fd86, %fd85, %fd87;
mul.f64 %fd89, %fd88, %fd88;
mov.f64 %fd90, 0d3ED0F5D241AD3B5A;
mov.f64 %fd91, 0d3EB0F5FF7D2CAFE2;
.loc 2 418 10
fma.rn.f64 %fd92, %fd91, %fd89, %fd90;
mov.f64 %fd93, 0d3EF3B20A75488A3F;
.loc 2 418 10
fma.rn.f64 %fd94, %fd92, %fd89, %fd93;
mov.f64 %fd95, 0d3F1745CDE4FAECD5;
.loc 2 418 10
fma.rn.f64 %fd96, %fd94, %fd89, %fd95;
mov.f64 %fd97, 0d3F3C71C7258A578B;
.loc 2 418 10
fma.rn.f64 %fd98, %fd96, %fd89, %fd97;
mov.f64 %fd99, 0d3F6249249242B910;
.loc 2 418 10
fma.rn.f64 %fd100, %fd98, %fd89, %fd99;
mov.f64 %fd101, 0d3F89999999999DFB;
.loc 2 418 10
fma.rn.f64 %fd102, %fd100, %fd89, %fd101;
// From here on values are carried as (sum, rounding-error) pairs: each
// add/sub/add triple recovers the low-order part lost by the preceding add,
// and each mul.rn/neg/fma triple recovers the low-order part of a product.
sub.f64 %fd103, %fd86, %fd88;
add.f64 %fd104, %fd103, %fd103;
neg.f64 %fd105, %fd88;
fma.rn.f64 %fd106, %fd105, %fd86, %fd104;
mul.f64 %fd107, %fd85, %fd106;
fma.rn.f64 %fd108, %fd102, %fd89, 0d3FB5555555555555;
sub.f64 %fd110, %fd76, %fd108;
fma.rn.f64 %fd111, %fd102, %fd89, %fd110;
add.f64 %fd112, %fd111, 0d0000000000000000;
add.f64 %fd113, %fd112, 0dBC46A4CB00B9E7B0;
add.f64 %fd114, %fd108, %fd113;
sub.f64 %fd115, %fd108, %fd114;
add.f64 %fd116, %fd115, %fd113;
mul.rn.f64 %fd117, %fd114, %fd88;
neg.f64 %fd118, %fd117;
fma.rn.f64 %fd119, %fd114, %fd88, %fd118;
fma.rn.f64 %fd120, %fd114, %fd107, %fd119;
fma.rn.f64 %fd121, %fd116, %fd88, %fd120;
add.f64 %fd122, %fd117, %fd121;
sub.f64 %fd123, %fd117, %fd122;
add.f64 %fd124, %fd123, %fd121;
mul.rn.f64 %fd125, %fd122, %fd88;
neg.f64 %fd126, %fd125;
fma.rn.f64 %fd127, %fd122, %fd88, %fd126;
fma.rn.f64 %fd128, %fd122, %fd107, %fd127;
fma.rn.f64 %fd129, %fd124, %fd88, %fd128;
add.f64 %fd130, %fd125, %fd129;
sub.f64 %fd131, %fd125, %fd130;
add.f64 %fd132, %fd131, %fd129;
mul.rn.f64 %fd133, %fd130, %fd88;
neg.f64 %fd134, %fd133;
fma.rn.f64 %fd135, %fd130, %fd88, %fd134;
fma.rn.f64 %fd136, %fd130, %fd107, %fd135;
fma.rn.f64 %fd137, %fd132, %fd88, %fd136;
add.f64 %fd138, %fd133, %fd137;
sub.f64 %fd139, %fd133, %fd138;
add.f64 %fd140, %fd139, %fd137;
add.f64 %fd141, %fd88, %fd138;
sub.f64 %fd142, %fd88, %fd141;
add.f64 %fd143, %fd142, %fd138;
add.f64 %fd144, %fd143, %fd140;
add.f64 %fd145, %fd144, %fd107;
add.f64 %fd146, %fd141, %fd145;
sub.f64 %fd147, %fd141, %fd146;
add.f64 %fd148, %fd147, %fd145;
// Add exponent * ln(2), with ln(2) split into a high constant
// (0d3FE62E42FEFA3000) and a low constant (0d3D53DE6AF278ECE6).
cvt.rn.f64.s32 %fd149, %r88;
mov.f64 %fd150, 0d3FE62E42FEFA3000;
.loc 2 418 10
mul.rn.f64 %fd151, %fd149, %fd150;
mov.f64 %fd152, 0d3D53DE6AF278ECE6;
.loc 2 418 10
mul.rn.f64 %fd153, %fd149, %fd152;
add.f64 %fd154, %fd151, %fd146;
sub.f64 %fd155, %fd151, %fd154;
add.f64 %fd156, %fd155, %fd146;
add.f64 %fd157, %fd156, %fd148;
add.f64 %fd158, %fd157, %fd153;
add.f64 %fd159, %fd154, %fd158;
sub.f64 %fd160, %fd154, %fd159;
add.f64 %fd161, %fd160, %fd158;
// Multiply the (head %fd159, tail %fd161) pair by %fd4 = x - 0.5, then
// subtract x; the result is the pair head %fd8 / tail %fd9.
mul.rn.f64 %fd162, %fd159, %fd4;
neg.f64 %fd163, %fd162;
fma.rn.f64 %fd164, %fd159, %fd4, %fd163;
fma.rn.f64 %fd165, %fd161, %fd4, %fd164;
add.f64 %fd166, %fd162, %fd165;
sub.f64 %fd167, %fd162, %fd166;
add.f64 %fd168, %fd167, %fd165;
sub.f64 %fd169, %fd166, %fd397;
sub.f64 %fd170, %fd166, %fd169;
sub.f64 %fd171, %fd170, %fd397;
add.f64 %fd172, %fd171, 0d0000000000000000;
add.f64 %fd173, %fd172, %fd168;
add.f64 %fd8, %fd169, %fd173;
sub.f64 %fd174, %fd169, %fd8;
add.f64 %fd9, %fd174, %fd173;
// Range check on the high word of %fd8: if it is unsigned-below 0x40862E43
// (about 709.78) or signed-below 0xC0874911 (about -745.1) the exponential
// is computed at BB43_14; otherwise the result saturates below.
{
.reg .b32 %temp;
mov.b64 {%temp, %r18}, %fd8;
}
setp.lt.u32 %p9, %r18, 1082535491;
setp.lt.s32 %p10, %r18, -1064875759;
or.pred %p11, %p9, %p10;
@%p11 bra BB43_14;
// Out of range: 0.0 when the sign bit is set, +infinity otherwise; a NaN
// head is propagated as %fd8 + %fd8.
setp.lt.s32 %p12, %r18, 0;
selp.f64 %fd175, 0d0000000000000000, 0d7FF0000000000000, %p12;
abs.f64 %fd176, %fd8;
setp.gtu.f64 %p13, %fd176, 0d7FF0000000000000;
add.f64 %fd177, %fd8, %fd8;
selp.f64 %fd381, %fd177, %fd175, %p13;
bra.uni BB43_18;
// ---- Exponential of %fd8: n = round-to-nearest(%fd8 * 0d3FF71547652B82FE),
// reduced argument %fd183 = %fd8 - n * ln(2) (two-constant subtraction),
// polynomial in %fd183, then scaling by 2^n via the exponent bits.
BB43_14:
mov.f64 %fd378, 0d3FF0000000000000;
.loc 2 418 10
mul.f64 %fd178, %fd8, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd179, %fd178;
cvt.rzi.s32.f64 %r19, %fd179;
mov.f64 %fd180, 0dBFE62E42FEFA39EF;
.loc 2 418 10
fma.rn.f64 %fd181, %fd179, %fd180, %fd8;
mov.f64 %fd182, 0dBC7ABC9E3B39803F;
.loc 2 418 10
fma.rn.f64 %fd183, %fd179, %fd182, %fd181;
mov.f64 %fd184, 0d3E928A27E30F5561;
mov.f64 %fd185, 0d3E5AE6449C0686C0;
.loc 2 418 10
fma.rn.f64 %fd186, %fd185, %fd183, %fd184;
mov.f64 %fd187, 0d3EC71DE8E6486D6B;
.loc 2 418 10
fma.rn.f64 %fd188, %fd186, %fd183, %fd187;
mov.f64 %fd189, 0d3EFA019A6B2464C5;
.loc 2 418 10
fma.rn.f64 %fd190, %fd188, %fd183, %fd189;
mov.f64 %fd191, 0d3F2A01A0171064A5;
.loc 2 418 10
fma.rn.f64 %fd192, %fd190, %fd183, %fd191;
mov.f64 %fd193, 0d3F56C16C17F29C8D;
.loc 2 418 10
fma.rn.f64 %fd194, %fd192, %fd183, %fd193;
mov.f64 %fd195, 0d3F8111111111A24E;
.loc 2 418 10
fma.rn.f64 %fd196, %fd194, %fd183, %fd195;
mov.f64 %fd197, 0d3FA555555555211D;
.loc 2 418 10
fma.rn.f64 %fd198, %fd196, %fd183, %fd197;
mov.f64 %fd199, 0d3FC5555555555530;
.loc 2 418 10
fma.rn.f64 %fd200, %fd198, %fd183, %fd199;
mov.f64 %fd201, 0d3FE0000000000005;
.loc 2 418 10
fma.rn.f64 %fd202, %fd200, %fd183, %fd201;
fma.rn.f64 %fd204, %fd202, %fd183, %fd378;
fma.rn.f64 %fd380, %fd204, %fd183, %fd378;
// |n| < 1023: a single power-of-two factor suffices (BB43_16). Otherwise the
// scaling is split into two factors, the first applied here and the
// remainder left in %r89 for BB43_17.
abs.s32 %r55, %r19;
setp.lt.s32 %p14, %r55, 1023;
@%p14 bra BB43_16;
add.s32 %r56, %r19, 2046;
shl.b32 %r57, %r56, 19;
and.b32 %r58, %r57, -1048576;
shl.b32 %r59, %r56, 20;
sub.s32 %r89, %r59, %r58;
mov.u32 %r60, 0;
.loc 2 418 10
mov.b64 %fd205, {%r60, %r58};
mul.f64 %fd380, %fd380, %fd205;
bra.uni BB43_17;
BB43_16:
.loc 2 418 10
// High word of 2^n: (n << 20) + 0x3FF00000.
shl.b32 %r61, %r19, 20;
add.s32 %r89, %r61, 1072693248;
BB43_17:
mov.u32 %r62, 0;
.loc 2 418 10
mov.b64 %fd206, {%r62, %r89};
mul.f64 %fd381, %fd380, %fd206;
BB43_18:
// Unless the exponential is infinite, fold in the tail %fd9:
// %fd381 = %fd381 + %fd381 * %fd9.
abs.f64 %fd207, %fd381;
setp.eq.f64 %p15, %fd207, 0d7FF0000000000000;
@%p15 bra BB43_20;
fma.rn.f64 %fd381, %fd381, %fd9, %fd381;
BB43_20:
// Multiply by the constant 0d40040D931FF62706 (about 2.5066, which looks
// like sqrt(2*pi)) with the low-order correction 0dBCAA6A0D6F814637, then by
// the 1/x polynomial %fd3.
mul.f64 %fd208, %fd381, 0dBCAA6A0D6F814637;
mov.f64 %fd209, 0d40040D931FF62706;
.loc 2 418 10
fma.rn.f64 %fd210, %fd381, %fd209, %fd208;
mul.f64 %fd400, %fd210, %fd3;
bra.uni BB43_47;
// ---- Large negative path (x <= -15).
BB43_21:
.loc 2 418 10
// x equal to its own truncation (an integer, or -infinity) -> NaN result.
cvt.rzi.f64.f64 %fd211, %fd397;
setp.neu.f64 %p16, %fd397, %fd211;
@%p16 bra BB43_23;
mov.f64 %fd400, 0dFFF8000000000000;
bra.uni BB43_47;
BB43_23:
.loc 2 418 10
// x < -185.0 -> signed-zero result (BB43_34).
setp.lt.f64 %p17, %fd397, 0dC067200000000000;
@%p17 bra BB43_34;
// Adding 0x00100000 to the high word doubles x; %fd213 = round-to-nearest(2x)
// and %rd6 / %r23 hold it as an integer. %fd216 = x - %fd213 * 0.5 is the
// reduced argument, which is then multiplied by pi (0d400921FB54442D18 plus
// the low-order term 0d3CA1A62633145C07).
{
.reg .b32 %temp;
mov.b64 {%r63, %temp}, %fd397;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r64}, %fd397;
}
add.s32 %r65, %r64, 1048576;
mov.b64 %fd212, {%r63, %r65};
cvt.rni.f64.f64 %fd213, %fd212;
cvt.rzi.s64.f64 %rd6, %fd213;
cvt.u32.u64 %r23, %rd6;
neg.f64 %fd214, %fd213;
mov.f64 %fd215, 0d3FE0000000000000;
.loc 2 418 10
fma.rn.f64 %fd216, %fd214, %fd215, %fd397;
mul.f64 %fd217, %fd216, 0d3CA1A62633145C07;
mov.f64 %fd218, 0d400921FB54442D18;
.loc 2 418 10
fma.rn.f64 %fd219, %fd216, %fd218, %fd217;
// Bit 0 of the rounded integer selects which half of the
// __cudart_sin_cos_coeffs table is used (%r67 is 0 or 8 entries in) and the
// matching leading coefficient; %p19 is true when bit 0 is clear.
shl.b32 %r66, %r23, 3;
and.b32 %r67, %r66, 8;
mul.rn.f64 %fd19, %fd219, %fd219;
and.b64 %rd7, %rd6, 1;
setp.eq.b64 %p18, %rd7, 1;
not.pred %p19, %p18;
selp.f64 %fd220, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p19;
mul.wide.u32 %rd8, %r67, 8;
mov.u64 %rd9, __cudart_sin_cos_coeffs;
add.s64 %rd10, %rd9, %rd8;
.loc 2 418 10
ld.const.f64 %fd221, [%rd10+8];
fma.rn.f64 %fd222, %fd220, %fd19, %fd221;
ld.const.f64 %fd223, [%rd10+16];
fma.rn.f64 %fd224, %fd222, %fd19, %fd223;
ld.const.f64 %fd225, [%rd10+24];
fma.rn.f64 %fd226, %fd224, %fd19, %fd225;
ld.const.f64 %fd227, [%rd10+32];
fma.rn.f64 %fd228, %fd226, %fd19, %fd227;
ld.const.f64 %fd229, [%rd10+40];
fma.rn.f64 %fd230, %fd228, %fd19, %fd229;
ld.const.f64 %fd231, [%rd10+48];
fma.rn.f64 %fd20, %fd230, %fd19, %fd231;
// Bit 0 clear: odd form, poly * t + t. Bit 0 set: even form, poly * t^2 + 1.
fma.rn.f64 %fd382, %fd20, %fd219, %fd219;
@%p19 bra BB43_26;
mov.f64 %fd232, 0d3FF0000000000000;
.loc 2 418 10
fma.rn.f64 %fd382, %fd20, %fd19, %fd232;
BB43_26:
// Bit 1 of the rounded integer set -> negate the trigonometric value.
and.b32 %r68, %r23, 2;
setp.eq.s32 %p20, %r68, 0;
@%p20 bra BB43_28;
mov.f64 %fd233, 0d0000000000000000;
mov.f64 %fd234, 0dBFF0000000000000;
.loc 2 418 10
fma.rn.f64 %fd382, %fd382, %fd234, %fd233;
BB43_28:
// Exponential of x itself (x is negative on this path), using the same
// reduction, polynomial and power-of-two scaling as BB43_14; result in %fd29.
mul.f64 %fd235, %fd397, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd236, %fd235;
cvt.rzi.s32.f64 %r24, %fd236;
mov.f64 %fd237, 0dBFE62E42FEFA39EF;
.loc 2 418 10
fma.rn.f64 %fd238, %fd236, %fd237, %fd397;
mov.f64 %fd239, 0dBC7ABC9E3B39803F;
.loc 2 418 10
fma.rn.f64 %fd240, %fd236, %fd239, %fd238;
mov.f64 %fd241, 0d3E928A27E30F5561;
mov.f64 %fd242, 0d3E5AE6449C0686C0;
.loc 2 418 10
fma.rn.f64 %fd243, %fd242, %fd240, %fd241;
mov.f64 %fd244, 0d3EC71DE8E6486D6B;
.loc 2 418 10
fma.rn.f64 %fd245, %fd243, %fd240, %fd244;
mov.f64 %fd246, 0d3EFA019A6B2464C5;
.loc 2 418 10
fma.rn.f64 %fd247, %fd245, %fd240, %fd246;
mov.f64 %fd248, 0d3F2A01A0171064A5;
.loc 2 418 10
fma.rn.f64 %fd249, %fd247, %fd240, %fd248;
mov.f64 %fd250, 0d3F56C16C17F29C8D;
.loc 2 418 10
fma.rn.f64 %fd251, %fd249, %fd240, %fd250;
mov.f64 %fd252, 0d3F8111111111A24E;
.loc 2 418 10
fma.rn.f64 %fd253, %fd251, %fd240, %fd252;
mov.f64 %fd254, 0d3FA555555555211D;
.loc 2 418 10
fma.rn.f64 %fd255, %fd253, %fd240, %fd254;
mov.f64 %fd256, 0d3FC5555555555530;
.loc 2 418 10
fma.rn.f64 %fd257, %fd255, %fd240, %fd256;
mov.f64 %fd258, 0d3FE0000000000005;
.loc 2 418 10
fma.rn.f64 %fd259, %fd257, %fd240, %fd258;
mov.f64 %fd260, 0d3FF0000000000000;
.loc 2 418 10
fma.rn.f64 %fd261, %fd259, %fd240, %fd260;
fma.rn.f64 %fd383, %fd261, %fd240, %fd260;
abs.s32 %r69, %r24;
setp.lt.s32 %p21, %r69, 1023;
@%p21 bra BB43_30;
add.s32 %r70, %r24, 2046;
shl.b32 %r71, %r70, 19;
and.b32 %r72, %r71, -1048576;
shl.b32 %r73, %r70, 20;
sub.s32 %r90, %r73, %r72;
mov.u32 %r74, 0;
.loc 2 418 10
mov.b64 %fd262, {%r74, %r72};
mul.f64 %fd383, %fd383, %fd262;
bra.uni BB43_31;
BB43_30:
.loc 2 418 10
shl.b32 %r75, %r24, 20;
add.s32 %r90, %r75, 1072693248;
BB43_31:
mov.u32 %r76, 0;
.loc 2 418 10
mov.b64 %fd263, {%r76, %r90};
mul.f64 %fd29, %fd383, %fd263;
// %fd384 = |x| - 0.5 is the exponent passed to __internal_accurate_pow.
// When |x| > 140.0 the exponent is halved (high word -= 0x00100000); the
// pow result %fd266 is then applied twice (one multiply below, one divide
// at the end) -- presumably to keep intermediates in range.
add.f64 %fd384, %fd2, 0dBFE0000000000000;
setp.leu.f64 %p22, %fd2, 0d4061800000000000;
@%p22 bra BB43_33;
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd384;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r78}, %fd384;
}
add.s32 %r79, %r78, -1048576;
mov.b64 %fd384, {%r77, %r79};
BB43_33:
// Callseq Start 7
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd2;
.param .b64 param1;
st.param.f64 [param1+0], %fd384;
.param .b64 retval0;
.loc 2 418 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd266, [retval0+0];
}
// Callseq End 7
// Only when |x| > 140.0 is the pow result multiplied into %fd29 here.
mul.f64 %fd267, %fd29, %fd266;
setp.gt.f64 %p23, %fd2, 0d4061800000000000;
.loc 2 418 10
selp.f64 %fd268, %fd267, %fd29, %p23;
// %fd273 ~= 1/|x|, followed by the same 1/x polynomial as in BB43_8
// (result %fd293).
// inline asm
cvt.rn.f32.f64 %f9,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd265,%f10;
// inline asm
neg.f64 %fd269, %fd2;
fma.rn.f64 %fd271, %fd269, %fd265, %fd260;
fma.rn.f64 %fd272, %fd271, %fd271, %fd271;
fma.rn.f64 %fd273, %fd272, %fd265, %fd265;
mov.f64 %fd274, 0d3F4B8239C670E690;
mov.f64 %fd275, 0d0000000000000000;
.loc 2 418 10
fma.rn.f64 %fd276, %fd275, %fd273, %fd274;
mov.f64 %fd277, 0dBF0B1D75D3346711;
.loc 2 418 10
fma.rn.f64 %fd278, %fd276, %fd273, %fd277;
mov.f64 %fd279, 0dBF436773BDB97B48;
.loc 2 418 10
fma.rn.f64 %fd280, %fd278, %fd273, %fd279;
mov.f64 %fd281, 0d3F1247604839C038;
.loc 2 418 10
fma.rn.f64 %fd282, %fd280, %fd273, %fd281;
mov.f64 %fd283, 0d3F49B0FF6874F2C4;
.loc 2 418 10
fma.rn.f64 %fd284, %fd282, %fd273, %fd283;
mov.f64 %fd285, 0dBF2E13CE465FA859;
.loc 2 418 10
fma.rn.f64 %fd286, %fd284, %fd273, %fd285;
mov.f64 %fd287, 0dBF65F7268EDAB4C8;
.loc 2 418 10
fma.rn.f64 %fd288, %fd286, %fd273, %fd287;
mov.f64 %fd289, 0d3F6C71C71C71C71C;
.loc 2 418 10
fma.rn.f64 %fd290, %fd288, %fd273, %fd289;
mov.f64 %fd291, 0d3FB5555555555555;
.loc 2 418 10
fma.rn.f64 %fd292, %fd290, %fd273, %fd291;
fma.rn.f64 %fd293, %fd292, %fd273, %fd260;
// Combine: product of the exp/pow term, the polynomial, |x| and the
// trigonometric value; take its reciprocal, scale by 0d3FF40D931FF62706
// (with low-order term 0dBC9A6A0D6F814637), and divide by the pow result.
mul.f64 %fd294, %fd268, %fd293;
mul.f64 %fd295, %fd294, %fd2;
mul.f64 %fd296, %fd295, %fd382;
rcp.rn.f64 %fd297, %fd296;
mul.f64 %fd298, %fd297, 0dBC9A6A0D6F814637;
mov.f64 %fd299, 0d3FF40D931FF62706;
.loc 2 418 10
fma.rn.f64 %fd300, %fd297, %fd299, %fd298;
div.rn.f64 %fd400, %fd300, %fd266;
bra.uni BB43_47;
// ---- x < -185.0 (non-integer): result is a signed zero. %fd304 is
// floor(x) - 2 * floor(floor(x) / 2), i.e. the parity of floor(x); parity 1
// yields -0.0, otherwise +0.0.
BB43_34:
.loc 2 418 10
cvt.rmi.f64.f64 %fd301, %fd397;
mul.f64 %fd302, %fd301, 0d3FE0000000000000;
cvt.rmi.f64.f64 %fd303, %fd302;
fma.rn.f64 %fd304, %fd303, 0dC000000000000000, %fd301;
setp.eq.f64 %p24, %fd304, 0d3FF0000000000000;
selp.f64 %fd400, 0d8000000000000000, 0d0000000000000000, %p24;
bra.uni BB43_47;
// ---- Small-argument path (|x| < 15).
BB43_35:
setp.ltu.f64 %p25, %fd397, 0d0000000000000000;
.loc 2 418 10
@%p25 bra BB43_41;
// Non-negative x: %fd385 is a running product starting at 1.0.
mov.f64 %fd385, 0d3FF0000000000000;
.loc 2 418 10
setp.gt.f64 %p26, %fd397, 0d3FF8000000000000;
@%p26 bra BB43_38;
mov.f64 %fd398, %fd397;
bra.uni BB43_40;
BB43_38:
mov.f64 %fd399, %fd397;
// While the running argument %fd399 exceeds 1.5:
//   %fd385 = %fd385 * %fd399 - %fd385   (i.e. %fd385 * (%fd399 - 1))
//   %fd399 = %fd399 - 1
BB43_39:
.loc 2 418 10
neg.f64 %fd308, %fd385;
fma.rn.f64 %fd385, %fd385, %fd399, %fd308;
add.f64 %fd399, %fd399, 0dBFF0000000000000;
setp.gt.f64 %p27, %fd399, 0d3FF8000000000000;
mov.f64 %fd398, %fd399;
@%p27 bra BB43_39;
BB43_40:
// Polynomial argument %fd310: the reduced value itself when the original
// x < 0.5 (or unordered), otherwise the reduced value minus 1.
add.f64 %fd309, %fd398, 0dBFF0000000000000;
setp.ltu.f64 %p28, %fd397, 0d3FE0000000000000;
selp.f64 %fd310, %fd398, %fd309, %p28;
mov.f64 %fd311, 0dBE8B338C457183B6;
mov.f64 %fd312, 0dBDFE6BDF8CC487CD;
.loc 2 418 10
fma.rn.f64 %fd313, %fd312, %fd310, %fd311;
mov.f64 %fd314, 0d3EB31831766A0388;
.loc 2 418 10
fma.rn.f64 %fd315, %fd313, %fd310, %fd314;
mov.f64 %fd316, 0dBEB4FC07FC9F1563;
.loc 2 418 10
fma.rn.f64 %fd317, %fd315, %fd310, %fd316;
mov.f64 %fd318, 0dBEF51D59DCE6A679;
.loc 2 418 10
fma.rn.f64 %fd319, %fd317, %fd310, %fd318;
mov.f64 %fd320, 0d3F20C8A6351CB1F9;
.loc 2 418 10
fma.rn.f64 %fd321, %fd319, %fd310, %fd320;
mov.f64 %fd322, 0dBF2C364D9E00D4CA;
.loc 2 418 10
fma.rn.f64 %fd323, %fd321, %fd310, %fd322;
mov.f64 %fd324, 0dBF5317112046830B;
.loc 2 418 10
fma.rn.f64 %fd325, %fd323, %fd310, %fd324;
mov.f64 %fd326, 0d3F7D919C50FF9416;
.loc 2 418 10
fma.rn.f64 %fd327, %fd325, %fd310, %fd326;
mov.f64 %fd328, 0dBF83B4AF28728BB0;
.loc 2 418 10
fma.rn.f64 %fd329, %fd327, %fd310, %fd328;
mov.f64 %fd330, 0dBFA59AF103C171DC;
.loc 2 418 10
fma.rn.f64 %fd331, %fd329, %fd310, %fd330;
mov.f64 %fd332, 0d3FC5512320B45D97;
.loc 2 418 10
fma.rn.f64 %fd333, %fd331, %fd310, %fd332;
mov.f64 %fd334, 0dBFA5815E8FA27607;
.loc 2 418 10
fma.rn.f64 %fd335, %fd333, %fd310, %fd334;
mov.f64 %fd336, 0dBFE4FCF4026AFA4B;
.loc 2 418 10
fma.rn.f64 %fd337, %fd335, %fd310, %fd336;
mov.f64 %fd338, 0d3FE2788CFC6FB619;
.loc 2 418 10
fma.rn.f64 %fd339, %fd337, %fd310, %fd338;
mov.f64 %fd340, 0d3FF0000000000000;
.loc 2 418 10
fma.rn.f64 %fd341, %fd339, %fd310, %fd340;
// For x < 0.5 the polynomial value is additionally multiplied by x; the
// result is the running product divided by that denominator.
mul.f64 %fd342, %fd341, %fd397;
setp.lt.f64 %p29, %fd397, 0d3FE0000000000000;
selp.f64 %fd343, %fd342, %fd341, %p29;
div.rn.f64 %fd400, %fd385, %fd343;
bra.uni BB43_47;
// ---- Small negative x (-15 < x < 0).
BB43_41:
.loc 2 418 10
// Negative integer input -> NaN result.
cvt.rzi.f64.f64 %fd344, %fd397;
setp.neu.f64 %p30, %fd397, %fd344;
@%p30 bra BB43_43;
mov.f64 %fd400, 0dFFF8000000000000;
bra.uni BB43_47;
BB43_43:
.loc 2 418 10
setp.geu.f64 %p31, %fd397, 0dBFE0000000000000;
mov.f64 %fd394, %fd397;
mov.f64 %fd395, %fd397;
mov.f64 %fd396, %fd397;
@%p31 bra BB43_45;
// While the running argument %fd396 is below -0.5:
//   %fd397 = %fd397 * %fd396 + %fd397   (i.e. %fd397 * (%fd396 + 1))
//   %fd396 = %fd396 + 1
// Note that %fd397 (the loaded input) is reused as the product accumulator
// here; it is not read as the original x afterwards on this path.
BB43_44:
fma.rn.f64 %fd397, %fd397, %fd396, %fd397;
add.f64 %fd396, %fd396, 0d3FF0000000000000;
setp.lt.f64 %p32, %fd396, 0dBFE0000000000000;
mov.f64 %fd395, %fd397;
mov.f64 %fd394, %fd396;
@%p32 bra BB43_44;
BB43_45:
// Same polynomial coefficients as BB43_40, evaluated at the shifted argument
// %fd394; the result is 1 / (product %fd395 * polynomial).
mov.f64 %fd345, 0dBE8B338C457183B6;
mov.f64 %fd346, 0dBDFE6BDF8CC487CD;
.loc 2 418 10
fma.rn.f64 %fd347, %fd346, %fd394, %fd345;
mov.f64 %fd348, 0d3EB31831766A0388;
.loc 2 418 10
fma.rn.f64 %fd349, %fd347, %fd394, %fd348;
mov.f64 %fd350, 0dBEB4FC07FC9F1563;
.loc 2 418 10
fma.rn.f64 %fd351, %fd349, %fd394, %fd350;
mov.f64 %fd352, 0dBEF51D59DCE6A679;
.loc 2 418 10
fma.rn.f64 %fd353, %fd351, %fd394, %fd352;
mov.f64 %fd354, 0d3F20C8A6351CB1F9;
.loc 2 418 10
fma.rn.f64 %fd355, %fd353, %fd394, %fd354;
mov.f64 %fd356, 0dBF2C364D9E00D4CA;
.loc 2 418 10
fma.rn.f64 %fd357, %fd355, %fd394, %fd356;
mov.f64 %fd358, 0dBF5317112046830B;
.loc 2 418 10
fma.rn.f64 %fd359, %fd357, %fd394, %fd358;
mov.f64 %fd360, 0d3F7D919C50FF9416;
.loc 2 418 10
fma.rn.f64 %fd361, %fd359, %fd394, %fd360;
mov.f64 %fd362, 0dBF83B4AF28728BB0;
.loc 2 418 10
fma.rn.f64 %fd363, %fd361, %fd394, %fd362;
mov.f64 %fd364, 0dBFA59AF103C171DC;
.loc 2 418 10
fma.rn.f64 %fd365, %fd363, %fd394, %fd364;
mov.f64 %fd366, 0d3FC5512320B45D97;
.loc 2 418 10
fma.rn.f64 %fd367, %fd365, %fd394, %fd366;
mov.f64 %fd368, 0dBFA5815E8FA27607;
.loc 2 418 10
fma.rn.f64 %fd369, %fd367, %fd394, %fd368;
mov.f64 %fd370, 0dBFE4FCF4026AFA4B;
.loc 2 418 10
fma.rn.f64 %fd371, %fd369, %fd394, %fd370;
mov.f64 %fd372, 0d3FE2788CFC6FB619;
.loc 2 418 10
fma.rn.f64 %fd373, %fd371, %fd394, %fd372;
mov.f64 %fd374, 0d3FF0000000000000;
.loc 2 418 10
fma.rn.f64 %fd375, %fd373, %fd394, %fd374;
mul.f64 %fd376, %fd395, %fd375;
rcp.rn.f64 %fd400, %fd376;
bra.uni BB43_47;
// ---- NaN input: the result is x + x.
BB43_46:
.loc 2 418 10
add.f64 %fd400, %fd397, %fd397;
// ---- Common exit of the math code: store %fd400 to
// dst[outer * param_3 + inner], then advance the inner index by %r2.
BB43_47:
.loc 1 60 42
mad.lo.s32 %r80, %r83, %r32, %r84;
mul.wide.s32 %rd12, %r80, 8;
add.s64 %rd13, %rd11, %rd12;
st.global.f64 [%rd13], %fd400;
.loc 1 60 22
add.s32 %r84, %r2, %r84;
.loc 1 60 1
setp.lt.s32 %p33, %r84, %r30;
@%p33 bra BB43_3;
// ---- Advance the outer index by nctaid.x * ntid.x and loop while it is
// still below param_1.
BB43_48:
.loc 1 60 22
mov.u32 %r81, %nctaid.x;
mad.lo.s32 %r83, %r81, %r34, %r83;
.loc 1 60 1
setp.lt.s32 %p34, %r83, %r31;
@%p34 bra BB43_2;
BB43_49:
.loc 1 60 2
ret;
}
// map_trunc_double: element-wise round-toward-zero over a 2-D index space.
// For every (outer, inner) index pair it loads one f64 from the source buffer,
// converts it with cvt.rzi.f64.f64 and stores the result to the destination.
//   param_0: exclusive upper bound of the y-derived (inner) index
//   param_1: exclusive upper bound of the x-derived (outer) index
//   param_2: destination base address (converted to a global address)
//   param_3: destination stride, multiplied by the outer index (in elements)
//   param_4: source base address (converted to a global address)
//   param_5: source stride, multiplied by the outer index (in elements)
// Both indices advance by (grid dimension * block dimension) per iteration.
.visible .entry map_trunc_double(
.param .u32 map_trunc_double_param_0,
.param .u32 map_trunc_double_param_1,
.param .u64 map_trunc_double_param_2,
.param .u32 map_trunc_double_param_3,
.param .u64 map_trunc_double_param_4,
.param .u32 map_trunc_double_param_5
)
{
.reg .pred %pOuterDone, %pInnerDone, %pInnerMore, %pOuterMore;
.reg .s32 %nInner, %nOuter, %dstStride, %srcStride;
.reg .s32 %bdimX, %bidX, %thrX, %bdimY, %bidY, %thrY, %gdimX, %gdimY;
.reg .s32 %outer, %innerStart, %inner, %outerStep, %innerStep;
.reg .s32 %srcRow, %dstRow, %srcIdx, %dstIdx;
.reg .s64 %dstGeneric, %srcGeneric, %dstBase, %srcBase;
.reg .s64 %srcOff, %dstOff, %srcAddr, %dstAddr;
.reg .f64 %vIn, %vOut;
// Fetch the kernel arguments and convert both pointers to global addresses.
ld.param.u32 %nInner, [map_trunc_double_param_0];
ld.param.u32 %nOuter, [map_trunc_double_param_1];
ld.param.u64 %dstGeneric, [map_trunc_double_param_2];
ld.param.u32 %dstStride, [map_trunc_double_param_3];
ld.param.u64 %srcGeneric, [map_trunc_double_param_4];
ld.param.u32 %srcStride, [map_trunc_double_param_5];
cvta.to.global.u64 %dstBase, %dstGeneric;
cvta.to.global.u64 %srcBase, %srcGeneric;
.loc 1 61 1
// Starting outer index = ntid.x * ctaid.x + tid.x; nothing to do if it is
// already out of range.
mov.u32 %bdimX, %ntid.x;
mov.u32 %bidX, %ctaid.x;
mov.u32 %thrX, %tid.x;
mad.lo.s32 %outer, %bdimX, %bidX, %thrX;
.loc 1 61 1
setp.ge.s32 %pOuterDone, %outer, %nOuter;
@%pOuterDone bra TRUNC_EXIT;
.loc 1 61 1
// Starting inner index = ntid.y * ctaid.y + tid.y.
mov.u32 %thrY, %tid.y;
mov.u32 %bdimY, %ntid.y;
mov.u32 %bidY, %ctaid.y;
mad.lo.s32 %innerStart, %bdimY, %bidY, %thrY;
.loc 1 61 22
// Per-iteration steps: nctaid * ntid in each dimension.
mov.u32 %gdimX, %nctaid.x;
mul.lo.s32 %outerStep, %gdimX, %bdimX;
.loc 1 61 22
mov.u32 %gdimY, %nctaid.y;
mul.lo.s32 %innerStep, %gdimY, %bdimY;
TRUNC_OUTER:
.loc 1 61 1
setp.ge.s32 %pInnerDone, %innerStart, %nInner;
@%pInnerDone bra TRUNC_NEXT_OUTER;
.loc 1 61 1
// Offsets of the current outer index in the source and destination buffers.
mul.lo.s32 %srcRow, %outer, %srcStride;
.loc 1 61 42
mul.lo.s32 %dstRow, %outer, %dstStride;
mov.u32 %inner, %innerStart;
TRUNC_INNER:
.loc 1 61 1
// Load src[outer * srcStride + inner].
add.s32 %srcIdx, %inner, %srcRow;
mul.wide.s32 %srcOff, %srcIdx, 8;
add.s64 %srcAddr, %srcBase, %srcOff;
.loc 1 61 1
ld.global.f64 %vIn, [%srcAddr];
.loc 3 2800 10
// Round to an integral value toward zero, staying in f64.
cvt.rzi.f64.f64 %vOut, %vIn;
.loc 1 61 42
// Store to dst[outer * dstStride + inner].
add.s32 %dstIdx, %inner, %dstRow;
mul.wide.s32 %dstOff, %dstIdx, 8;
add.s64 %dstAddr, %dstBase, %dstOff;
.loc 1 61 42
st.global.f64 [%dstAddr], %vOut;
.loc 1 61 22
add.s32 %inner, %innerStep, %inner;
.loc 1 61 1
setp.lt.s32 %pInnerMore, %inner, %nInner;
@%pInnerMore bra TRUNC_INNER;
TRUNC_NEXT_OUTER:
.loc 1 61 22
add.s32 %outer, %outerStep, %outer;
.loc 1 61 1
setp.lt.s32 %pOuterMore, %outer, %nOuter;
@%pOuterMore bra TRUNC_OUTER;
TRUNC_EXIT:
.loc 1 61 2
ret;
}
.visible .entry map_y0_double(
.param .u32 map_y0_double_param_0,
.param .u32 map_y0_double_param_1,
.param .u64 map_y0_double_param_2,
.param .u32 map_y0_double_param_3,
.param .u64 map_y0_double_param_4,
.param .u32 map_y0_double_param_5
)
{
.local .align 4 .b8 __local_depot45[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<35>;
.reg .f32 %f<13>;
.reg .s32 %r<80>;
.reg .s64 %rd<31>;
.reg .f64 %fd<549>;
mov.u64 %SPL, __local_depot45;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r31, [map_y0_double_param_0];
ld.param.u32 %r32, [map_y0_double_param_1];
ld.param.u64 %rd1, [map_y0_double_param_2];
ld.param.u32 %r33, [map_y0_double_param_3];
ld.param.u64 %rd2, [map_y0_double_param_4];
ld.param.u32 %r34, [map_y0_double_param_5];
.loc 1 62 1
mov.u32 %r35, %ntid.x;
mov.u32 %r36, %ctaid.x;
mov.u32 %r37, %tid.x;
mad.lo.s32 %r70, %r35, %r36, %r37;
.loc 1 62 1
setp.ge.s32 %p1, %r70, %r32;
@%p1 bra BB45_56;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd28, %rd1;
BB45_2:
.loc 1 62 1
mov.u32 %r38, %ctaid.y;
mov.u32 %r39, %ntid.y;
mov.u32 %r40, %tid.y;
mad.lo.s32 %r71, %r39, %r38, %r40;
.loc 1 62 1
setp.ge.s32 %p2, %r71, %r31;
@%p2 bra BB45_55;
BB45_3:
.loc 1 62 1
mad.lo.s32 %r45, %r70, %r34, %r71;
mul.wide.s32 %rd4, %r45, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 2 343 10
abs.f64 %fd2, %fd1;
setp.gtu.f64 %p3, %fd2, 0d3FE97F4A8F9D3F28;
@%p3 bra BB45_35;
mul.f64 %fd62, %fd2, %fd2;
mov.f64 %fd63, 0dBD13098C51C18514;
mov.f64 %fd64, 0d3C8EFBD0A1B77C65;
.loc 2 343 10
fma.rn.f64 %fd65, %fd64, %fd62, %fd63;
mov.f64 %fd66, 0d3D923102D2F5F2F5;
.loc 2 343 10
fma.rn.f64 %fd67, %fd65, %fd62, %fd66;
mov.f64 %fd68, 0dBE0A5F2DEE7D526E;
.loc 2 343 10
fma.rn.f64 %fd69, %fd67, %fd62, %fd68;
mov.f64 %fd70, 0d3E7BB77E758B38AF;
.loc 2 343 10
fma.rn.f64 %fd71, %fd69, %fd62, %fd70;
mov.f64 %fd72, 0dBEE3D1A206EC4F36;
.loc 2 343 10
fma.rn.f64 %fd73, %fd71, %fd62, %fd72;
mov.f64 %fd74, 0d3F4183DCD3ED6294;
.loc 2 343 10
fma.rn.f64 %fd75, %fd73, %fd62, %fd74;
mov.f64 %fd76, 0dBF903921CF04F123;
.loc 2 343 10
fma.rn.f64 %fd77, %fd75, %fd62, %fd76;
mov.f64 %fd78, 0d3FC5DB69D7753176;
.loc 2 343 10
fma.rn.f64 %fd79, %fd77, %fd62, %fd78;
add.f64 %fd80, %fd62, 0dBFDBA96740000000;
add.f64 %fd81, %fd80, 0d3E15A30C80000000;
mul.f64 %fd3, %fd79, %fd81;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd2;
}
setp.lt.f64 %p4, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p5, %fd2, 0d0000000000000000;
and.pred %p6, %p5, %p4;
@%p6 bra BB45_10;
abs.f64 %fd538, %fd2;
setp.gtu.f64 %p7, %fd538, 0d7FF0000000000000;
@%p7 bra BB45_9;
setp.neu.f64 %p8, %fd2, 0d0000000000000000;
@%p8 bra BB45_8;
mov.f64 %fd539, 0dFFF0000000000000;
bra.uni BB45_16;
BB45_8:
.loc 2 343 10
setp.eq.f64 %p9, %fd2, 0d7FF0000000000000;
selp.f64 %fd539, %fd2, 0dFFF8000000000000, %p9;
bra.uni BB45_16;
BB45_9:
.loc 2 343 10
add.f64 %fd539, %fd2, %fd2;
bra.uni BB45_16;
BB45_10:
.loc 2 343 10
setp.lt.u32 %p10, %r72, 1048576;
@%p10 bra BB45_12;
mov.u32 %r74, -1023;
bra.uni BB45_13;
BB45_12:
.loc 2 343 10
mul.f64 %fd83, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd83;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd83;
}
mov.u32 %r74, -1077;
BB45_13:
.loc 2 343 10
shr.s32 %r48, %r72, 20;
add.s32 %r75, %r74, %r48;
and.b32 %r49, %r72, -2146435073;
or.b32 %r50, %r49, 1072693248;
mov.b64 %fd537, {%r73, %r50};
setp.lt.u32 %p11, %r50, 1073127583;
@%p11 bra BB45_15;
{
.reg .b32 %temp;
mov.b64 {%r51, %temp}, %fd537;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd537;
}
add.s32 %r53, %r52, -1048576;
mov.b64 %fd537, {%r51, %r53};
add.s32 %r75, %r75, 1;
BB45_15:
add.f64 %fd84, %fd537, 0d3FF0000000000000;
mov.f64 %fd86, 0d3FF0000000000000;
.loc 2 343 10
// inline asm
cvt.rn.f32.f64 %f1,%fd84;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd85,%f2;
// inline asm
neg.f64 %fd87, %fd84;
fma.rn.f64 %fd88, %fd87, %fd85, %fd86;
fma.rn.f64 %fd89, %fd88, %fd88, %fd88;
fma.rn.f64 %fd90, %fd89, %fd85, %fd85;
add.f64 %fd91, %fd537, 0dBFF0000000000000;
mul.f64 %fd92, %fd91, %fd90;
fma.rn.f64 %fd93, %fd91, %fd90, %fd92;
mul.f64 %fd94, %fd93, %fd93;
mov.f64 %fd95, 0d3ED0EE258B7A8B04;
mov.f64 %fd96, 0d3EB1380B3AE80F1E;
.loc 2 343 10
fma.rn.f64 %fd97, %fd96, %fd94, %fd95;
mov.f64 %fd98, 0d3EF3B2669F02676F;
.loc 2 343 10
fma.rn.f64 %fd99, %fd97, %fd94, %fd98;
mov.f64 %fd100, 0d3F1745CBA9AB0956;
.loc 2 343 10
fma.rn.f64 %fd101, %fd99, %fd94, %fd100;
mov.f64 %fd102, 0d3F3C71C72D1B5154;
.loc 2 343 10
fma.rn.f64 %fd103, %fd101, %fd94, %fd102;
mov.f64 %fd104, 0d3F624924923BE72D;
.loc 2 343 10
fma.rn.f64 %fd105, %fd103, %fd94, %fd104;
mov.f64 %fd106, 0d3F8999999999A3C4;
.loc 2 343 10
fma.rn.f64 %fd107, %fd105, %fd94, %fd106;
mov.f64 %fd108, 0d3FB5555555555554;
.loc 2 343 10
fma.rn.f64 %fd109, %fd107, %fd94, %fd108;
sub.f64 %fd110, %fd91, %fd93;
add.f64 %fd111, %fd110, %fd110;
neg.f64 %fd112, %fd93;
fma.rn.f64 %fd113, %fd112, %fd91, %fd111;
mul.f64 %fd114, %fd90, %fd113;
mul.f64 %fd115, %fd109, %fd94;
fma.rn.f64 %fd116, %fd115, %fd93, %fd114;
cvt.rn.f64.s32 %fd117, %r75;
mov.f64 %fd118, 0d3FE62E42FEFA39EF;
.loc 2 343 10
fma.rn.f64 %fd119, %fd117, %fd118, %fd93;
neg.s32 %r54, %r75;
cvt.rn.f64.s32 %fd120, %r54;
fma.rn.f64 %fd121, %fd120, %fd118, %fd119;
sub.f64 %fd122, %fd121, %fd93;
sub.f64 %fd123, %fd116, %fd122;
mov.f64 %fd124, 0d3C7ABC9E3B39803F;
.loc 2 343 10
fma.rn.f64 %fd125, %fd117, %fd124, %fd123;
add.f64 %fd539, %fd119, %fd125;
abs.f64 %fd538, %fd2;
BB45_16:
mul.f64 %fd14, %fd539, 0d3FE45F306DC9C883;
setp.gtu.f64 %p12, %fd538, 0d400FB319F277BBE5;
@%p12 bra BB45_18;
add.f64 %fd126, %fd538, 0dC0033D152E971B40;
add.f64 %fd127, %fd126, 0d3CA0F539D7DA258E;
mov.f64 %fd128, 0dBCFCF8F9A8C294BC;
mov.f64 %fd129, 0dBCC0D18564C48C61;
.loc 2 343 10
fma.rn.f64 %fd130, %fd129, %fd127, %fd128;
mov.f64 %fd131, 0d3D3FAB983CAE498B;
.loc 2 343 10
fma.rn.f64 %fd132, %fd130, %fd127, %fd131;
mov.f64 %fd133, 0d3D7CD7C018579B88;
.loc 2 343 10
fma.rn.f64 %fd134, %fd132, %fd127, %fd133;
mov.f64 %fd135, 0dBDBBDD2342D64FDD;
.loc 2 343 10
fma.rn.f64 %fd136, %fd134, %fd127, %fd135;
mov.f64 %fd137, 0dBDF5C2D9416B1E2B;
.loc 2 343 10
fma.rn.f64 %fd138, %fd136, %fd127, %fd137;
mov.f64 %fd139, 0d3E32951D73174DD5;
.loc 2 343 10
fma.rn.f64 %fd140, %fd138, %fd127, %fd139;
mov.f64 %fd141, 0d3E67FF99802CAEB5;
.loc 2 343 10
fma.rn.f64 %fd142, %fd140, %fd127, %fd141;
mov.f64 %fd143, 0dBEA1CCE305C4C9F7;
.loc 2 343 10
fma.rn.f64 %fd144, %fd142, %fd127, %fd143;
mov.f64 %fd145, 0dBED232C77E29E1BB;
.loc 2 343 10
fma.rn.f64 %fd146, %fd144, %fd127, %fd145;
mov.f64 %fd147, 0d3F06ED3B9F0EF757;
.loc 2 343 10
fma.rn.f64 %fd148, %fd146, %fd127, %fd147;
mov.f64 %fd149, 0d3F315382BA096A62;
.loc 2 343 10
fma.rn.f64 %fd150, %fd148, %fd127, %fd149;
mov.f64 %fd151, 0dBF61F992590D1AE4;
.loc 2 343 10
fma.rn.f64 %fd152, %fd150, %fd127, %fd151;
mov.f64 %fd153, 0dBF81BB1CBE1A465F;
.loc 2 343 10
fma.rn.f64 %fd154, %fd152, %fd127, %fd153;
mov.f64 %fd155, 0d3FACFAE864368D84;
.loc 2 343 10
fma.rn.f64 %fd156, %fd154, %fd127, %fd155;
mov.f64 %fd157, 0d3FBBA1DEEA0294A3;
.loc 2 343 10
fma.rn.f64 %fd158, %fd156, %fd127, %fd157;
mov.f64 %fd159, 0dBFE09CDB36551280;
.loc 2 343 10
fma.rn.f64 %fd160, %fd158, %fd127, %fd159;
mul.f64 %fd15, %fd160, %fd127;
fma.rn.f64 %fd548, %fd14, %fd15, %fd3;
bra.uni BB45_54;
BB45_18:
.loc 2 343 10
setp.gtu.f64 %p13, %fd538, 0d401C58FD1A62F5EC;
@%p13 bra BB45_20;
add.f64 %fd161, %fd538, 0dC016148F5B2C2E45;
add.f64 %fd162, %fd161, 0dBC975054CD60A517;
mov.f64 %fd163, 0d3CF83FD1F333EB61;
mov.f64 %fd164, 0d3CBCB0A8F126B343;
.loc 2 343 10
fma.rn.f64 %fd165, %fd164, %fd162, %fd163;
mov.f64 %fd166, 0dBD4100E33E3FB413;
.loc 2 343 10
fma.rn.f64 %fd167, %fd165, %fd162, %fd166;
mov.f64 %fd168, 0dBD7846076D004627;
.loc 2 343 10
fma.rn.f64 %fd169, %fd167, %fd162, %fd168;
mov.f64 %fd170, 0d3DBE2F1D4F90720D;
.loc 2 343 10
fma.rn.f64 %fd171, %fd169, %fd162, %fd170;
mov.f64 %fd172, 0d3DF1D03B1E4A119B;
.loc 2 343 10
fma.rn.f64 %fd173, %fd171, %fd162, %fd172;
mov.f64 %fd174, 0dBE341D72B1B3BCE9;
.loc 2 343 10
fma.rn.f64 %fd175, %fd173, %fd162, %fd174;
mov.f64 %fd176, 0dBE62DA37CE2A9EF8;
.loc 2 343 10
fma.rn.f64 %fd177, %fd175, %fd162, %fd176;
mov.f64 %fd178, 0d3EA32E6D9974F763;
.loc 2 343 10
fma.rn.f64 %fd179, %fd177, %fd162, %fd178;
mov.f64 %fd180, 0d3ECAD77D744A1879;
.loc 2 343 10
fma.rn.f64 %fd181, %fd179, %fd162, %fd180;
mov.f64 %fd182, 0dBF0863F481A37337;
.loc 2 343 10
fma.rn.f64 %fd183, %fd181, %fd162, %fd182;
mov.f64 %fd184, 0dBF26F641F418F0F4;
.loc 2 343 10
fma.rn.f64 %fd185, %fd183, %fd162, %fd184;
mov.f64 %fd186, 0d3F627E31FE9A969E;
.loc 2 343 10
fma.rn.f64 %fd187, %fd185, %fd162, %fd186;
mov.f64 %fd188, 0d3F72F7FFE9025628;
.loc 2 343 10
fma.rn.f64 %fd189, %fd187, %fd162, %fd188;
mov.f64 %fd190, 0dBFAB2150CB41E8BF;
.loc 2 343 10
fma.rn.f64 %fd191, %fd189, %fd162, %fd190;
mov.f64 %fd192, 0dBF9F8F72E7A848DE;
.loc 2 343 10
fma.rn.f64 %fd193, %fd191, %fd162, %fd192;
mov.f64 %fd194, 0d3FD5C6E60A097823;
.loc 2 343 10
fma.rn.f64 %fd195, %fd193, %fd162, %fd194;
mul.f64 %fd16, %fd195, %fd162;
fma.rn.f64 %fd548, %fd14, %fd16, %fd3;
bra.uni BB45_54;
BB45_20:
.loc 2 343 10
setp.gtu.f64 %p14, %fd538, 0d402471FCB6A7A8C0;
@%p14 bra BB45_22;
add.f64 %fd196, %fd538, 0dC0214EB56CCCDECA;
add.f64 %fd197, %fd196, 0d3CB51970714C7C25;
mov.f64 %fd198, 0dBCF4B3A71AAAC629;
mov.f64 %fd199, 0dBCBDB7FFCF659E24;
.loc 2 343 10
fma.rn.f64 %fd200, %fd199, %fd197, %fd198;
mov.f64 %fd201, 0d3D417EC150ECDCE7;
.loc 2 343 10
fma.rn.f64 %fd202, %fd200, %fd197, %fd201;
mov.f64 %fd203, 0d3D7438F5EA1D10B2;
.loc 2 343 10
fma.rn.f64 %fd204, %fd202, %fd197, %fd203;
mov.f64 %fd205, 0dBDBEDAE7EC2C9E87;
.loc 2 343 10
fma.rn.f64 %fd206, %fd204, %fd197, %fd205;
mov.f64 %fd207, 0dBDECADD2C4B91F58;
.loc 2 343 10
fma.rn.f64 %fd208, %fd206, %fd197, %fd207;
mov.f64 %fd209, 0d3E34582C8EE12204;
.loc 2 343 10
fma.rn.f64 %fd210, %fd208, %fd197, %fd209;
mov.f64 %fd211, 0d3E5CEDA451DD20F8;
.loc 2 343 10
fma.rn.f64 %fd212, %fd210, %fd197, %fd211;
mov.f64 %fd213, 0dBEA30E8CC3165E2F;
.loc 2 343 10
fma.rn.f64 %fd214, %fd212, %fd197, %fd213;
mov.f64 %fd215, 0dBEC3324842BB1A2E;
.loc 2 343 10
fma.rn.f64 %fd216, %fd214, %fd197, %fd215;
mov.f64 %fd217, 0d3F07800BC54FBDDB;
.loc 2 343 10
fma.rn.f64 %fd218, %fd216, %fd197, %fd217;
mov.f64 %fd219, 0d3F1D79605276949A;
.loc 2 343 10
fma.rn.f64 %fd220, %fd218, %fd197, %fd219;
mov.f64 %fd221, 0dBF60E0D60385A629;
.loc 2 343 10
fma.rn.f64 %fd222, %fd220, %fd197, %fd221;
mov.f64 %fd223, 0dBF648E63600D82F3;
.loc 2 343 10
fma.rn.f64 %fd224, %fd222, %fd197, %fd223;
mov.f64 %fd225, 0d3FA68B984EC6493A;
.loc 2 343 10
fma.rn.f64 %fd226, %fd224, %fd197, %fd225;
mov.f64 %fd227, 0d3F900F7FCF183E0B;
.loc 2 343 10
fma.rn.f64 %fd228, %fd226, %fd197, %fd227;
mov.f64 %fd229, 0dBFD15F7977A772D4;
.loc 2 343 10
fma.rn.f64 %fd230, %fd228, %fd197, %fd229;
mul.f64 %fd17, %fd230, %fd197;
fma.rn.f64 %fd548, %fd14, %fd17, %fd3;
bra.uni BB45_54;
BB45_22:
.loc 2 343 10
abs.f64 %fd231, %fd538;
setp.neu.f64 %p15, %fd231, 0d7FF0000000000000;
@%p15 bra BB45_24;
mov.f64 %fd232, 0d0000000000000000;
.loc 2 343 10
fma.rn.f64 %fd548, %fd14, %fd232, %fd3;
bra.uni BB45_54;
BB45_24:
add.u64 %rd6, %SP, 4;
.loc 2 343 10
// inline asm
cvt.rn.f32.f64 %f5,%fd538;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd234,%f6;
// inline asm
neg.f64 %fd235, %fd538;
mov.f64 %fd236, 0d3FF0000000000000;
.loc 2 343 10
fma.rn.f64 %fd237, %fd235, %fd234, %fd236;
fma.rn.f64 %fd238, %fd237, %fd237, %fd237;
fma.rn.f64 %fd239, %fd238, %fd234, %fd234;
mul.f64 %fd240, %fd239, %fd239;
mov.f64 %fd241, 0d409927467A655012;
mov.f64 %fd242, 0dC0D115CB8C11A9DC;
.loc 2 343 10
fma.rn.f64 %fd243, %fd242, %fd240, %fd241;
mov.f64 %fd244, 0dC05751787E247BD4;
.loc 2 343 10
fma.rn.f64 %fd245, %fd243, %fd240, %fd244;
mov.f64 %fd246, 0d401704C4E5FC36B2;
.loc 2 343 10
fma.rn.f64 %fd247, %fd245, %fd240, %fd246;
mov.f64 %fd248, 0dBFE15B747A2FD531;
.loc 2 343 10
fma.rn.f64 %fd249, %fd247, %fd240, %fd248;
mov.f64 %fd250, 0d3FBA7FEACF6CB79B;
.loc 2 343 10
fma.rn.f64 %fd251, %fd249, %fd240, %fd250;
mov.f64 %fd252, 0dBFAFFFFFEDDCF548;
.loc 2 343 10
fma.rn.f64 %fd253, %fd251, %fd240, %fd252;
mov.f64 %fd254, 0d3FEFFFFFFFFFC9E5;
.loc 2 343 10
fma.rn.f64 %fd255, %fd253, %fd240, %fd254;
mov.f64 %fd256, 0d410ECD4523B12B84;
mov.f64 %fd257, 0dC14602FE1C34685E;
.loc 2 343 10
fma.rn.f64 %fd258, %fd257, %fd240, %fd256;
mov.f64 %fd259, 0dC0C7A2FC1972F05A;
.loc 2 343 10
fma.rn.f64 %fd260, %fd258, %fd240, %fd259;
mov.f64 %fd261, 0d407EBA131F7E5BEB;
.loc 2 343 10
fma.rn.f64 %fd262, %fd260, %fd240, %fd261;
mov.f64 %fd263, 0dC0373B92E6E7CC7D;
.loc 2 343 10
fma.rn.f64 %fd264, %fd262, %fd240, %fd263;
mov.f64 %fd265, 0d3FFA31BEE63A2F08;
.loc 2 343 10
fma.rn.f64 %fd266, %fd264, %fd240, %fd265;
mov.f64 %fd267, 0dBFCAD320104D5D05;
.loc 2 343 10
fma.rn.f64 %fd268, %fd266, %fd240, %fd267;
mov.f64 %fd269, 0d3FB0AAAA9C76D07E;
.loc 2 343 10
fma.rn.f64 %fd270, %fd268, %fd240, %fd269;
mov.f64 %fd271, 0dBFBFFFFFFFFDACEC;
.loc 2 343 10
fma.rn.f64 %fd272, %fd270, %fd240, %fd271;
fma.rn.f64 %fd18, %fd272, %fd239, %fd538;
rsqrt.approx.f64 %fd273, %fd538;
mul.f64 %fd274, %fd273, 0d3FE9884533D43651;
mul.f64 %fd19, %fd274, %fd255;
mul.f64 %fd275, %fd18, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r76, %fd275;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 343 10
st.local.u32 [%rd7], %r76;
cvt.rn.f64.s32 %fd276, %r76;
neg.f64 %fd277, %fd276;
mov.f64 %fd278, 0d3FF921FB54442D18;
.loc 2 343 10
fma.rn.f64 %fd279, %fd277, %fd278, %fd18;
mov.f64 %fd280, 0d3C91A62633145C00;
.loc 2 343 10
fma.rn.f64 %fd281, %fd277, %fd280, %fd279;
mov.f64 %fd282, 0d397B839A252049C0;
.loc 2 343 10
fma.rn.f64 %fd540, %fd277, %fd282, %fd281;
abs.f64 %fd283, %fd18;
setp.leu.f64 %p16, %fd283, 0d41E0000000000000;
@%p16 bra BB45_26;
// Callseq Start 8
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd18;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd540, [retval0+0];
}
// Callseq End 8
ld.local.u32 %r76, [%rd7];
BB45_26:
and.b32 %r55, %r76, 3;
cvt.rn.f64.s32 %fd284, %r55;
add.f64 %fd285, %fd540, 0dBFE921FB54442D18;
fma.rn.f64 %fd541, %fd284, 0d3FF921FB54442D18, %fd285;
abs.f64 %fd286, %fd541;
setp.neu.f64 %p17, %fd286, 0d7FF0000000000000;
@%p17 bra BB45_28;
mov.f64 %fd287, 0d0000000000000000;
.loc 2 343 10
mul.rn.f64 %fd541, %fd541, %fd287;
BB45_28:
add.u64 %rd10, %SP, 0;
.loc 2 343 10
mul.f64 %fd288, %fd541, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r77, %fd288;
cvta.to.local.u64 %rd11, %rd10;
.loc 2 343 10
st.local.u32 [%rd11], %r77;
cvt.rn.f64.s32 %fd289, %r77;
neg.f64 %fd290, %fd289;
fma.rn.f64 %fd292, %fd290, %fd278, %fd541;
fma.rn.f64 %fd294, %fd290, %fd280, %fd292;
fma.rn.f64 %fd542, %fd290, %fd282, %fd294;
abs.f64 %fd296, %fd541;
setp.leu.f64 %p18, %fd296, 0d41E0000000000000;
@%p18 bra BB45_30;
// Callseq Start 9
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd541;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 2 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd542, [retval0+0];
}
// Callseq End 9
ld.local.u32 %r77, [%rd11];
BB45_30:
add.s32 %r21, %r77, 1;
shl.b32 %r56, %r21, 3;
and.b32 %r57, %r56, 8;
and.b32 %r58, %r21, 1;
setp.eq.b32 %p19, %r58, 1;
not.pred %p20, %p19;
selp.f64 %fd297, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p20;
mul.wide.u32 %rd14, %r57, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 2 343 10
ld.const.f64 %fd298, [%rd16+8];
mul.rn.f64 %fd29, %fd542, %fd542;
fma.rn.f64 %fd299, %fd297, %fd29, %fd298;
ld.const.f64 %fd300, [%rd16+16];
fma.rn.f64 %fd301, %fd299, %fd29, %fd300;
ld.const.f64 %fd302, [%rd16+24];
fma.rn.f64 %fd303, %fd301, %fd29, %fd302;
ld.const.f64 %fd304, [%rd16+32];
fma.rn.f64 %fd305, %fd303, %fd29, %fd304;
ld.const.f64 %fd306, [%rd16+40];
fma.rn.f64 %fd307, %fd305, %fd29, %fd306;
ld.const.f64 %fd308, [%rd16+48];
fma.rn.f64 %fd30, %fd307, %fd29, %fd308;
fma.rn.f64 %fd543, %fd30, %fd542, %fd542;
@%p20 bra BB45_32;
fma.rn.f64 %fd543, %fd30, %fd29, %fd236;
BB45_32:
and.b32 %r59, %r21, 2;
setp.eq.s32 %p21, %r59, 0;
@%p21 bra BB45_34;
mov.f64 %fd310, 0d0000000000000000;
mov.f64 %fd311, 0dBFF0000000000000;
.loc 2 343 10
fma.rn.f64 %fd543, %fd543, %fd311, %fd310;
BB45_34:
mul.f64 %fd36, %fd19, %fd543;
fma.rn.f64 %fd548, %fd14, %fd36, %fd3;
bra.uni BB45_54;
BB45_35:
.loc 2 343 10
setp.gtu.f64 %p22, %fd2, 0d4000347C4AB37B18;
@%p22 bra BB45_37;
add.f64 %fd312, %fd2, 0dBFEC982EB8D417EA;
add.f64 %fd313, %fd312, 0dBC7EA9D270347F83;
mov.f64 %fd314, 0d3F3D054B05D3C52D;
mov.f64 %fd315, 0dBF01630132D75FC3;
.loc 2 343 10
fma.rn.f64 %fd316, %fd315, %fd313, %fd314;
mov.f64 %fd317, 0dBF66DAC0B314B2E5;
.loc 2 343 10
fma.rn.f64 %fd318, %fd316, %fd313, %fd317;
mov.f64 %fd319, 0d3F86A5D1DE76263F;
.loc 2 343 10
fma.rn.f64 %fd320, %fd318, %fd313, %fd319;
mov.f64 %fd321, 0dBF9FD16652824592;
.loc 2 343 10
fma.rn.f64 %fd322, %fd320, %fd313, %fd321;
mov.f64 %fd323, 0d3FB0F69A9CC79FBD;
.loc 2 343 10
fma.rn.f64 %fd324, %fd322, %fd313, %fd323;
mov.f64 %fd325, 0dBFBCCE40EF15583E;
.loc 2 343 10
fma.rn.f64 %fd326, %fd324, %fd313, %fd325;
mov.f64 %fd327, 0d3FC446B11780E4FC;
.loc 2 343 10
fma.rn.f64 %fd328, %fd326, %fd313, %fd327;
mov.f64 %fd329, 0dBFC89AE7E19621F7;
.loc 2 343 10
fma.rn.f64 %fd330, %fd328, %fd313, %fd329;
mov.f64 %fd331, 0d3FCACBA1B38EF7B8;
.loc 2 343 10
fma.rn.f64 %fd332, %fd330, %fd313, %fd331;
mov.f64 %fd333, 0dBFCB4166A03BBFA5;
.loc 2 343 10
fma.rn.f64 %fd334, %fd332, %fd313, %fd333;
mov.f64 %fd335, 0d3FCACCA4D5D4889A;
.loc 2 343 10
fma.rn.f64 %fd336, %fd334, %fd313, %fd335;
mov.f64 %fd337, 0dBFCA1455932B9392;
.loc 2 343 10
fma.rn.f64 %fd338, %fd336, %fd313, %fd337;
mov.f64 %fd339, 0d3FC96D8DB8D844EC;
.loc 2 343 10
fma.rn.f64 %fd340, %fd338, %fd313, %fd339;
mov.f64 %fd341, 0dBFC8F7FB77522EDF;
.loc 2 343 10
fma.rn.f64 %fd342, %fd340, %fd313, %fd341;
mov.f64 %fd343, 0d3FC8C0926ABC9AB0;
.loc 2 343 10
fma.rn.f64 %fd344, %fd342, %fd313, %fd343;
mov.f64 %fd345, 0dBFC8D35B8FEA468C;
.loc 2 343 10
fma.rn.f64 %fd346, %fd344, %fd313, %fd345;
mov.f64 %fd347, 0d3FC9424B8A0C8F94;
.loc 2 343 10
fma.rn.f64 %fd348, %fd346, %fd313, %fd347;
mov.f64 %fd349, 0dBFCA396A7F3403EF;
.loc 2 343 10
fma.rn.f64 %fd350, %fd348, %fd313, %fd349;
mov.f64 %fd351, 0d3FCC068086C37055;
.loc 2 343 10
fma.rn.f64 %fd352, %fd350, %fd313, %fd351;
mov.f64 %fd353, 0dBFCCF18E6A4C5C4E;
.loc 2 343 10
fma.rn.f64 %fd354, %fd352, %fd313, %fd353;
mov.f64 %fd355, 0d3FCC3B1338AF4239;
.loc 2 343 10
fma.rn.f64 %fd356, %fd354, %fd313, %fd355;
mov.f64 %fd357, 0dBFDF7E38A46D70DB;
.loc 2 343 10
fma.rn.f64 %fd358, %fd356, %fd313, %fd357;
mov.f64 %fd359, 0d3FEC24371844B88A;
.loc 2 343 10
fma.rn.f64 %fd360, %fd358, %fd313, %fd359;
mul.f64 %fd548, %fd360, %fd313;
bra.uni BB45_54;
BB45_37:
.loc 2 343 10
setp.gtu.f64 %p23, %fd2, 0d40161663B5D9A628;
@%p23 bra BB45_39;
add.f64 %fd361, %fd2, 0dC00FA9534D98569C;
add.f64 %fd362, %fd361, 0d3C9F06AE7804384E;
mov.f64 %fd363, 0dBCD2434958151AC7;
mov.f64 %fd364, 0dBCDAEA62AC8BDA68;
.loc 2 343 10
fma.rn.f64 %fd365, %fd364, %fd362, %fd363;
mov.f64 %fd366, 0d3D11C24A40D33FE1;
.loc 2 343 10
fma.rn.f64 %fd367, %fd365, %fd362, %fd366;
mov.f64 %fd368, 0d3D237CD62FA08CA4;
.loc 2 343 10
fma.rn.f64 %fd369, %fd367, %fd362, %fd368;
mov.f64 %fd370, 0dBD43902E0298C52A;
.loc 2 343 10
fma.rn.f64 %fd371, %fd369, %fd362, %fd370;
mov.f64 %fd372, 0dBD1DDAAD11CAB40F;
.loc 2 343 10
fma.rn.f64 %fd373, %fd371, %fd362, %fd372;
mov.f64 %fd374, 0dBD5209D9F06D7DE4;
.loc 2 343 10
fma.rn.f64 %fd375, %fd373, %fd362, %fd374;
mov.f64 %fd376, 0d3D8BB9F464468E1A;
.loc 2 343 10
fma.rn.f64 %fd377, %fd375, %fd362, %fd376;
mov.f64 %fd378, 0dBDA8F67B07D1B440;
.loc 2 343 10
fma.rn.f64 %fd379, %fd377, %fd362, %fd378;
mov.f64 %fd380, 0d3DC7C8D60F9EAECF;
.loc 2 343 10
fma.rn.f64 %fd381, %fd379, %fd362, %fd380;
mov.f64 %fd382, 0dBDE9703405B49A8D;
.loc 2 343 10
fma.rn.f64 %fd383, %fd381, %fd362, %fd382;
mov.f64 %fd384, 0d3E0A6B64E76417E4;
.loc 2 343 10
fma.rn.f64 %fd385, %fd383, %fd362, %fd384;
mov.f64 %fd386, 0dBE2F6B5AFB2F1359;
.loc 2 343 10
fma.rn.f64 %fd387, %fd385, %fd362, %fd386;
mov.f64 %fd388, 0d3E54526B71C21EC1;
.loc 2 343 10
fma.rn.f64 %fd389, %fd387, %fd362, %fd388;
mov.f64 %fd390, 0dBE5776DBCBBC8E1D;
.loc 2 343 10
fma.rn.f64 %fd391, %fd389, %fd362, %fd390;
mov.f64 %fd392, 0dBE93B211FC2DF90E;
.loc 2 343 10
fma.rn.f64 %fd393, %fd391, %fd362, %fd392;
mov.f64 %fd394, 0dBED486372E8562DC;
.loc 2 343 10
fma.rn.f64 %fd395, %fd393, %fd362, %fd394;
mov.f64 %fd396, 0d3F0AB2C1FBC3A254;
.loc 2 343 10
fma.rn.f64 %fd397, %fd395, %fd362, %fd396;
mov.f64 %fd398, 0d3F299827653353B8;
.loc 2 343 10
fma.rn.f64 %fd399, %fd397, %fd362, %fd398;
mov.f64 %fd400, 0dBF61E32BC4ED7084;
.loc 2 343 10
fma.rn.f64 %fd401, %fd399, %fd362, %fd400;
mov.f64 %fd402, 0dBF7C116FDC599A09;
.loc 2 343 10
fma.rn.f64 %fd403, %fd401, %fd362, %fd402;
mov.f64 %fd404, 0d3FADF6D59BF50C77;
.loc 2 343 10
fma.rn.f64 %fd405, %fd403, %fd362, %fd404;
mov.f64 %fd406, 0d3FAA09C92903680B;
.loc 2 343 10
fma.rn.f64 %fd407, %fd405, %fd362, %fd406;
mov.f64 %fd408, 0dBFD9C34256A12A0B;
.loc 2 343 10
fma.rn.f64 %fd409, %fd407, %fd362, %fd408;
mul.f64 %fd548, %fd409, %fd362;
bra.uni BB45_54;
BB45_39:
.loc 2 343 10
setp.gtu.f64 %p24, %fd2, 0d40214EF30C0C06ED;
@%p24 bra BB45_41;
add.f64 %fd410, %fd2, 0dC01C581DC4E72103;
add.f64 %fd411, %fd410, 0d3C99774A495F56CF;
mov.f64 %fd412, 0dBD3F443BB4F53D75;
mov.f64 %fd413, 0d3CF1CB3ABA718B8E;
.loc 2 343 10
fma.rn.f64 %fd414, %fd413, %fd411, %fd412;
mov.f64 %fd415, 0dBD770F737BD6A786;
.loc 2 343 10
fma.rn.f64 %fd416, %fd414, %fd411, %fd415;
mov.f64 %fd417, 0d3DBF0E9A20459E14;
.loc 2 343 10
fma.rn.f64 %fd418, %fd416, %fd411, %fd417;
mov.f64 %fd419, 0d3DEFA6B137D5E108;
.loc 2 343 10
fma.rn.f64 %fd420, %fd418, %fd411, %fd419;
mov.f64 %fd421, 0dBE344296729FB7FA;
.loc 2 343 10
fma.rn.f64 %fd422, %fd420, %fd411, %fd421;
mov.f64 %fd423, 0dBE60A2813A80DFAA;
.loc 2 343 10
fma.rn.f64 %fd424, %fd422, %fd411, %fd423;
mov.f64 %fd425, 0d3EA34AA737A83EB4;
.loc 2 343 10
fma.rn.f64 %fd426, %fd424, %fd411, %fd425;
mov.f64 %fd427, 0d3EC6A9227332D03C;
.loc 2 343 10
fma.rn.f64 %fd428, %fd426, %fd411, %fd427;
mov.f64 %fd429, 0dBF08177E4F93C81E;
.loc 2 343 10
fma.rn.f64 %fd430, %fd428, %fd411, %fd429;
mov.f64 %fd431, 0dBF226DD71E391775;
.loc 2 343 10
fma.rn.f64 %fd432, %fd430, %fd411, %fd431;
mov.f64 %fd433, 0d3F61D35E85FD7B22;
.loc 2 343 10
fma.rn.f64 %fd434, %fd432, %fd411, %fd433;
mov.f64 %fd435, 0d3F6B2F14A955285C;
.loc 2 343 10
fma.rn.f64 %fd436, %fd434, %fd411, %fd435;
mov.f64 %fd437, 0dBFA8969C64CBF388;
.loc 2 343 10
fma.rn.f64 %fd438, %fd436, %fd411, %fd437;
mov.f64 %fd439, 0dBF95AEF611FC4D5A;
.loc 2 343 10
fma.rn.f64 %fd440, %fd438, %fd411, %fd439;
mov.f64 %fd441, 0d3FD334CCA0697A5A;
.loc 2 343 10
fma.rn.f64 %fd442, %fd440, %fd411, %fd441;
mul.f64 %fd548, %fd442, %fd411;
bra.uni BB45_54;
BB45_41:
.loc 2 343 10
abs.f64 %fd443, %fd2;
setp.neu.f64 %p25, %fd443, 0d7FF0000000000000;
@%p25 bra BB45_43;
mov.f64 %fd548, 0d0000000000000000;
bra.uni BB45_54;
BB45_43:
add.u64 %rd17, %SP, 12;
.loc 2 343 10
// inline asm
cvt.rn.f32.f64 %f9,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd446,%f10;
// inline asm
neg.f64 %fd447, %fd2;
mov.f64 %fd448, 0d3FF0000000000000;
.loc 2 343 10
fma.rn.f64 %fd449, %fd447, %fd446, %fd448;
fma.rn.f64 %fd450, %fd449, %fd449, %fd449;
fma.rn.f64 %fd451, %fd450, %fd446, %fd446;
mul.f64 %fd452, %fd451, %fd451;
mov.f64 %fd453, 0d4093F56A049CDDE7;
mov.f64 %fd454, 0dC0C5E91E6AC3AD03;
.loc 2 343 10
fma.rn.f64 %fd455, %fd454, %fd452, %fd453;
mov.f64 %fd456, 0dC05572D39DFB8433;
.loc 2 343 10
fma.rn.f64 %fd457, %fd455, %fd452, %fd456;
mov.f64 %fd458, 0d4016A6041CAA59E5;
.loc 2 343 10
fma.rn.f64 %fd459, %fd457, %fd452, %fd458;
mov.f64 %fd460, 0dBFE155E3A0493880;
.loc 2 343 10
fma.rn.f64 %fd461, %fd459, %fd452, %fd460;
mov.f64 %fd462, 0d3FBA7FB92F417F7F;
.loc 2 343 10
fma.rn.f64 %fd463, %fd461, %fd452, %fd462;
mov.f64 %fd464, 0dBFAFFFFFB12E32F5;
.loc 2 343 10
fma.rn.f64 %fd465, %fd463, %fd452, %fd464;
mov.f64 %fd466, 0d3FEFFFFFFFFECED5;
.loc 2 343 10
fma.rn.f64 %fd467, %fd465, %fd452, %fd466;
mov.f64 %fd468, 0dC15709C79AAC5813;
mov.f64 %fd469, 0d418A86A64BE101DC;
.loc 2 343 10
fma.rn.f64 %fd470, %fd469, %fd452, %fd468;
mov.f64 %fd471, 0d41142A31C980A287;
.loc 2 343 10
fma.rn.f64 %fd472, %fd470, %fd452, %fd471;
mov.f64 %fd473, 0dC0C9CBE68930485D;
.loc 2 343 10
fma.rn.f64 %fd474, %fd472, %fd452, %fd473;
mov.f64 %fd475, 0d407F583E14E8A4E8;
.loc 2 343 10
fma.rn.f64 %fd476, %fd474, %fd452, %fd475;
mov.f64 %fd477, 0dC0374A629C650680;
.loc 2 343 10
fma.rn.f64 %fd478, %fd476, %fd452, %fd477;
mov.f64 %fd479, 0d3FFA32A7AF17FAE9;
.loc 2 343 10
fma.rn.f64 %fd480, %fd478, %fd452, %fd479;
mov.f64 %fd481, 0dBFCAD32497785CD6;
.loc 2 343 10
fma.rn.f64 %fd482, %fd480, %fd452, %fd481;
mov.f64 %fd483, 0d3FB0AAAA9FB75F7B;
.loc 2 343 10
fma.rn.f64 %fd484, %fd482, %fd452, %fd483;
mov.f64 %fd485, 0dBFBFFFFFFFFE320F;
.loc 2 343 10
fma.rn.f64 %fd486, %fd484, %fd452, %fd485;
fma.rn.f64 %fd42, %fd486, %fd451, %fd2;
rsqrt.approx.f64 %fd487, %fd2;
mul.f64 %fd488, %fd487, 0d3FE9884533D43651;
mul.f64 %fd43, %fd488, %fd467;
mul.f64 %fd489, %fd42, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r78, %fd489;
cvta.to.local.u64 %rd18, %rd17;
.loc 2 343 10
st.local.u32 [%rd18], %r78;
cvt.rn.f64.s32 %fd490, %r78;
neg.f64 %fd491, %fd490;
mov.f64 %fd492, 0d3FF921FB54442D18;
.loc 2 343 10
fma.rn.f64 %fd493, %fd491, %fd492, %fd42;
mov.f64 %fd494, 0d3C91A62633145C00;
.loc 2 343 10
fma.rn.f64 %fd495, %fd491, %fd494, %fd493;
mov.f64 %fd496, 0d397B839A252049C0;
.loc 2 343 10
fma.rn.f64 %fd544, %fd491, %fd496, %fd495;
abs.f64 %fd497, %fd42;
setp.leu.f64 %p26, %fd497, 0d41E0000000000000;
@%p26 bra BB45_45;
// Callseq Start 10
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd42;
.param .b64 param1;
st.param.b64 [param1+0], %rd17;
.param .b64 retval0;
.loc 2 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd544, [retval0+0];
}
// Callseq End 10
ld.local.u32 %r78, [%rd18];
BB45_45:
and.b32 %r60, %r78, 3;
cvt.rn.f64.s32 %fd498, %r60;
add.f64 %fd499, %fd544, 0dC002D97C7F3321D2;
fma.rn.f64 %fd545, %fd498, 0d3FF921FB54442D18, %fd499;
abs.f64 %fd500, %fd545;
setp.neu.f64 %p27, %fd500, 0d7FF0000000000000;
@%p27 bra BB45_47;
mov.f64 %fd501, 0d0000000000000000;
.loc 2 343 10
mul.rn.f64 %fd545, %fd545, %fd501;
BB45_47:
add.u64 %rd21, %SP, 8;
.loc 2 343 10
mul.f64 %fd502, %fd545, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r79, %fd502;
cvta.to.local.u64 %rd22, %rd21;
.loc 2 343 10
st.local.u32 [%rd22], %r79;
cvt.rn.f64.s32 %fd503, %r79;
neg.f64 %fd504, %fd503;
fma.rn.f64 %fd506, %fd504, %fd492, %fd545;
fma.rn.f64 %fd508, %fd504, %fd494, %fd506;
fma.rn.f64 %fd546, %fd504, %fd496, %fd508;
abs.f64 %fd510, %fd545;
setp.leu.f64 %p28, %fd510, 0d41E0000000000000;
@%p28 bra BB45_49;
// Callseq Start 11
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd545;
.param .b64 param1;
st.param.b64 [param1+0], %rd21;
.param .b64 retval0;
.loc 2 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd546, [retval0+0];
}
// Callseq End 11
ld.local.u32 %r79, [%rd22];
BB45_49:
add.s32 %r28, %r79, 1;
shl.b32 %r61, %r28, 3;
and.b32 %r62, %r61, 8;
and.b32 %r63, %r28, 1;
setp.eq.b32 %p29, %r63, 1;
not.pred %p30, %p29;
selp.f64 %fd511, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p30;
mul.wide.u32 %rd25, %r62, 8;
mov.u64 %rd26, __cudart_sin_cos_coeffs;
add.s64 %rd27, %rd26, %rd25;
.loc 2 343 10
ld.const.f64 %fd512, [%rd27+8];
mul.rn.f64 %fd53, %fd546, %fd546;
fma.rn.f64 %fd513, %fd511, %fd53, %fd512;
ld.const.f64 %fd514, [%rd27+16];
fma.rn.f64 %fd515, %fd513, %fd53, %fd514;
ld.const.f64 %fd516, [%rd27+24];
fma.rn.f64 %fd517, %fd515, %fd53, %fd516;
ld.const.f64 %fd518, [%rd27+32];
fma.rn.f64 %fd519, %fd517, %fd53, %fd518;
ld.const.f64 %fd520, [%rd27+40];
fma.rn.f64 %fd521, %fd519, %fd53, %fd520;
ld.const.f64 %fd522, [%rd27+48];
fma.rn.f64 %fd54, %fd521, %fd53, %fd522;
fma.rn.f64 %fd547, %fd54, %fd546, %fd546;
@%p30 bra BB45_51;
fma.rn.f64 %fd547, %fd54, %fd53, %fd448;
BB45_51:
and.b32 %r64, %r28, 2;
setp.eq.s32 %p31, %r64, 0;
@%p31 bra BB45_53;
mov.f64 %fd524, 0d0000000000000000;
mov.f64 %fd525, 0dBFF0000000000000;
.loc 2 343 10
fma.rn.f64 %fd547, %fd547, %fd525, %fd524;
BB45_53:
mul.f64 %fd548, %fd43, %fd547;
BB45_54:
setp.lt.f64 %p32, %fd1, 0d0000000000000000;
selp.f64 %fd526, 0dFFF8000000000000, %fd548, %p32;
.loc 1 62 42
mad.lo.s32 %r65, %r70, %r33, %r71;
mul.wide.s32 %rd29, %r65, 8;
add.s64 %rd30, %rd28, %rd29;
st.global.f64 [%rd30], %fd526;
.loc 1 62 22
mov.u32 %r67, %nctaid.y;
mad.lo.s32 %r71, %r67, %r39, %r71;
.loc 1 62 1
setp.lt.s32 %p33, %r71, %r31;
@%p33 bra BB45_3;
BB45_55:
.loc 1 62 22
mov.u32 %r68, %nctaid.x;
mad.lo.s32 %r70, %r68, %r35, %r70;
.loc 1 62 1
setp.lt.s32 %p34, %r70, %r32;
@%p34 bra BB45_2;
BB45_56:
.loc 1 62 2
ret;
}
.visible .entry map_y1_double(
.param .u32 map_y1_double_param_0,
.param .u32 map_y1_double_param_1,
.param .u64 map_y1_double_param_2,
.param .u32 map_y1_double_param_3,
.param .u64 map_y1_double_param_4,
.param .u32 map_y1_double_param_5
)
{
.local .align 4 .b8 __local_depot46[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<39>;
.reg .f32 %f<13>;
.reg .s32 %r<80>;
.reg .s64 %rd<31>;
.reg .f64 %fd<533>;
mov.u64 %SPL, __local_depot46;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r31, [map_y1_double_param_0];
ld.param.u32 %r32, [map_y1_double_param_1];
ld.param.u64 %rd1, [map_y1_double_param_2];
ld.param.u32 %r33, [map_y1_double_param_3];
ld.param.u64 %rd2, [map_y1_double_param_4];
ld.param.u32 %r34, [map_y1_double_param_5];
.loc 1 63 1
mov.u32 %r35, %ntid.x;
mov.u32 %r36, %ctaid.x;
mov.u32 %r37, %tid.x;
mad.lo.s32 %r70, %r35, %r36, %r37;
.loc 1 63 1
setp.ge.s32 %p1, %r70, %r32;
@%p1 bra BB46_61;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd28, %rd1;
BB46_2:
.loc 1 63 1
mov.u32 %r38, %ctaid.y;
mov.u32 %r39, %ntid.y;
mov.u32 %r40, %tid.y;
mad.lo.s32 %r71, %r39, %r38, %r40;
.loc 1 63 1
setp.ge.s32 %p2, %r71, %r31;
@%p2 bra BB46_60;
BB46_3:
.loc 1 63 1
mad.lo.s32 %r45, %r70, %r34, %r71;
mul.wide.s32 %rd4, %r45, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 2 348 10
abs.f64 %fd2, %fd1;
setp.lt.f64 %p3, %fd2, 0d000730D67819E8D2;
@%p3 bra BB46_56;
setp.gtu.f64 %p4, %fd2, 0d3FF4C6F208132576;
@%p4 bra BB46_37;
mul.f64 %fd64, %fd2, %fd2;
mov.f64 %fd65, 0dBDCF0B5B1FB7B95E;
mov.f64 %fd66, 0d3D5249F90687428C;
.loc 2 348 10
fma.rn.f64 %fd67, %fd66, %fd64, %fd65;
mov.f64 %fd68, 0d3E432E589311FA14;
.loc 2 348 10
fma.rn.f64 %fd69, %fd67, %fd64, %fd68;
mov.f64 %fd70, 0dBEB0A780AA4A92E9;
.loc 2 348 10
fma.rn.f64 %fd71, %fd69, %fd64, %fd70;
mov.f64 %fd72, 0d3F12C7DBFFCAEC2B;
.loc 2 348 10
fma.rn.f64 %fd73, %fd71, %fd64, %fd72;
mov.f64 %fd74, 0dBF6835B97894BA4A;
.loc 2 348 10
fma.rn.f64 %fd75, %fd73, %fd64, %fd74;
mov.f64 %fd76, 0d3FABD3975C75B4A3;
.loc 2 348 10
fma.rn.f64 %fd77, %fd75, %fd64, %fd76;
mov.f64 %fd78, 0dBFC91866143CBC8A;
.loc 2 348 10
fma.rn.f64 %fd79, %fd77, %fd64, %fd78;
mul.f64 %fd3, %fd79, %fd2;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd2;
}
setp.lt.f64 %p5, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p6, %fd2, 0d0000000000000000;
and.pred %p7, %p6, %p5;
@%p7 bra BB46_11;
abs.f64 %fd521, %fd2;
setp.gtu.f64 %p8, %fd521, 0d7FF0000000000000;
@%p8 bra BB46_10;
setp.neu.f64 %p9, %fd2, 0d0000000000000000;
@%p9 bra BB46_9;
mov.f64 %fd522, 0dFFF0000000000000;
bra.uni BB46_17;
BB46_9:
.loc 2 348 10
setp.eq.f64 %p10, %fd2, 0d7FF0000000000000;
selp.f64 %fd522, %fd2, 0dFFF8000000000000, %p10;
bra.uni BB46_17;
BB46_10:
.loc 2 348 10
add.f64 %fd522, %fd2, %fd2;
bra.uni BB46_17;
BB46_11:
.loc 2 348 10
setp.lt.u32 %p11, %r72, 1048576;
@%p11 bra BB46_13;
mov.u32 %r74, -1023;
bra.uni BB46_14;
BB46_13:
.loc 2 348 10
mul.f64 %fd81, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd81;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd81;
}
mov.u32 %r74, -1077;
BB46_14:
.loc 2 348 10
shr.s32 %r48, %r72, 20;
add.s32 %r75, %r74, %r48;
and.b32 %r49, %r72, -2146435073;
or.b32 %r50, %r49, 1072693248;
mov.b64 %fd520, {%r73, %r50};
setp.lt.u32 %p12, %r50, 1073127583;
@%p12 bra BB46_16;
{
.reg .b32 %temp;
mov.b64 {%r51, %temp}, %fd520;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd520;
}
add.s32 %r53, %r52, -1048576;
mov.b64 %fd520, {%r51, %r53};
add.s32 %r75, %r75, 1;
BB46_16:
add.f64 %fd82, %fd520, 0d3FF0000000000000;
mov.f64 %fd84, 0d3FF0000000000000;
.loc 2 348 10
// inline asm
cvt.rn.f32.f64 %f1,%fd82;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd83,%f2;
// inline asm
neg.f64 %fd85, %fd82;
fma.rn.f64 %fd86, %fd85, %fd83, %fd84;
fma.rn.f64 %fd87, %fd86, %fd86, %fd86;
fma.rn.f64 %fd88, %fd87, %fd83, %fd83;
add.f64 %fd89, %fd520, 0dBFF0000000000000;
mul.f64 %fd90, %fd89, %fd88;
fma.rn.f64 %fd91, %fd89, %fd88, %fd90;
mul.f64 %fd92, %fd91, %fd91;
mov.f64 %fd93, 0d3ED0EE258B7A8B04;
mov.f64 %fd94, 0d3EB1380B3AE80F1E;
.loc 2 348 10
fma.rn.f64 %fd95, %fd94, %fd92, %fd93;
mov.f64 %fd96, 0d3EF3B2669F02676F;
.loc 2 348 10
fma.rn.f64 %fd97, %fd95, %fd92, %fd96;
mov.f64 %fd98, 0d3F1745CBA9AB0956;
.loc 2 348 10
fma.rn.f64 %fd99, %fd97, %fd92, %fd98;
mov.f64 %fd100, 0d3F3C71C72D1B5154;
.loc 2 348 10
fma.rn.f64 %fd101, %fd99, %fd92, %fd100;
mov.f64 %fd102, 0d3F624924923BE72D;
.loc 2 348 10
fma.rn.f64 %fd103, %fd101, %fd92, %fd102;
mov.f64 %fd104, 0d3F8999999999A3C4;
.loc 2 348 10
fma.rn.f64 %fd105, %fd103, %fd92, %fd104;
mov.f64 %fd106, 0d3FB5555555555554;
.loc 2 348 10
fma.rn.f64 %fd107, %fd105, %fd92, %fd106;
sub.f64 %fd108, %fd89, %fd91;
add.f64 %fd109, %fd108, %fd108;
neg.f64 %fd110, %fd91;
fma.rn.f64 %fd111, %fd110, %fd89, %fd109;
mul.f64 %fd112, %fd88, %fd111;
mul.f64 %fd113, %fd107, %fd92;
fma.rn.f64 %fd114, %fd113, %fd91, %fd112;
cvt.rn.f64.s32 %fd115, %r75;
mov.f64 %fd116, 0d3FE62E42FEFA39EF;
.loc 2 348 10
fma.rn.f64 %fd117, %fd115, %fd116, %fd91;
neg.s32 %r54, %r75;
cvt.rn.f64.s32 %fd118, %r54;
fma.rn.f64 %fd119, %fd118, %fd116, %fd117;
sub.f64 %fd120, %fd119, %fd91;
sub.f64 %fd121, %fd114, %fd120;
mov.f64 %fd122, 0d3C7ABC9E3B39803F;
.loc 2 348 10
fma.rn.f64 %fd123, %fd115, %fd122, %fd121;
add.f64 %fd522, %fd117, %fd123;
abs.f64 %fd521, %fd2;
BB46_17:
setp.gtu.f64 %p13, %fd521, 0d400353AABAD7B784;
@%p13 bra BB46_19;
mov.f64 %fd124, 0dBD4DD167A0DC3F55;
mov.f64 %fd125, 0d3D020E4ADCDE2AD3;
.loc 2 348 10
fma.rn.f64 %fd126, %fd125, %fd521, %fd124;
mov.f64 %fd127, 0d3D5503F5A491E487;
.loc 2 348 10
fma.rn.f64 %fd128, %fd126, %fd521, %fd127;
mov.f64 %fd129, 0d3DC1F29940C2403A;
.loc 2 348 10
fma.rn.f64 %fd130, %fd128, %fd521, %fd129;
mov.f64 %fd131, 0d3D84CF9302EACDEF;
.loc 2 348 10
fma.rn.f64 %fd132, %fd130, %fd521, %fd131;
mov.f64 %fd133, 0dBE384A53DBBCA436;
.loc 2 348 10
fma.rn.f64 %fd134, %fd132, %fd521, %fd133;
mov.f64 %fd135, 0d3D9779BEE4F63BCC;
.loc 2 348 10
fma.rn.f64 %fd136, %fd134, %fd521, %fd135;
mov.f64 %fd137, 0d3EA6C160E414F3F0;
.loc 2 348 10
fma.rn.f64 %fd138, %fd136, %fd521, %fd137;
mov.f64 %fd139, 0d3D8F3D2F12430699;
.loc 2 348 10
fma.rn.f64 %fd140, %fd138, %fd521, %fd139;
mov.f64 %fd141, 0dBF0C71C72C0CED04;
.loc 2 348 10
fma.rn.f64 %fd142, %fd140, %fd521, %fd141;
mov.f64 %fd143, 0d3D659BCA506F1128;
.loc 2 348 10
fma.rn.f64 %fd144, %fd142, %fd521, %fd143;
mov.f64 %fd145, 0d3F65555555506982;
.loc 2 348 10
fma.rn.f64 %fd146, %fd144, %fd521, %fd145;
mov.f64 %fd147, 0d3D15BA0B425F1BFB;
.loc 2 348 10
fma.rn.f64 %fd148, %fd146, %fd521, %fd147;
mov.f64 %fd149, 0dBFB0000000000065;
.loc 2 348 10
fma.rn.f64 %fd150, %fd148, %fd521, %fd149;
mov.f64 %fd151, 0d3C8729A7253FB679;
.loc 2 348 10
fma.rn.f64 %fd152, %fd150, %fd521, %fd151;
mov.f64 %fd153, 0d3FE0000000000000;
.loc 2 348 10
fma.rn.f64 %fd154, %fd152, %fd521, %fd153;
mul.f64 %fd527, %fd154, %fd521;
bra.uni BB46_36;
BB46_19:
.loc 2 348 10
setp.gtu.f64 %p14, %fd521, 0d4015B1D0574614EA;
@%p14 bra BB46_21;
add.f64 %fd155, %fd521, 0dC00EA75575AF6F09;
add.f64 %fd156, %fd155, 0d3CA60155A9D1B256;
mov.f64 %fd157, 0d3D41011A1DF02DAD;
mov.f64 %fd158, 0dBCF8D3CDBB60175E;
.loc 2 348 10
fma.rn.f64 %fd159, %fd158, %fd156, %fd157;
mov.f64 %fd160, 0d3D76013AC1E5E222;
.loc 2 348 10
fma.rn.f64 %fd161, %fd159, %fd156, %fd160;
mov.f64 %fd162, 0dBDBEC315D96D5F03;
.loc 2 348 10
fma.rn.f64 %fd163, %fd161, %fd156, %fd162;
mov.f64 %fd164, 0dBDF03BE1B4B57207;
.loc 2 348 10
fma.rn.f64 %fd165, %fd163, %fd156, %fd164;
mov.f64 %fd166, 0d3E345695F8B660F7;
.loc 2 348 10
fma.rn.f64 %fd167, %fd165, %fd156, %fd166;
mov.f64 %fd168, 0d3E617069FCFCFFF4;
.loc 2 348 10
fma.rn.f64 %fd169, %fd167, %fd156, %fd168;
mov.f64 %fd170, 0dBEA33825C36745EB;
.loc 2 348 10
fma.rn.f64 %fd171, %fd169, %fd156, %fd170;
mov.f64 %fd172, 0dBEC9799D4F90931B;
.loc 2 348 10
fma.rn.f64 %fd173, %fd171, %fd156, %fd172;
mov.f64 %fd174, 0d3F083A06E2F7DF13;
.loc 2 348 10
fma.rn.f64 %fd175, %fd173, %fd156, %fd174;
mov.f64 %fd176, 0d3F26E4C2D53A7CF6;
.loc 2 348 10
fma.rn.f64 %fd177, %fd175, %fd156, %fd176;
mov.f64 %fd178, 0dBF624B3409957B1C;
.loc 2 348 10
fma.rn.f64 %fd179, %fd177, %fd156, %fd178;
mov.f64 %fd180, 0dBF7537544C3325DF;
.loc 2 348 10
fma.rn.f64 %fd181, %fd179, %fd156, %fd180;
mov.f64 %fd182, 0d3FAB589D1DA138E2;
.loc 2 348 10
fma.rn.f64 %fd183, %fd181, %fd156, %fd182;
mov.f64 %fd184, 0d3FAAE8A39F51AD13;
.loc 2 348 10
fma.rn.f64 %fd185, %fd183, %fd156, %fd184;
mov.f64 %fd186, 0dBFD9C6CF582CBF7F;
.loc 2 348 10
fma.rn.f64 %fd187, %fd185, %fd156, %fd186;
mul.f64 %fd527, %fd187, %fd156;
bra.uni BB46_36;
BB46_21:
.loc 2 348 10
setp.gtu.f64 %p15, %fd521, 0d40213065E54C1AA9;
@%p15 bra BB46_23;
add.f64 %fd188, %fd521, 0dC01C0FF5F3B47250;
add.f64 %fd189, %fd188, 0d3C9B226D9D243827;
mov.f64 %fd190, 0dBD40E8363DB649A9;
mov.f64 %fd191, 0d3CF3EB867515FAD6;
.loc 2 348 10
fma.rn.f64 %fd192, %fd191, %fd189, %fd190;
mov.f64 %fd193, 0dBD73B7DD4A6608FB;
.loc 2 348 10
fma.rn.f64 %fd194, %fd192, %fd189, %fd193;
mov.f64 %fd195, 0d3DBEC5E01482C750;
.loc 2 348 10
fma.rn.f64 %fd196, %fd194, %fd189, %fd195;
mov.f64 %fd197, 0d3DEC62BB9E882103;
.loc 2 348 10
fma.rn.f64 %fd198, %fd196, %fd189, %fd197;
mov.f64 %fd199, 0dBE34462EED732A23;
.loc 2 348 10
fma.rn.f64 %fd200, %fd198, %fd189, %fd199;
mov.f64 %fd201, 0dBE5D48DCAD7DC59B;
.loc 2 348 10
fma.rn.f64 %fd202, %fd200, %fd189, %fd201;
mov.f64 %fd203, 0d3EA3026DF29167E9;
.loc 2 348 10
fma.rn.f64 %fd204, %fd202, %fd189, %fd203;
mov.f64 %fd205, 0d3EC4255B0119666C;
.loc 2 348 10
fma.rn.f64 %fd206, %fd204, %fd189, %fd205;
mov.f64 %fd207, 0dBF0796A751B32693;
.loc 2 348 10
fma.rn.f64 %fd208, %fd206, %fd189, %fd207;
mov.f64 %fd209, 0dBF207358BBDBA284;
.loc 2 348 10
fma.rn.f64 %fd210, %fd208, %fd189, %fd209;
mov.f64 %fd211, 0d3F613FBC7D6927B1;
.loc 2 348 10
fma.rn.f64 %fd212, %fd210, %fd189, %fd211;
mov.f64 %fd213, 0d3F69A4B292E3DD75;
.loc 2 348 10
fma.rn.f64 %fd214, %fd212, %fd189, %fd213;
mov.f64 %fd215, 0dBFA80C83BDEEE4FB;
.loc 2 348 10
fma.rn.f64 %fd216, %fd214, %fd189, %fd215;
mov.f64 %fd217, 0dBF95E70DC60362BF;
.loc 2 348 10
fma.rn.f64 %fd218, %fd216, %fd189, %fd217;
mov.f64 %fd219, 0d3FD33518B3874E8A;
.loc 2 348 10
fma.rn.f64 %fd220, %fd218, %fd189, %fd219;
mul.f64 %fd527, %fd220, %fd189;
bra.uni BB46_36;
BB46_23:
.loc 2 348 10
abs.f64 %fd221, %fd521;
setp.neu.f64 %p16, %fd221, 0d7FF0000000000000;
@%p16 bra BB46_25;
mov.f64 %fd527, 0d0000000000000000;
bra.uni BB46_36;
BB46_25:
add.u64 %rd6, %SP, 4;
.loc 2 348 10
// inline asm
cvt.rn.f32.f64 %f5,%fd521;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd224,%f6;
// inline asm
neg.f64 %fd225, %fd521;
mov.f64 %fd226, 0d3FF0000000000000;
.loc 2 348 10
fma.rn.f64 %fd227, %fd225, %fd224, %fd226;
fma.rn.f64 %fd228, %fd227, %fd227, %fd227;
fma.rn.f64 %fd229, %fd228, %fd224, %fd224;
mul.f64 %fd230, %fd229, %fd229;
mov.f64 %fd231, 0dC099C06322A3F8BE;
mov.f64 %fd232, 0d40CD02EA3F2F6751;
.loc 2 348 10
fma.rn.f64 %fd233, %fd232, %fd230, %fd231;
mov.f64 %fd234, 0d405B89354DA77324;
.loc 2 348 10
fma.rn.f64 %fd235, %fd233, %fd230, %fd234;
mov.f64 %fd236, 0dC01E352294653188;
.loc 2 348 10
fma.rn.f64 %fd237, %fd235, %fd230, %fd236;
mov.f64 %fd238, 0d3FE9BC7DB16BD7A7;
.loc 2 348 10
fma.rn.f64 %fd239, %fd237, %fd230, %fd238;
mov.f64 %fd240, 0dBFC8BFE1C3A4F741;
.loc 2 348 10
fma.rn.f64 %fd241, %fd239, %fd230, %fd240;
mov.f64 %fd242, 0d3FC7FFFFF0D00BE2;
.loc 2 348 10
fma.rn.f64 %fd243, %fd241, %fd230, %fd242;
mov.f64 %fd244, 0d3FF00000000068CC;
.loc 2 348 10
fma.rn.f64 %fd245, %fd243, %fd230, %fd244;
mov.f64 %fd246, 0d415A30AC6857BEE0;
mov.f64 %fd247, 0dC18DA26B212FDC9A;
.loc 2 348 10
fma.rn.f64 %fd248, %fd247, %fd230, %fd246;
mov.f64 %fd249, 0dC11764222AD7C910;
.loc 2 348 10
fma.rn.f64 %fd250, %fd248, %fd230, %fd249;
mov.f64 %fd251, 0d40CEB02E0C306857;
.loc 2 348 10
fma.rn.f64 %fd252, %fd250, %fd230, %fd251;
mov.f64 %fd253, 0dC08351859FA2B23B;
.loc 2 348 10
fma.rn.f64 %fd254, %fd252, %fd230, %fd253;
mov.f64 %fd255, 0d403E65A07AF51F42;
.loc 2 348 10
fma.rn.f64 %fd256, %fd254, %fd230, %fd255;
mov.f64 %fd257, 0dC002F2B817F77A57;
.loc 2 348 10
fma.rn.f64 %fd258, %fd256, %fd230, %fd257;
mov.f64 %fd259, 0d3FD7BCC34DA069FD;
.loc 2 348 10
fma.rn.f64 %fd260, %fd258, %fd230, %fd259;
mov.f64 %fd261, 0dBFC4FFFFF8A44463;
.loc 2 348 10
fma.rn.f64 %fd262, %fd260, %fd230, %fd261;
mov.f64 %fd263, 0d3FD7FFFFFFFF5CD7;
.loc 2 348 10
fma.rn.f64 %fd264, %fd262, %fd230, %fd263;
fma.rn.f64 %fd17, %fd264, %fd229, %fd521;
rsqrt.approx.f64 %fd265, %fd521;
mul.f64 %fd266, %fd265, 0d3FE9884533D43651;
mul.f64 %fd18, %fd266, %fd245;
mul.f64 %fd267, %fd17, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r76, %fd267;
cvta.to.local.u64 %rd7, %rd6;
.loc 2 348 10
st.local.u32 [%rd7], %r76;
cvt.rn.f64.s32 %fd268, %r76;
neg.f64 %fd269, %fd268;
mov.f64 %fd270, 0d3FF921FB54442D18;
.loc 2 348 10
fma.rn.f64 %fd271, %fd269, %fd270, %fd17;
mov.f64 %fd272, 0d3C91A62633145C00;
.loc 2 348 10
fma.rn.f64 %fd273, %fd269, %fd272, %fd271;
mov.f64 %fd274, 0d397B839A252049C0;
.loc 2 348 10
fma.rn.f64 %fd523, %fd269, %fd274, %fd273;
abs.f64 %fd275, %fd17;
setp.leu.f64 %p17, %fd275, 0d41E0000000000000;
@%p17 bra BB46_27;
// Callseq Start 12
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd17;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 2 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd523, [retval0+0];
}
// Callseq End 12
ld.local.u32 %r76, [%rd7];
BB46_27:
and.b32 %r55, %r76, 3;
cvt.rn.f64.s32 %fd276, %r55;
add.f64 %fd277, %fd523, 0dC002D97C7F3321D2;
fma.rn.f64 %fd524, %fd276, 0d3FF921FB54442D18, %fd277;
abs.f64 %fd278, %fd524;
setp.neu.f64 %p18, %fd278, 0d7FF0000000000000;
@%p18 bra BB46_29;
mov.f64 %fd279, 0d0000000000000000;
.loc 2 348 10
mul.rn.f64 %fd524, %fd524, %fd279;
BB46_29:
add.u64 %rd10, %SP, 0;
.loc 2 348 10
mul.f64 %fd280, %fd524, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r77, %fd280;
cvta.to.local.u64 %rd11, %rd10;
.loc 2 348 10
st.local.u32 [%rd11], %r77;
cvt.rn.f64.s32 %fd281, %r77;
neg.f64 %fd282, %fd281;
fma.rn.f64 %fd284, %fd282, %fd270, %fd524;
fma.rn.f64 %fd286, %fd282, %fd272, %fd284;
fma.rn.f64 %fd525, %fd282, %fd274, %fd286;
abs.f64 %fd288, %fd524;
setp.leu.f64 %p19, %fd288, 0d41E0000000000000;
@%p19 bra BB46_31;
// Callseq Start 13
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd524;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 2 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd525, [retval0+0];
}
// Callseq End 13
ld.local.u32 %r77, [%rd11];
BB46_31:
add.s32 %r21, %r77, 1;
shl.b32 %r56, %r21, 3;
and.b32 %r57, %r56, 8;
and.b32 %r58, %r21, 1;
setp.eq.b32 %p20, %r58, 1;
not.pred %p21, %p20;
selp.f64 %fd289, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p21;
mul.wide.u32 %rd14, %r57, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 2 348 10
ld.const.f64 %fd290, [%rd16+8];
mul.rn.f64 %fd28, %fd525, %fd525;
fma.rn.f64 %fd291, %fd289, %fd28, %fd290;
ld.const.f64 %fd292, [%rd16+16];
fma.rn.f64 %fd293, %fd291, %fd28, %fd292;
ld.const.f64 %fd294, [%rd16+24];
fma.rn.f64 %fd295, %fd293, %fd28, %fd294;
ld.const.f64 %fd296, [%rd16+32];
fma.rn.f64 %fd297, %fd295, %fd28, %fd296;
ld.const.f64 %fd298, [%rd16+40];
fma.rn.f64 %fd299, %fd297, %fd28, %fd298;
ld.const.f64 %fd300, [%rd16+48];
fma.rn.f64 %fd29, %fd299, %fd28, %fd300;
fma.rn.f64 %fd526, %fd29, %fd525, %fd525;
@%p21 bra BB46_33;
fma.rn.f64 %fd526, %fd29, %fd28, %fd226;
BB46_33:
and.b32 %r59, %r21, 2;
setp.eq.s32 %p22, %r59, 0;
@%p22 bra BB46_35;
mov.f64 %fd302, 0d0000000000000000;
mov.f64 %fd303, 0dBFF0000000000000;
.loc 2 348 10
fma.rn.f64 %fd526, %fd526, %fd303, %fd302;
BB46_35:
mul.f64 %fd527, %fd18, %fd526;
BB46_36:
neg.f64 %fd304, %fd527;
setp.lt.f64 %p23, %fd2, 0d0000000000000000;
selp.f64 %fd305, %fd304, %fd527, %p23;
mul.f64 %fd306, %fd2, 0d3FE0000000000000;
setp.lt.f64 %p24, %fd521, 0d39B4484BFEEBC2A0;
selp.f64 %fd307, %fd306, %fd305, %p24;
mov.f64 %fd308, 0dBFF0000000000000;
.loc 2 348 10
div.rn.f64 %fd309, %fd308, %fd2;
fma.rn.f64 %fd310, %fd522, %fd307, %fd309;
fma.rn.f64 %fd532, %fd310, 0d3FE45F306DC9C883, %fd3;
bra.uni BB46_57;
BB46_37:
.loc 2 348 10
setp.gtu.f64 %p25, %fd2, 0d4009B510EC2ADC83;
@%p25 bra BB46_39;
add.f64 %fd311, %fd2, 0dC00193BED4DFF243;
add.f64 %fd312, %fd311, 0d3C8BD1E50D219BFD;
mov.f64 %fd313, 0d3E4833AAE4D8B975;
mov.f64 %fd314, 0dBE2B87B0BE2AA150;
.loc 2 348 10
fma.rn.f64 %fd315, %fd314, %fd312, %fd313;
mov.f64 %fd316, 0dBE44E279B423E68F;
.loc 2 348 10
fma.rn.f64 %fd317, %fd315, %fd312, %fd316;
mov.f64 %fd318, 0d3E129DC6A747EB4F;
.loc 2 348 10
fma.rn.f64 %fd319, %fd317, %fd312, %fd318;
mov.f64 %fd320, 0dBE61D15534496CD8;
.loc 2 348 10
fma.rn.f64 %fd321, %fd319, %fd312, %fd320;
mov.f64 %fd322, 0d3E7EEC8D48FECE00;
.loc 2 348 10
fma.rn.f64 %fd323, %fd321, %fd312, %fd322;
mov.f64 %fd324, 0dBE8D1180AF70A134;
.loc 2 348 10
fma.rn.f64 %fd325, %fd323, %fd312, %fd324;
mov.f64 %fd326, 0d3E9C8386A0EA1388;
.loc 2 348 10
fma.rn.f64 %fd327, %fd325, %fd312, %fd326;
mov.f64 %fd328, 0dBEB01A014E7F3250;
.loc 2 348 10
fma.rn.f64 %fd329, %fd327, %fd312, %fd328;
mov.f64 %fd330, 0d3EC1FB752010A320;
.loc 2 348 10
fma.rn.f64 %fd331, %fd329, %fd312, %fd330;
mov.f64 %fd332, 0dBED3AA0AFF4E332B;
.loc 2 348 10
fma.rn.f64 %fd333, %fd331, %fd312, %fd332;
mov.f64 %fd334, 0d3EE584A6C77F6700;
.loc 2 348 10
fma.rn.f64 %fd335, %fd333, %fd312, %fd334;
mov.f64 %fd336, 0dBEF794C520FC2EBB;
.loc 2 348 10
fma.rn.f64 %fd337, %fd335, %fd312, %fd336;
mov.f64 %fd338, 0d3F09D18D2D35CC71;
.loc 2 348 10
fma.rn.f64 %fd339, %fd337, %fd312, %fd338;
mov.f64 %fd340, 0dBF1C3FB7315C4599;
.loc 2 348 10
fma.rn.f64 %fd341, %fd339, %fd312, %fd340;
mov.f64 %fd342, 0d3F2EEA7ADECCE927;
.loc 2 348 10
fma.rn.f64 %fd343, %fd341, %fd312, %fd342;
mov.f64 %fd344, 0dBF40B2D85257446F;
.loc 2 348 10
fma.rn.f64 %fd345, %fd343, %fd312, %fd344;
mov.f64 %fd346, 0d3F517AB4B1FE5D5B;
.loc 2 348 10
fma.rn.f64 %fd347, %fd345, %fd312, %fd346;
mov.f64 %fd348, 0dBF65429DC6516C0D;
.loc 2 348 10
fma.rn.f64 %fd349, %fd347, %fd312, %fd348;
mov.f64 %fd350, 0d3F7E671C7D0B090B;
.loc 2 348 10
fma.rn.f64 %fd351, %fd349, %fd312, %fd350;
mov.f64 %fd352, 0dBF73A6DEC36FB27C;
.loc 2 348 10
fma.rn.f64 %fd353, %fd351, %fd312, %fd352;
mov.f64 %fd354, 0dBFA0D2AF4E931FD1;
.loc 2 348 10
fma.rn.f64 %fd355, %fd353, %fd312, %fd354;
mov.f64 %fd356, 0dBFBE56F82217B964;
.loc 2 348 10
fma.rn.f64 %fd357, %fd355, %fd312, %fd356;
mov.f64 %fd358, 0d3FE0AA48442F014B;
.loc 2 348 10
fma.rn.f64 %fd359, %fd357, %fd312, %fd358;
mul.f64 %fd532, %fd359, %fd312;
bra.uni BB46_57;
BB46_39:
.loc 2 348 10
setp.gtu.f64 %p26, %fd2, 0d401C0D26D5A541CB;
@%p26 bra BB46_41;
add.f64 %fd360, %fd2, 0dC015B7FE4E87B02E;
add.f64 %fd361, %fd360, 0dBCBDFE7BAC228E8C;
mov.f64 %fd362, 0d3CC69A30996793E2;
mov.f64 %fd363, 0d3CBA3C76069F1D8C;
.loc 2 348 10
fma.rn.f64 %fd364, %fd363, %fd361, %fd362;
mov.f64 %fd365, 0dBCDDD8432FE756E7;
.loc 2 348 10
fma.rn.f64 %fd366, %fd364, %fd361, %fd365;
mov.f64 %fd367, 0dBD143158EEE220F7;
.loc 2 348 10
fma.rn.f64 %fd368, %fd366, %fd361, %fd367;
mov.f64 %fd369, 0d3D28D44491230F5A;
.loc 2 348 10
fma.rn.f64 %fd370, %fd368, %fd361, %fd369;
mov.f64 %fd371, 0dBD438842EAF4EDBC;
.loc 2 348 10
fma.rn.f64 %fd372, %fd370, %fd361, %fd371;
mov.f64 %fd373, 0d3D74958DAFBFAF5A;
.loc 2 348 10
fma.rn.f64 %fd374, %fd372, %fd361, %fd373;
mov.f64 %fd375, 0dBD9449A60E664848;
.loc 2 348 10
fma.rn.f64 %fd376, %fd374, %fd361, %fd375;
mov.f64 %fd377, 0d3D838BC8CD594A76;
.loc 2 348 10
fma.rn.f64 %fd378, %fd376, %fd361, %fd377;
mov.f64 %fd379, 0dBDFA798002141323;
.loc 2 348 10
fma.rn.f64 %fd380, %fd378, %fd361, %fd379;
mov.f64 %fd381, 0d3E380B4198956AAA;
.loc 2 348 10
fma.rn.f64 %fd382, %fd380, %fd361, %fd381;
mov.f64 %fd383, 0d3E5B62B5F21BACD4;
.loc 2 348 10
fma.rn.f64 %fd384, %fd382, %fd361, %fd383;
mov.f64 %fd385, 0dBEA255E729FB6AAE;
.loc 2 348 10
fma.rn.f64 %fd386, %fd384, %fd361, %fd385;
mov.f64 %fd387, 0dBEC80618F6BAE5AA;
.loc 2 348 10
fma.rn.f64 %fd388, %fd386, %fd361, %fd387;
mov.f64 %fd389, 0d3F085B940F8E8D36;
.loc 2 348 10
fma.rn.f64 %fd390, %fd388, %fd361, %fd389;
mov.f64 %fd391, 0d3F2337C7E10E14E8;
.loc 2 348 10
fma.rn.f64 %fd392, %fd390, %fd361, %fd391;
mov.f64 %fd393, 0dBF61BE6DB99332CA;
.loc 2 348 10
fma.rn.f64 %fd394, %fd392, %fd361, %fd393;
mov.f64 %fd395, 0dBF710A329E2BE9B8;
.loc 2 348 10
fma.rn.f64 %fd396, %fd394, %fd361, %fd395;
mov.f64 %fd397, 0d3FAA15D92DFE3FCF;
.loc 2 348 10
fma.rn.f64 %fd398, %fd396, %fd361, %fd397;
mov.f64 %fd399, 0d3FA00B9F8571C9BE;
.loc 2 348 10
fma.rn.f64 %fd400, %fd398, %fd361, %fd399;
mov.f64 %fd401, 0dBFD5C7C556F0C19A;
.loc 2 348 10
fma.rn.f64 %fd402, %fd400, %fd361, %fd401;
mul.f64 %fd532, %fd402, %fd361;
bra.uni BB46_57;
BB46_41:
.loc 2 348 10
setp.gtu.f64 %p27, %fd2, 0d4022585C739ACDDD;
@%p27 bra BB46_43;
add.f64 %fd403, %fd2, 0dC0213127AE6169B4;
add.f64 %fd404, %fd403, 0dBCB479CC068D9046;
mov.f64 %fd405, 0dBD43515F67644276;
mov.f64 %fd406, 0d3CB09CCC22945996;
.loc 2 348 10
fma.rn.f64 %fd407, %fd406, %fd404, %fd405;
mov.f64 %fd408, 0dBD72C5B978E9F5C7;
.loc 2 348 10
fma.rn.f64 %fd409, %fd407, %fd404, %fd408;
mov.f64 %fd410, 0d3DBEC1151613913C;
.loc 2 348 10
fma.rn.f64 %fd411, %fd409, %fd404, %fd410;
mov.f64 %fd412, 0d3DE9E38D13C4A824;
.loc 2 348 10
fma.rn.f64 %fd413, %fd411, %fd404, %fd412;
mov.f64 %fd414, 0dBE341E75E1088EB5;
.loc 2 348 10
fma.rn.f64 %fd415, %fd413, %fd404, %fd414;
mov.f64 %fd416, 0dBE5A384EBB13CFE1;
.loc 2 348 10
fma.rn.f64 %fd417, %fd415, %fd404, %fd416;
mov.f64 %fd418, 0d3EA2BECB27F8C8F8;
.loc 2 348 10
fma.rn.f64 %fd419, %fd417, %fd404, %fd418;
mov.f64 %fd420, 0d3EC176E72B989FD8;
.loc 2 348 10
fma.rn.f64 %fd421, %fd419, %fd404, %fd420;
mov.f64 %fd422, 0dBF06F7BAB102F822;
.loc 2 348 10
fma.rn.f64 %fd423, %fd421, %fd404, %fd422;
mov.f64 %fd424, 0dBF1B50D7E1D278E1;
.loc 2 348 10
fma.rn.f64 %fd425, %fd423, %fd404, %fd424;
mov.f64 %fd426, 0d3F607A678D60004F;
.loc 2 348 10
fma.rn.f64 %fd427, %fd425, %fd404, %fd426;
mov.f64 %fd428, 0d3F63CED2A2E69115;
.loc 2 348 10
fma.rn.f64 %fd429, %fd427, %fd404, %fd428;
mov.f64 %fd430, 0dBFA6395DFE49FCD4;
.loc 2 348 10
fma.rn.f64 %fd431, %fd429, %fd404, %fd430;
mov.f64 %fd432, 0dBF902B3933CF21B1;
.loc 2 348 10
fma.rn.f64 %fd433, %fd431, %fd404, %fd432;
mov.f64 %fd434, 0d3FD15F993FCEAB5C;
.loc 2 348 10
fma.rn.f64 %fd435, %fd433, %fd404, %fd434;
mul.f64 %fd532, %fd435, %fd404;
bra.uni BB46_57;
BB46_43:
.loc 2 348 10
abs.f64 %fd436, %fd2;
setp.neu.f64 %p28, %fd436, 0d7FF0000000000000;
@%p28 bra BB46_45;
mov.f64 %fd532, 0d0000000000000000;
bra.uni BB46_57;
BB46_45:
add.u64 %rd17, %SP, 12;
.loc 2 348 10
// inline asm
cvt.rn.f32.f64 %f9,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd439,%f10;
// inline asm
neg.f64 %fd440, %fd2;
mov.f64 %fd441, 0d3FF0000000000000;
.loc 2 348 10
fma.rn.f64 %fd442, %fd440, %fd439, %fd441;
fma.rn.f64 %fd443, %fd442, %fd442, %fd442;
fma.rn.f64 %fd444, %fd443, %fd439, %fd439;
mul.f64 %fd445, %fd444, %fd444;
mov.f64 %fd446, 0dC09C26E89385D5B1;
mov.f64 %fd447, 0d40D13DB326ECEBFE;
.loc 2 348 10
fma.rn.f64 %fd448, %fd447, %fd445, %fd446;
mov.f64 %fd449, 0d405C6AB923C6F55E;
.loc 2 348 10
fma.rn.f64 %fd450, %fd448, %fd445, %fd449;
mov.f64 %fd451, 0dC01E61EAF3BD2FA1;
.loc 2 348 10
fma.rn.f64 %fd452, %fd450, %fd445, %fd451;
mov.f64 %fd453, 0d3FE9BF15D9B97DD1;
.loc 2 348 10
fma.rn.f64 %fd454, %fd452, %fd445, %fd453;
mov.f64 %fd455, 0dBFC8BFECF93D7D19;
.loc 2 348 10
fma.rn.f64 %fd456, %fd454, %fd445, %fd455;
mov.f64 %fd457, 0d3FC7FFFFF756AA6C;
.loc 2 348 10
fma.rn.f64 %fd458, %fd456, %fd445, %fd457;
mov.f64 %fd459, 0d3FF0000000003646;
.loc 2 348 10
fma.rn.f64 %fd460, %fd458, %fd445, %fd459;
mov.f64 %fd461, 0d416024E99BA46E7B;
mov.f64 %fd462, 0dC1943281A050209C;
.loc 2 348 10
fma.rn.f64 %fd463, %fd462, %fd445, %fd461;
mov.f64 %fd464, 0dC11A6875D7DFBD65;
.loc 2 348 10
fma.rn.f64 %fd465, %fd463, %fd445, %fd464;
mov.f64 %fd466, 0d40D032C041790233;
.loc 2 348 10
fma.rn.f64 %fd467, %fd465, %fd445, %fd466;
mov.f64 %fd468, 0dC0839F895BC22946;
.loc 2 348 10
fma.rn.f64 %fd469, %fd467, %fd445, %fd468;
mov.f64 %fd470, 0d403E77CC78ECD2D8;
.loc 2 348 10
fma.rn.f64 %fd471, %fd469, %fd445, %fd470;
mov.f64 %fd472, 0dC002F368D0117BE9;
.loc 2 348 10
fma.rn.f64 %fd473, %fd471, %fd445, %fd472;
mov.f64 %fd474, 0d3FD7BCC786009A25;
.loc 2 348 10
fma.rn.f64 %fd475, %fd473, %fd445, %fd474;
mov.f64 %fd476, 0dBFC4FFFFFC51BC7A;
.loc 2 348 10
fma.rn.f64 %fd477, %fd475, %fd445, %fd476;
mov.f64 %fd478, 0d3FD7FFFFFFFFB5EA;
.loc 2 348 10
fma.rn.f64 %fd479, %fd477, %fd445, %fd478;
fma.rn.f64 %fd41, %fd479, %fd444, %fd2;
rsqrt.approx.f64 %fd480, %fd2;
mul.f64 %fd481, %fd480, 0d3FE9884533D43651;
mul.f64 %fd42, %fd481, %fd460;
mul.f64 %fd482, %fd41, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r78, %fd482;
cvta.to.local.u64 %rd18, %rd17;
.loc 2 348 10
st.local.u32 [%rd18], %r78;
cvt.rn.f64.s32 %fd483, %r78;
neg.f64 %fd484, %fd483;
mov.f64 %fd485, 0d3FF921FB54442D18;
.loc 2 348 10
fma.rn.f64 %fd486, %fd484, %fd485, %fd41;
mov.f64 %fd487, 0d3C91A62633145C00;
.loc 2 348 10
fma.rn.f64 %fd488, %fd484, %fd487, %fd486;
mov.f64 %fd489, 0d397B839A252049C0;
.loc 2 348 10
fma.rn.f64 %fd528, %fd484, %fd489, %fd488;
abs.f64 %fd490, %fd41;
setp.leu.f64 %p29, %fd490, 0d41E0000000000000;
@%p29 bra BB46_47;
// Callseq Start 14
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd41;
.param .b64 param1;
st.param.b64 [param1+0], %rd17;
.param .b64 retval0;
.loc 2 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd528, [retval0+0];
}
// Callseq End 14
ld.local.u32 %r78, [%rd18];
BB46_47:
and.b32 %r60, %r78, 3;
cvt.rn.f64.s32 %fd491, %r60;
add.f64 %fd492, %fd528, 0dC00F6A7A2955385E;
fma.rn.f64 %fd529, %fd491, 0d3FF921FB54442D18, %fd492;
abs.f64 %fd493, %fd529;
setp.neu.f64 %p30, %fd493, 0d7FF0000000000000;
@%p30 bra BB46_49;
mov.f64 %fd494, 0d0000000000000000;
.loc 2 348 10
mul.rn.f64 %fd529, %fd529, %fd494;
BB46_49:
add.u64 %rd21, %SP, 8;
.loc 2 348 10
mul.f64 %fd495, %fd529, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r79, %fd495;
cvta.to.local.u64 %rd22, %rd21;
.loc 2 348 10
st.local.u32 [%rd22], %r79;
cvt.rn.f64.s32 %fd496, %r79;
neg.f64 %fd497, %fd496;
fma.rn.f64 %fd499, %fd497, %fd485, %fd529;
fma.rn.f64 %fd501, %fd497, %fd487, %fd499;
fma.rn.f64 %fd530, %fd497, %fd489, %fd501;
abs.f64 %fd503, %fd529;
setp.leu.f64 %p31, %fd503, 0d41E0000000000000;
@%p31 bra BB46_51;
// Callseq Start 15
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd529;
.param .b64 param1;
st.param.b64 [param1+0], %rd21;
.param .b64 retval0;
.loc 2 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd530, [retval0+0];
}
// Callseq End 15
ld.local.u32 %r79, [%rd22];
BB46_51:
add.s32 %r28, %r79, 1;
shl.b32 %r61, %r28, 3;
and.b32 %r62, %r61, 8;
and.b32 %r63, %r28, 1;
setp.eq.b32 %p32, %r63, 1;
not.pred %p33, %p32;
selp.f64 %fd504, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p33;
mul.wide.u32 %rd25, %r62, 8;
mov.u64 %rd26, __cudart_sin_cos_coeffs;
add.s64 %rd27, %rd26, %rd25;
.loc 2 348 10
ld.const.f64 %fd505, [%rd27+8];
mul.rn.f64 %fd52, %fd530, %fd530;
fma.rn.f64 %fd506, %fd504, %fd52, %fd505;
ld.const.f64 %fd507, [%rd27+16];
fma.rn.f64 %fd508, %fd506, %fd52, %fd507;
ld.const.f64 %fd509, [%rd27+24];
fma.rn.f64 %fd510, %fd508, %fd52, %fd509;
ld.const.f64 %fd511, [%rd27+32];
fma.rn.f64 %fd512, %fd510, %fd52, %fd511;
ld.const.f64 %fd513, [%rd27+40];
fma.rn.f64 %fd514, %fd512, %fd52, %fd513;
ld.const.f64 %fd515, [%rd27+48];
fma.rn.f64 %fd53, %fd514, %fd52, %fd515;
fma.rn.f64 %fd531, %fd53, %fd530, %fd530;
@%p33 bra BB46_53;
fma.rn.f64 %fd531, %fd53, %fd52, %fd441;
BB46_53:
and.b32 %r64, %r28, 2;
setp.eq.s32 %p34, %r64, 0;
@%p34 bra BB46_55;
mov.f64 %fd517, 0d0000000000000000;
mov.f64 %fd518, 0dBFF0000000000000;
.loc 2 348 10
fma.rn.f64 %fd531, %fd531, %fd518, %fd517;
BB46_55:
mul.f64 %fd532, %fd42, %fd531;
bra.uni BB46_57;
BB46_56:
mov.f64 %fd519, 0dBFE45F306DC9C883;
.loc 2 348 10
div.rn.f64 %fd532, %fd519, %fd2;
BB46_57:
setp.gtu.f64 %p35, %fd1, 0d0000000000000000;
@%p35 bra BB46_59;
setp.eq.f64 %p36, %fd1, 0d0000000000000000;
selp.f64 %fd532, 0dFFF0000000000000, 0dFFF8000000000000, %p36;
BB46_59:
.loc 1 63 42
mad.lo.s32 %r65, %r70, %r33, %r71;
mul.wide.s32 %rd29, %r65, 8;
add.s64 %rd30, %rd28, %rd29;
st.global.f64 [%rd30], %fd532;
.loc 1 63 22
mov.u32 %r67, %nctaid.y;
mad.lo.s32 %r71, %r67, %r39, %r71;
.loc 1 63 1
setp.lt.s32 %p37, %r71, %r31;
@%p37 bra BB46_3;
BB46_60:
.loc 1 63 22
mov.u32 %r68, %nctaid.x;
mad.lo.s32 %r70, %r68, %r35, %r70;
.loc 1 63 1
setp.lt.s32 %p38, %r70, %r32;
@%p38 bra BB46_2;
BB46_61:
.loc 1 63 2
ret;
}
// map2_add_double: element-wise double-precision add,
//   dst[y + x*param_3] = a[y + x*param_5] + b[y + x*param_7]
// for every (x, y) with x < param_1 and y < param_0.
//
// Parameters (as used by the code below):
//   param_0 (u32) : upper bound for the y-derived index (inner loop)
//   param_1 (u32) : upper bound for the x-derived index (outer loop)
//   param_2 (u64) : destination pointer (converted to a global address)
//   param_3 (u32) : multiplier applied to x when indexing the destination
//   param_4 (u64) : first operand pointer  (a)
//   param_5 (u32) : multiplier applied to x when indexing a
//   param_6 (u64) : second operand pointer (b)
//   param_7 (u32) : multiplier applied to x when indexing b
// NOTE(review): param_0/param_1 look like the two extents of a 2-D array and
// param_3/5/7 like leading-dimension strides in elements -- confirm against
// the originating .cu source.
// The bounds are loaded as u32 but compared with signed (s32) predicates, and
// element indices are formed in 32 bits before being widened to byte offsets.
.visible .entry map2_add_double(
.param .u32 map2_add_double_param_0,
.param .u32 map2_add_double_param_1,
.param .u64 map2_add_double_param_2,
.param .u32 map2_add_double_param_3,
.param .u64 map2_add_double_param_4,
.param .u32 map2_add_double_param_5,
.param .u64 map2_add_double_param_6,
.param .u32 map2_add_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
// Load all kernel arguments.
ld.param.u32 %r13, [map2_add_double_param_0];
ld.param.u32 %r14, [map2_add_double_param_1];
ld.param.u64 %rd4, [map2_add_double_param_2];
ld.param.u32 %r15, [map2_add_double_param_3];
ld.param.u64 %rd5, [map2_add_double_param_4];
ld.param.u32 %r16, [map2_add_double_param_5];
ld.param.u64 %rd6, [map2_add_double_param_6];
ld.param.u32 %r17, [map2_add_double_param_7];
// %rd1 = dst, %rd2 = b (param_6), %rd3 = a (param_4), all as global addresses.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 1 112 1
// x = ntid.x * ctaid.x + tid.x  (this thread's starting outer index, %r28).
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 1 112 1
// Nothing to do when the starting x is already past param_1.
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB47_6;
.loc 1 112 1
// y0 = ntid.y * ctaid.y + tid.y  (this thread's starting inner index, %r3).
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 1 112 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 1 112 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop head: one iteration per x value handled by this thread.
BB47_2:
.loc 1 112 1
// Skip the inner loop entirely when y0 >= param_0.
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB47_5;
.loc 1 112 1
// Per-x base element offsets: %r7 for a, %r8 for b, %r9 for dst.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
mul.lo.s32 %r9, %r28, %r15;
// Inner loop counter y (%r29) restarts at y0 for every x.
mov.u32 %r29, %r3;
// Inner loop body: process element (x, y).
BB47_4:
.loc 1 112 1
mov.u32 %r10, %r29;
// %rd8 = &a[y + x*param_5]  (index sign-extended, scaled by 8 bytes).
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 1 112 1
// %rd10 = &b[y + x*param_7].
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 1 112 1
// %fd3 = a + b.
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
add.f64 %fd3, %fd2, %fd1;
// %rd12 = &dst[y + x*param_3]; store the sum.
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 1 112 1
st.global.f64 [%rd12], %fd3;
.loc 1 112 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r11, %r5, %r10;
.loc 1 112 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB47_4;
BB47_5:
.loc 1 112 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
add.s32 %r28, %r4, %r28;
.loc 1 112 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB47_2;
BB47_6:
.loc 1 112 2
ret;
}
// map2_v_s_add_double: add a scalar to every element,
//   dst[y + x*param_3] = a[y + x*param_5] + s
// for every (x, y) with x < param_1 and y < param_0.
//
// Parameters (as used by the code below):
//   param_0 (u32) : upper bound for the y-derived index (inner loop)
//   param_1 (u32) : upper bound for the x-derived index (outer loop)
//   param_2 (u64) : destination pointer (converted to a global address)
//   param_3 (u32) : multiplier applied to x when indexing the destination
//   param_4 (u64) : source pointer (a)
//   param_5 (u32) : multiplier applied to x when indexing a
//   param_6 (f64) : scalar operand s
// NOTE(review): param_0/param_1 look like the two extents of a 2-D array and
// param_3/5 like leading-dimension strides in elements -- confirm against the
// originating .cu source.
// Bounds are loaded as u32 but compared with signed (s32) predicates; element
// indices are formed in 32 bits before being widened to byte offsets.
.visible .entry map2_v_s_add_double(
.param .u32 map2_v_s_add_double_param_0,
.param .u32 map2_v_s_add_double_param_1,
.param .u64 map2_v_s_add_double_param_2,
.param .u32 map2_v_s_add_double_param_3,
.param .u64 map2_v_s_add_double_param_4,
.param .u32 map2_v_s_add_double_param_5,
.param .f64 map2_v_s_add_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
// Load all kernel arguments; %fd1 holds the scalar for the whole kernel.
ld.param.u32 %r12, [map2_v_s_add_double_param_0];
ld.param.u32 %r13, [map2_v_s_add_double_param_1];
ld.param.u64 %rd3, [map2_v_s_add_double_param_2];
ld.param.u32 %r14, [map2_v_s_add_double_param_3];
ld.param.u64 %rd4, [map2_v_s_add_double_param_4];
ld.param.u32 %r15, [map2_v_s_add_double_param_5];
ld.param.f64 %fd1, [map2_v_s_add_double_param_6];
// %rd1 = dst, %rd2 = a, both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 112 1
// x = ntid.x * ctaid.x + tid.x  (starting outer index, %r25).
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 112 1
// Nothing to do when the starting x is already past param_1.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB48_6;
.loc 1 112 1
// y0 = ntid.y * ctaid.y + tid.y  (starting inner index, %r3).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 112 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 112 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head: one iteration per x value handled by this thread.
BB48_2:
.loc 1 112 1
// Skip the inner loop entirely when y0 >= param_0.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB48_5;
.loc 1 112 1
// Per-x base element offsets: %r7 for a, %r8 for dst.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
// Inner loop counter y (%r26) restarts at y0 for every x.
mov.u32 %r26, %r3;
// Inner loop body: process element (x, y).
BB48_4:
.loc 1 112 1
mov.u32 %r9, %r26;
// %rd6 = &a[y + x*param_5]  (index sign-extended, scaled by 8 bytes).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 112 1
// %fd3 = a + s.
ld.global.f64 %fd2, [%rd6];
add.f64 %fd3, %fd2, %fd1;
// %rd8 = &dst[y + x*param_3]; store the result.
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 112 1
st.global.f64 [%rd8], %fd3;
.loc 1 112 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r10, %r5, %r9;
.loc 1 112 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB48_4;
BB48_5:
.loc 1 112 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
add.s32 %r25, %r4, %r25;
.loc 1 112 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB48_2;
BB48_6:
.loc 1 112 2
ret;
}
// map2_s_v_add_double: scalar-first variant of the scalar add,
//   dst[y + x*param_3] = b[y + x*param_6] + s
// for every (x, y) with x < param_1 and y < param_0.
// Differs from map2_v_s_add_double only in parameter order (the scalar comes
// before the array pointer); the emitted add uses the same operand order.
//
// Parameters (as used by the code below):
//   param_0 (u32) : upper bound for the y-derived index (inner loop)
//   param_1 (u32) : upper bound for the x-derived index (outer loop)
//   param_2 (u64) : destination pointer (converted to a global address)
//   param_3 (u32) : multiplier applied to x when indexing the destination
//   param_4 (f64) : scalar operand s
//   param_5 (u64) : source pointer (b)
//   param_6 (u32) : multiplier applied to x when indexing b
// NOTE(review): param_0/param_1 look like the two extents of a 2-D array and
// param_3/6 like leading-dimension strides in elements -- confirm against the
// originating .cu source.
// Bounds are loaded as u32 but compared with signed (s32) predicates; element
// indices are formed in 32 bits before being widened to byte offsets.
.visible .entry map2_s_v_add_double(
.param .u32 map2_s_v_add_double_param_0,
.param .u32 map2_s_v_add_double_param_1,
.param .u64 map2_s_v_add_double_param_2,
.param .u32 map2_s_v_add_double_param_3,
.param .f64 map2_s_v_add_double_param_4,
.param .u64 map2_s_v_add_double_param_5,
.param .u32 map2_s_v_add_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
// Load all kernel arguments; %fd1 holds the scalar for the whole kernel.
ld.param.u32 %r12, [map2_s_v_add_double_param_0];
ld.param.u32 %r13, [map2_s_v_add_double_param_1];
ld.param.u64 %rd3, [map2_s_v_add_double_param_2];
ld.param.u32 %r14, [map2_s_v_add_double_param_3];
ld.param.f64 %fd1, [map2_s_v_add_double_param_4];
ld.param.u64 %rd4, [map2_s_v_add_double_param_5];
ld.param.u32 %r15, [map2_s_v_add_double_param_6];
// %rd1 = dst, %rd2 = b, both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 112 1
// x = ntid.x * ctaid.x + tid.x  (starting outer index, %r25).
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 112 1
// Nothing to do when the starting x is already past param_1.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB49_6;
.loc 1 112 1
// y0 = ntid.y * ctaid.y + tid.y  (starting inner index, %r3).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 112 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 112 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head: one iteration per x value handled by this thread.
BB49_2:
.loc 1 112 1
// Skip the inner loop entirely when y0 >= param_0.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB49_5;
.loc 1 112 1
// Per-x base element offsets: %r7 for b, %r8 for dst.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
// Inner loop counter y (%r26) restarts at y0 for every x.
mov.u32 %r26, %r3;
// Inner loop body: process element (x, y).
BB49_4:
.loc 1 112 1
mov.u32 %r9, %r26;
// %rd6 = &b[y + x*param_6]  (index sign-extended, scaled by 8 bytes).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 112 1
// %fd3 = b + s.
ld.global.f64 %fd2, [%rd6];
add.f64 %fd3, %fd2, %fd1;
// %rd8 = &dst[y + x*param_3]; store the result.
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 112 1
st.global.f64 [%rd8], %fd3;
.loc 1 112 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r10, %r5, %r9;
.loc 1 112 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB49_4;
BB49_5:
.loc 1 112 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
add.s32 %r25, %r4, %r25;
.loc 1 112 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB49_2;
BB49_6:
.loc 1 112 2
ret;
}
// map2_sub_double: element-wise double-precision subtract,
//   dst[y + x*param_3] = a[y + x*param_5] - b[y + x*param_7]
// for every (x, y) with x < param_1 and y < param_0.
//
// Parameters (as used by the code below):
//   param_0 (u32) : upper bound for the y-derived index (inner loop)
//   param_1 (u32) : upper bound for the x-derived index (outer loop)
//   param_2 (u64) : destination pointer (converted to a global address)
//   param_3 (u32) : multiplier applied to x when indexing the destination
//   param_4 (u64) : minuend pointer    (a)
//   param_5 (u32) : multiplier applied to x when indexing a
//   param_6 (u64) : subtrahend pointer (b)
//   param_7 (u32) : multiplier applied to x when indexing b
// NOTE(review): param_0/param_1 look like the two extents of a 2-D array and
// param_3/5/7 like leading-dimension strides in elements -- confirm against
// the originating .cu source.
// Bounds are loaded as u32 but compared with signed (s32) predicates; element
// indices are formed in 32 bits before being widened to byte offsets.
.visible .entry map2_sub_double(
.param .u32 map2_sub_double_param_0,
.param .u32 map2_sub_double_param_1,
.param .u64 map2_sub_double_param_2,
.param .u32 map2_sub_double_param_3,
.param .u64 map2_sub_double_param_4,
.param .u32 map2_sub_double_param_5,
.param .u64 map2_sub_double_param_6,
.param .u32 map2_sub_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
// Load all kernel arguments.
ld.param.u32 %r13, [map2_sub_double_param_0];
ld.param.u32 %r14, [map2_sub_double_param_1];
ld.param.u64 %rd4, [map2_sub_double_param_2];
ld.param.u32 %r15, [map2_sub_double_param_3];
ld.param.u64 %rd5, [map2_sub_double_param_4];
ld.param.u32 %r16, [map2_sub_double_param_5];
ld.param.u64 %rd6, [map2_sub_double_param_6];
ld.param.u32 %r17, [map2_sub_double_param_7];
// %rd1 = dst, %rd2 = b (param_6), %rd3 = a (param_4), all as global addresses.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 1 113 1
// x = ntid.x * ctaid.x + tid.x  (starting outer index, %r28).
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 1 113 1
// Nothing to do when the starting x is already past param_1.
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB50_6;
.loc 1 113 1
// y0 = ntid.y * ctaid.y + tid.y  (starting inner index, %r3).
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 1 113 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 1 113 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop head: one iteration per x value handled by this thread.
BB50_2:
.loc 1 113 1
// Skip the inner loop entirely when y0 >= param_0.
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB50_5;
.loc 1 113 1
// Per-x base element offsets: %r7 for a, %r8 for b, %r9 for dst.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
mul.lo.s32 %r9, %r28, %r15;
// Inner loop counter y (%r29) restarts at y0 for every x.
mov.u32 %r29, %r3;
// Inner loop body: process element (x, y).
BB50_4:
.loc 1 113 1
mov.u32 %r10, %r29;
// %rd8 = &a[y + x*param_5]  (index sign-extended, scaled by 8 bytes).
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 1 113 1
// %rd10 = &b[y + x*param_7].
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 1 113 1
// %fd3 = a - b  (operand order matters: %fd2 is a, %fd1 is b).
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
sub.f64 %fd3, %fd2, %fd1;
// %rd12 = &dst[y + x*param_3]; store the difference.
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 1 113 1
st.global.f64 [%rd12], %fd3;
.loc 1 113 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r11, %r5, %r10;
.loc 1 113 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB50_4;
BB50_5:
.loc 1 113 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
add.s32 %r28, %r4, %r28;
.loc 1 113 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB50_2;
BB50_6:
.loc 1 113 2
ret;
}
// map2_v_s_sub_double: subtract a scalar from every element,
//   dst[y + x*param_3] = a[y + x*param_5] - s
// for every (x, y) with x < param_1 and y < param_0.
//
// Parameters (as used by the code below):
//   param_0 (u32) : upper bound for the y-derived index (inner loop)
//   param_1 (u32) : upper bound for the x-derived index (outer loop)
//   param_2 (u64) : destination pointer (converted to a global address)
//   param_3 (u32) : multiplier applied to x when indexing the destination
//   param_4 (u64) : source pointer (a), the minuend
//   param_5 (u32) : multiplier applied to x when indexing a
//   param_6 (f64) : scalar subtrahend s
// NOTE(review): param_0/param_1 look like the two extents of a 2-D array and
// param_3/5 like leading-dimension strides in elements -- confirm against the
// originating .cu source.
// Bounds are loaded as u32 but compared with signed (s32) predicates; element
// indices are formed in 32 bits before being widened to byte offsets.
.visible .entry map2_v_s_sub_double(
.param .u32 map2_v_s_sub_double_param_0,
.param .u32 map2_v_s_sub_double_param_1,
.param .u64 map2_v_s_sub_double_param_2,
.param .u32 map2_v_s_sub_double_param_3,
.param .u64 map2_v_s_sub_double_param_4,
.param .u32 map2_v_s_sub_double_param_5,
.param .f64 map2_v_s_sub_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
// Load all kernel arguments; %fd1 holds the scalar for the whole kernel.
ld.param.u32 %r12, [map2_v_s_sub_double_param_0];
ld.param.u32 %r13, [map2_v_s_sub_double_param_1];
ld.param.u64 %rd3, [map2_v_s_sub_double_param_2];
ld.param.u32 %r14, [map2_v_s_sub_double_param_3];
ld.param.u64 %rd4, [map2_v_s_sub_double_param_4];
ld.param.u32 %r15, [map2_v_s_sub_double_param_5];
ld.param.f64 %fd1, [map2_v_s_sub_double_param_6];
// %rd1 = dst, %rd2 = a, both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 113 1
// x = ntid.x * ctaid.x + tid.x  (starting outer index, %r25).
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 113 1
// Nothing to do when the starting x is already past param_1.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB51_6;
.loc 1 113 1
// y0 = ntid.y * ctaid.y + tid.y  (starting inner index, %r3).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 113 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 113 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head: one iteration per x value handled by this thread.
BB51_2:
.loc 1 113 1
// Skip the inner loop entirely when y0 >= param_0.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB51_5;
.loc 1 113 1
// Per-x base element offsets: %r7 for a, %r8 for dst.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
// Inner loop counter y (%r26) restarts at y0 for every x.
mov.u32 %r26, %r3;
// Inner loop body: process element (x, y).
BB51_4:
.loc 1 113 1
mov.u32 %r9, %r26;
// %rd6 = &a[y + x*param_5]  (index sign-extended, scaled by 8 bytes).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 113 1
// %fd3 = a - s  (element minus scalar).
ld.global.f64 %fd2, [%rd6];
sub.f64 %fd3, %fd2, %fd1;
// %rd8 = &dst[y + x*param_3]; store the result.
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 113 1
st.global.f64 [%rd8], %fd3;
.loc 1 113 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r10, %r5, %r9;
.loc 1 113 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB51_4;
BB51_5:
.loc 1 113 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
add.s32 %r25, %r4, %r25;
.loc 1 113 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB51_2;
BB51_6:
.loc 1 113 2
ret;
}
// map2_s_v_sub_double: subtract every element from a scalar,
//   dst[y + x*param_3] = s - b[y + x*param_6]
// for every (x, y) with x < param_1 and y < param_0.
// Unlike map2_v_s_sub_double, the scalar is the minuend here.
//
// Parameters (as used by the code below):
//   param_0 (u32) : upper bound for the y-derived index (inner loop)
//   param_1 (u32) : upper bound for the x-derived index (outer loop)
//   param_2 (u64) : destination pointer (converted to a global address)
//   param_3 (u32) : multiplier applied to x when indexing the destination
//   param_4 (f64) : scalar minuend s
//   param_5 (u64) : source pointer (b), the subtrahend
//   param_6 (u32) : multiplier applied to x when indexing b
// NOTE(review): param_0/param_1 look like the two extents of a 2-D array and
// param_3/6 like leading-dimension strides in elements -- confirm against the
// originating .cu source.
// Bounds are loaded as u32 but compared with signed (s32) predicates; element
// indices are formed in 32 bits before being widened to byte offsets.
.visible .entry map2_s_v_sub_double(
.param .u32 map2_s_v_sub_double_param_0,
.param .u32 map2_s_v_sub_double_param_1,
.param .u64 map2_s_v_sub_double_param_2,
.param .u32 map2_s_v_sub_double_param_3,
.param .f64 map2_s_v_sub_double_param_4,
.param .u64 map2_s_v_sub_double_param_5,
.param .u32 map2_s_v_sub_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
// Load all kernel arguments; %fd1 holds the scalar for the whole kernel.
ld.param.u32 %r12, [map2_s_v_sub_double_param_0];
ld.param.u32 %r13, [map2_s_v_sub_double_param_1];
ld.param.u64 %rd3, [map2_s_v_sub_double_param_2];
ld.param.u32 %r14, [map2_s_v_sub_double_param_3];
ld.param.f64 %fd1, [map2_s_v_sub_double_param_4];
ld.param.u64 %rd4, [map2_s_v_sub_double_param_5];
ld.param.u32 %r15, [map2_s_v_sub_double_param_6];
// %rd1 = dst, %rd2 = b, both as global addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 113 1
// x = ntid.x * ctaid.x + tid.x  (starting outer index, %r25).
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 113 1
// Nothing to do when the starting x is already past param_1.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB52_6;
.loc 1 113 1
// y0 = ntid.y * ctaid.y + tid.y  (starting inner index, %r3).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 113 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 113 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop head: one iteration per x value handled by this thread.
BB52_2:
.loc 1 113 1
// Skip the inner loop entirely when y0 >= param_0.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB52_5;
.loc 1 113 1
// Per-x base element offsets: %r7 for b, %r8 for dst.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
// Inner loop counter y (%r26) restarts at y0 for every x.
mov.u32 %r26, %r3;
// Inner loop body: process element (x, y).
BB52_4:
.loc 1 113 1
mov.u32 %r9, %r26;
// %rd6 = &b[y + x*param_6]  (index sign-extended, scaled by 8 bytes).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 113 1
// %fd3 = s - b  (scalar minus element; operands reversed vs. the v_s kernel).
ld.global.f64 %fd2, [%rd6];
sub.f64 %fd3, %fd1, %fd2;
// %rd8 = &dst[y + x*param_3]; store the result.
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 113 1
st.global.f64 [%rd8], %fd3;
.loc 1 113 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r10, %r5, %r9;
.loc 1 113 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB52_4;
BB52_5:
.loc 1 113 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
add.s32 %r25, %r4, %r25;
.loc 1 113 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB52_2;
BB52_6:
.loc 1 113 2
ret;
}
// map2_mul_double
//   dst[ix * dst_stride + iy] = lhs[ix * lhs_stride + iy] * rhs[ix * rhs_stride + iy]
// for every ix in [0, param_1) and iy in [0, param_0), walked with a 2-D
// grid-stride loop (ix comes from the x thread/block indices, iy from y).
// Parameters:
//   param_0  u32  exclusive bound for iy
//   param_1  u32  exclusive bound for ix
//   param_2  u64  destination pointer (f64 elements)
//   param_3  u32  destination stride in elements, multiplied by ix
//   param_4  u64  left operand pointer
//   param_5  u32  left operand stride
//   param_6  u64  right operand pointer
//   param_7  u32  right operand stride
// Element indices are computed in 32-bit signed arithmetic and then widened.
.visible .entry map2_mul_double(
.param .u32 map2_mul_double_param_0,
.param .u32 map2_mul_double_param_1,
.param .u64 map2_mul_double_param_2,
.param .u32 map2_mul_double_param_3,
.param .u64 map2_mul_double_param_4,
.param .u32 map2_mul_double_param_5,
.param .u64 map2_mul_double_param_6,
.param .u32 map2_mul_double_param_7
)
{
.reg .pred %p_x_done, %p_y_done;
.reg .s32 %lim_y, %lim_x, %dst_stride, %lhs_stride, %rhs_stride;
.reg .s32 %bdim_x, %bdim_y, %sreg_a, %sreg_b;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y;
.reg .s32 %lhs_idx, %rhs_idx, %dst_idx;
.reg .s64 %dst_gen, %lhs_gen, %rhs_gen, %dst, %lhs, %rhs;
.reg .s64 %lhs_off, %rhs_off, %dst_off, %lhs_addr, %rhs_addr, %dst_addr;
.reg .f64 %lhs_val, %rhs_val, %res;
// Fetch the kernel arguments.
ld.param.u32 %lim_y, [map2_mul_double_param_0];
ld.param.u32 %lim_x, [map2_mul_double_param_1];
ld.param.u64 %dst_gen, [map2_mul_double_param_2];
ld.param.u32 %dst_stride, [map2_mul_double_param_3];
ld.param.u64 %lhs_gen, [map2_mul_double_param_4];
ld.param.u32 %lhs_stride, [map2_mul_double_param_5];
ld.param.u64 %rhs_gen, [map2_mul_double_param_6];
ld.param.u32 %rhs_stride, [map2_mul_double_param_7];
// Generic -> global address space for all three buffers.
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %rhs, %rhs_gen;
cvta.to.global.u64 %lhs, %lhs_gen;
.loc 1 114 1
// Starting indices of this thread and the per-iteration grid strides.
mov.u32 %bdim_x, %ntid.x;
mov.u32 %sreg_a, %ctaid.x;
mov.u32 %sreg_b, %tid.x;
mad.lo.s32 %ix, %bdim_x, %sreg_a, %sreg_b;
mov.u32 %bdim_y, %ntid.y;
mov.u32 %sreg_a, %ctaid.y;
mov.u32 %sreg_b, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %sreg_a, %sreg_b;
mov.u32 %sreg_a, %nctaid.x;
mul.lo.s32 %step_x, %sreg_a, %bdim_x;
mov.u32 %sreg_a, %nctaid.y;
mul.lo.s32 %step_y, %sreg_a, %bdim_y;
$L_mul_x_head:
// Outer loop over ix (top-tested).
setp.ge.s32 %p_x_done, %ix, %lim_x;
@%p_x_done bra $L_mul_exit;
mov.u32 %iy, %iy_first;
$L_mul_y_head:
// Inner loop over iy (top-tested).
setp.ge.s32 %p_y_done, %iy, %lim_y;
@%p_y_done bra $L_mul_x_next;
// Addresses of the two operands.
mad.lo.s32 %lhs_idx, %ix, %lhs_stride, %iy;
mul.wide.s32 %lhs_off, %lhs_idx, 8;
add.s64 %lhs_addr, %lhs, %lhs_off;
mad.lo.s32 %rhs_idx, %ix, %rhs_stride, %iy;
mul.wide.s32 %rhs_off, %rhs_idx, 8;
add.s64 %rhs_addr, %rhs, %rhs_off;
// Both loads are issued before the store.
ld.global.f64 %rhs_val, [%rhs_addr];
ld.global.f64 %lhs_val, [%lhs_addr];
mul.f64 %res, %lhs_val, %rhs_val;
// Store to the destination element.
mad.lo.s32 %dst_idx, %ix, %dst_stride, %iy;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
st.global.f64 [%dst_addr], %res;
.loc 1 114 22
add.s32 %iy, %iy, %step_y;
bra.uni $L_mul_y_head;
$L_mul_x_next:
add.s32 %ix, %ix, %step_x;
bra.uni $L_mul_x_head;
$L_mul_exit:
.loc 1 114 2
ret;
}
// map2_v_s_mul_double
//   dst[ix * dst_stride + iy] = src[ix * src_stride + iy] * scalar
// for every ix in [0, param_1) and iy in [0, param_0), walked with a 2-D
// grid-stride loop (ix comes from the x thread/block indices, iy from y).
// Parameters:
//   param_0  u32  exclusive bound for iy
//   param_1  u32  exclusive bound for ix
//   param_2  u64  destination pointer (f64 elements)
//   param_3  u32  destination stride in elements, multiplied by ix
//   param_4  u64  source pointer (f64 elements)
//   param_5  u32  source stride in elements, multiplied by ix
//   param_6  f64  scalar right-hand operand
// Element indices are computed in 32-bit signed arithmetic and then widened.
.visible .entry map2_v_s_mul_double(
.param .u32 map2_v_s_mul_double_param_0,
.param .u32 map2_v_s_mul_double_param_1,
.param .u64 map2_v_s_mul_double_param_2,
.param .u32 map2_v_s_mul_double_param_3,
.param .u64 map2_v_s_mul_double_param_4,
.param .u32 map2_v_s_mul_double_param_5,
.param .f64 map2_v_s_mul_double_param_6
)
{
.reg .pred %p_x_done, %p_y_done;
.reg .s32 %lim_y, %lim_x, %dst_stride, %src_stride;
.reg .s32 %bdim_x, %bdim_y, %sreg_a, %sreg_b;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y, %src_idx, %dst_idx;
.reg .s64 %dst_gen, %src_gen, %dst, %src;
.reg .s64 %src_off, %dst_off, %src_addr, %dst_addr;
.reg .f64 %scalar, %elem, %res;
// Fetch the kernel arguments.
ld.param.u32 %lim_y, [map2_v_s_mul_double_param_0];
ld.param.u32 %lim_x, [map2_v_s_mul_double_param_1];
ld.param.u64 %dst_gen, [map2_v_s_mul_double_param_2];
ld.param.u32 %dst_stride, [map2_v_s_mul_double_param_3];
ld.param.u64 %src_gen, [map2_v_s_mul_double_param_4];
ld.param.u32 %src_stride, [map2_v_s_mul_double_param_5];
ld.param.f64 %scalar, [map2_v_s_mul_double_param_6];
// Generic -> global address space for both buffers.
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %src, %src_gen;
.loc 1 114 1
// Starting indices of this thread and the per-iteration grid strides.
mov.u32 %bdim_x, %ntid.x;
mov.u32 %sreg_a, %ctaid.x;
mov.u32 %sreg_b, %tid.x;
mad.lo.s32 %ix, %bdim_x, %sreg_a, %sreg_b;
mov.u32 %bdim_y, %ntid.y;
mov.u32 %sreg_a, %ctaid.y;
mov.u32 %sreg_b, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %sreg_a, %sreg_b;
mov.u32 %sreg_a, %nctaid.x;
mul.lo.s32 %step_x, %sreg_a, %bdim_x;
mov.u32 %sreg_a, %nctaid.y;
mul.lo.s32 %step_y, %sreg_a, %bdim_y;
$L_vsmul_x_head:
// Outer loop over ix (top-tested).
setp.ge.s32 %p_x_done, %ix, %lim_x;
@%p_x_done bra $L_vsmul_exit;
mov.u32 %iy, %iy_first;
$L_vsmul_y_head:
// Inner loop over iy (top-tested).
setp.ge.s32 %p_y_done, %iy, %lim_y;
@%p_y_done bra $L_vsmul_x_next;
// Load the source element.
mad.lo.s32 %src_idx, %ix, %src_stride, %iy;
mul.wide.s32 %src_off, %src_idx, 8;
add.s64 %src_addr, %src, %src_off;
ld.global.f64 %elem, [%src_addr];
// element * scalar
mul.f64 %res, %elem, %scalar;
// Store to the destination element.
mad.lo.s32 %dst_idx, %ix, %dst_stride, %iy;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
st.global.f64 [%dst_addr], %res;
.loc 1 114 22
add.s32 %iy, %iy, %step_y;
bra.uni $L_vsmul_y_head;
$L_vsmul_x_next:
add.s32 %ix, %ix, %step_x;
bra.uni $L_vsmul_x_head;
$L_vsmul_exit:
.loc 1 114 2
ret;
}
// map2_s_v_mul_double
//   dst[ix * dst_stride + iy] = src[ix * src_stride + iy] * scalar
// (scalar-first argument order; the multiply keeps the element as its first
// operand, exactly as the compiler emitted it)
// for every ix in [0, param_1) and iy in [0, param_0), walked with a 2-D
// grid-stride loop (ix comes from the x thread/block indices, iy from y).
// Parameters:
//   param_0  u32  exclusive bound for iy
//   param_1  u32  exclusive bound for ix
//   param_2  u64  destination pointer (f64 elements)
//   param_3  u32  destination stride in elements, multiplied by ix
//   param_4  f64  scalar operand
//   param_5  u64  source pointer (f64 elements)
//   param_6  u32  source stride in elements, multiplied by ix
// Element indices are computed in 32-bit signed arithmetic and then widened.
.visible .entry map2_s_v_mul_double(
.param .u32 map2_s_v_mul_double_param_0,
.param .u32 map2_s_v_mul_double_param_1,
.param .u64 map2_s_v_mul_double_param_2,
.param .u32 map2_s_v_mul_double_param_3,
.param .f64 map2_s_v_mul_double_param_4,
.param .u64 map2_s_v_mul_double_param_5,
.param .u32 map2_s_v_mul_double_param_6
)
{
.reg .pred %p_x_done, %p_y_done;
.reg .s32 %lim_y, %lim_x, %dst_stride, %src_stride;
.reg .s32 %bdim_x, %bdim_y, %sreg_a, %sreg_b;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y, %src_idx, %dst_idx;
.reg .s64 %dst_gen, %src_gen, %dst, %src;
.reg .s64 %src_off, %dst_off, %src_addr, %dst_addr;
.reg .f64 %scalar, %elem, %res;
// Fetch the kernel arguments.
ld.param.u32 %lim_y, [map2_s_v_mul_double_param_0];
ld.param.u32 %lim_x, [map2_s_v_mul_double_param_1];
ld.param.u64 %dst_gen, [map2_s_v_mul_double_param_2];
ld.param.u32 %dst_stride, [map2_s_v_mul_double_param_3];
ld.param.f64 %scalar, [map2_s_v_mul_double_param_4];
ld.param.u64 %src_gen, [map2_s_v_mul_double_param_5];
ld.param.u32 %src_stride, [map2_s_v_mul_double_param_6];
// Generic -> global address space for both buffers.
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %src, %src_gen;
.loc 1 114 1
// Starting indices of this thread and the per-iteration grid strides.
mov.u32 %bdim_x, %ntid.x;
mov.u32 %sreg_a, %ctaid.x;
mov.u32 %sreg_b, %tid.x;
mad.lo.s32 %ix, %bdim_x, %sreg_a, %sreg_b;
mov.u32 %bdim_y, %ntid.y;
mov.u32 %sreg_a, %ctaid.y;
mov.u32 %sreg_b, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %sreg_a, %sreg_b;
mov.u32 %sreg_a, %nctaid.x;
mul.lo.s32 %step_x, %sreg_a, %bdim_x;
mov.u32 %sreg_a, %nctaid.y;
mul.lo.s32 %step_y, %sreg_a, %bdim_y;
$L_svmul_x_head:
// Outer loop over ix (top-tested).
setp.ge.s32 %p_x_done, %ix, %lim_x;
@%p_x_done bra $L_svmul_exit;
mov.u32 %iy, %iy_first;
$L_svmul_y_head:
// Inner loop over iy (top-tested).
setp.ge.s32 %p_y_done, %iy, %lim_y;
@%p_y_done bra $L_svmul_x_next;
// Load the source element.
mad.lo.s32 %src_idx, %ix, %src_stride, %iy;
mul.wide.s32 %src_off, %src_idx, 8;
add.s64 %src_addr, %src, %src_off;
ld.global.f64 %elem, [%src_addr];
// element * scalar (operand order kept from the generated code)
mul.f64 %res, %elem, %scalar;
// Store to the destination element.
mad.lo.s32 %dst_idx, %ix, %dst_stride, %iy;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
st.global.f64 [%dst_addr], %res;
.loc 1 114 22
add.s32 %iy, %iy, %step_y;
bra.uni $L_svmul_y_head;
$L_svmul_x_next:
add.s32 %ix, %ix, %step_x;
bra.uni $L_svmul_x_head;
$L_svmul_exit:
.loc 1 114 2
ret;
}
// map2_div_double
//   dst[ix * dst_stride + iy] = lhs[ix * lhs_stride + iy] / rhs[ix * rhs_stride + iy]
// (round-to-nearest f64 division) for every ix in [0, param_1) and iy in
// [0, param_0), walked with a 2-D grid-stride loop (ix comes from the x
// thread/block indices, iy from y).
// Parameters:
//   param_0  u32  exclusive bound for iy
//   param_1  u32  exclusive bound for ix
//   param_2  u64  destination pointer (f64 elements)
//   param_3  u32  destination stride in elements, multiplied by ix
//   param_4  u64  dividend pointer
//   param_5  u32  dividend stride
//   param_6  u64  divisor pointer
//   param_7  u32  divisor stride
// Element indices are computed in 32-bit signed arithmetic and then widened.
.visible .entry map2_div_double(
.param .u32 map2_div_double_param_0,
.param .u32 map2_div_double_param_1,
.param .u64 map2_div_double_param_2,
.param .u32 map2_div_double_param_3,
.param .u64 map2_div_double_param_4,
.param .u32 map2_div_double_param_5,
.param .u64 map2_div_double_param_6,
.param .u32 map2_div_double_param_7
)
{
.reg .pred %p_x_done, %p_y_done;
.reg .s32 %lim_y, %lim_x, %dst_stride, %lhs_stride, %rhs_stride;
.reg .s32 %bdim_x, %bdim_y, %sreg_a, %sreg_b;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y;
.reg .s32 %lhs_idx, %rhs_idx, %dst_idx;
.reg .s64 %dst_gen, %lhs_gen, %rhs_gen, %dst, %lhs, %rhs;
.reg .s64 %lhs_off, %rhs_off, %dst_off, %lhs_addr, %rhs_addr, %dst_addr;
.reg .f64 %lhs_val, %rhs_val, %res;
// Fetch the kernel arguments.
ld.param.u32 %lim_y, [map2_div_double_param_0];
ld.param.u32 %lim_x, [map2_div_double_param_1];
ld.param.u64 %dst_gen, [map2_div_double_param_2];
ld.param.u32 %dst_stride, [map2_div_double_param_3];
ld.param.u64 %lhs_gen, [map2_div_double_param_4];
ld.param.u32 %lhs_stride, [map2_div_double_param_5];
ld.param.u64 %rhs_gen, [map2_div_double_param_6];
ld.param.u32 %rhs_stride, [map2_div_double_param_7];
// Generic -> global address space for all three buffers.
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %rhs, %rhs_gen;
cvta.to.global.u64 %lhs, %lhs_gen;
.loc 1 115 1
// Starting indices of this thread and the per-iteration grid strides.
mov.u32 %bdim_x, %ntid.x;
mov.u32 %sreg_a, %ctaid.x;
mov.u32 %sreg_b, %tid.x;
mad.lo.s32 %ix, %bdim_x, %sreg_a, %sreg_b;
mov.u32 %bdim_y, %ntid.y;
mov.u32 %sreg_a, %ctaid.y;
mov.u32 %sreg_b, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %sreg_a, %sreg_b;
mov.u32 %sreg_a, %nctaid.x;
mul.lo.s32 %step_x, %sreg_a, %bdim_x;
mov.u32 %sreg_a, %nctaid.y;
mul.lo.s32 %step_y, %sreg_a, %bdim_y;
$L_div_x_head:
// Outer loop over ix (top-tested).
setp.ge.s32 %p_x_done, %ix, %lim_x;
@%p_x_done bra $L_div_exit;
mov.u32 %iy, %iy_first;
$L_div_y_head:
// Inner loop over iy (top-tested).
setp.ge.s32 %p_y_done, %iy, %lim_y;
@%p_y_done bra $L_div_x_next;
// Addresses of dividend and divisor.
mad.lo.s32 %lhs_idx, %ix, %lhs_stride, %iy;
mul.wide.s32 %lhs_off, %lhs_idx, 8;
add.s64 %lhs_addr, %lhs, %lhs_off;
mad.lo.s32 %rhs_idx, %ix, %rhs_stride, %iy;
mul.wide.s32 %rhs_off, %rhs_idx, 8;
add.s64 %rhs_addr, %rhs, %rhs_off;
// Both loads are issued before the store.
ld.global.f64 %rhs_val, [%rhs_addr];
ld.global.f64 %lhs_val, [%lhs_addr];
.loc 3 3614 3
div.rn.f64 %res, %lhs_val, %rhs_val;
.loc 1 115 51
// Store to the destination element.
mad.lo.s32 %dst_idx, %ix, %dst_stride, %iy;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
st.global.f64 [%dst_addr], %res;
.loc 1 115 22
add.s32 %iy, %iy, %step_y;
bra.uni $L_div_y_head;
$L_div_x_next:
add.s32 %ix, %ix, %step_x;
bra.uni $L_div_x_head;
$L_div_exit:
.loc 1 115 2
ret;
}
// map2_v_s_div_double
//   dst[ix * dst_stride + iy] = src[ix * src_stride + iy] / scalar
// (round-to-nearest f64 division) for every ix in [0, param_1) and iy in
// [0, param_0), walked with a 2-D grid-stride loop (ix comes from the x
// thread/block indices, iy from y).
// Parameters:
//   param_0  u32  exclusive bound for iy
//   param_1  u32  exclusive bound for ix
//   param_2  u64  destination pointer (f64 elements)
//   param_3  u32  destination stride in elements, multiplied by ix
//   param_4  u64  source (dividend) pointer
//   param_5  u32  source stride in elements, multiplied by ix
//   param_6  f64  scalar divisor
// Element indices are computed in 32-bit signed arithmetic and then widened.
.visible .entry map2_v_s_div_double(
.param .u32 map2_v_s_div_double_param_0,
.param .u32 map2_v_s_div_double_param_1,
.param .u64 map2_v_s_div_double_param_2,
.param .u32 map2_v_s_div_double_param_3,
.param .u64 map2_v_s_div_double_param_4,
.param .u32 map2_v_s_div_double_param_5,
.param .f64 map2_v_s_div_double_param_6
)
{
.reg .pred %p_x_done, %p_y_done;
.reg .s32 %lim_y, %lim_x, %dst_stride, %src_stride;
.reg .s32 %bdim_x, %bdim_y, %sreg_a, %sreg_b;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y, %src_idx, %dst_idx;
.reg .s64 %dst_gen, %src_gen, %dst, %src;
.reg .s64 %src_off, %dst_off, %src_addr, %dst_addr;
.reg .f64 %scalar, %elem, %res;
// Fetch the kernel arguments.
ld.param.u32 %lim_y, [map2_v_s_div_double_param_0];
ld.param.u32 %lim_x, [map2_v_s_div_double_param_1];
ld.param.u64 %dst_gen, [map2_v_s_div_double_param_2];
ld.param.u32 %dst_stride, [map2_v_s_div_double_param_3];
ld.param.u64 %src_gen, [map2_v_s_div_double_param_4];
ld.param.u32 %src_stride, [map2_v_s_div_double_param_5];
ld.param.f64 %scalar, [map2_v_s_div_double_param_6];
// Generic -> global address space for both buffers.
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %src, %src_gen;
.loc 1 115 1
// Starting indices of this thread and the per-iteration grid strides.
mov.u32 %bdim_x, %ntid.x;
mov.u32 %sreg_a, %ctaid.x;
mov.u32 %sreg_b, %tid.x;
mad.lo.s32 %ix, %bdim_x, %sreg_a, %sreg_b;
mov.u32 %bdim_y, %ntid.y;
mov.u32 %sreg_a, %ctaid.y;
mov.u32 %sreg_b, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %sreg_a, %sreg_b;
mov.u32 %sreg_a, %nctaid.x;
mul.lo.s32 %step_x, %sreg_a, %bdim_x;
mov.u32 %sreg_a, %nctaid.y;
mul.lo.s32 %step_y, %sreg_a, %bdim_y;
$L_vsdiv_x_head:
// Outer loop over ix (top-tested).
setp.ge.s32 %p_x_done, %ix, %lim_x;
@%p_x_done bra $L_vsdiv_exit;
mov.u32 %iy, %iy_first;
$L_vsdiv_y_head:
// Inner loop over iy (top-tested).
setp.ge.s32 %p_y_done, %iy, %lim_y;
@%p_y_done bra $L_vsdiv_x_next;
// Load the source element.
mad.lo.s32 %src_idx, %ix, %src_stride, %iy;
mul.wide.s32 %src_off, %src_idx, 8;
add.s64 %src_addr, %src, %src_off;
ld.global.f64 %elem, [%src_addr];
.loc 3 3614 3
// element / scalar
div.rn.f64 %res, %elem, %scalar;
.loc 1 115 51
// Store to the destination element.
mad.lo.s32 %dst_idx, %ix, %dst_stride, %iy;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
st.global.f64 [%dst_addr], %res;
.loc 1 115 22
add.s32 %iy, %iy, %step_y;
bra.uni $L_vsdiv_y_head;
$L_vsdiv_x_next:
add.s32 %ix, %ix, %step_x;
bra.uni $L_vsdiv_x_head;
$L_vsdiv_exit:
.loc 1 115 2
ret;
}
// map2_s_v_div_double
//   dst[ix * dst_stride + iy] = scalar / src[ix * src_stride + iy]
// (round-to-nearest f64 division) for every ix in [0, param_1) and iy in
// [0, param_0), walked with a 2-D grid-stride loop (ix comes from the x
// thread/block indices, iy from y).
// Parameters:
//   param_0  u32  exclusive bound for iy
//   param_1  u32  exclusive bound for ix
//   param_2  u64  destination pointer (f64 elements)
//   param_3  u32  destination stride in elements, multiplied by ix
//   param_4  f64  scalar dividend
//   param_5  u64  source (divisor) pointer
//   param_6  u32  source stride in elements, multiplied by ix
// Element indices are computed in 32-bit signed arithmetic and then widened.
.visible .entry map2_s_v_div_double(
.param .u32 map2_s_v_div_double_param_0,
.param .u32 map2_s_v_div_double_param_1,
.param .u64 map2_s_v_div_double_param_2,
.param .u32 map2_s_v_div_double_param_3,
.param .f64 map2_s_v_div_double_param_4,
.param .u64 map2_s_v_div_double_param_5,
.param .u32 map2_s_v_div_double_param_6
)
{
.reg .pred %p_x_done, %p_y_done;
.reg .s32 %lim_y, %lim_x, %dst_stride, %src_stride;
.reg .s32 %bdim_x, %bdim_y, %sreg_a, %sreg_b;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y, %src_idx, %dst_idx;
.reg .s64 %dst_gen, %src_gen, %dst, %src;
.reg .s64 %src_off, %dst_off, %src_addr, %dst_addr;
.reg .f64 %scalar, %elem, %res;
// Fetch the kernel arguments.
ld.param.u32 %lim_y, [map2_s_v_div_double_param_0];
ld.param.u32 %lim_x, [map2_s_v_div_double_param_1];
ld.param.u64 %dst_gen, [map2_s_v_div_double_param_2];
ld.param.u32 %dst_stride, [map2_s_v_div_double_param_3];
ld.param.f64 %scalar, [map2_s_v_div_double_param_4];
ld.param.u64 %src_gen, [map2_s_v_div_double_param_5];
ld.param.u32 %src_stride, [map2_s_v_div_double_param_6];
// Generic -> global address space for both buffers.
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %src, %src_gen;
.loc 1 115 1
// Starting indices of this thread and the per-iteration grid strides.
mov.u32 %bdim_x, %ntid.x;
mov.u32 %sreg_a, %ctaid.x;
mov.u32 %sreg_b, %tid.x;
mad.lo.s32 %ix, %bdim_x, %sreg_a, %sreg_b;
mov.u32 %bdim_y, %ntid.y;
mov.u32 %sreg_a, %ctaid.y;
mov.u32 %sreg_b, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %sreg_a, %sreg_b;
mov.u32 %sreg_a, %nctaid.x;
mul.lo.s32 %step_x, %sreg_a, %bdim_x;
mov.u32 %sreg_a, %nctaid.y;
mul.lo.s32 %step_y, %sreg_a, %bdim_y;
$L_svdiv_x_head:
// Outer loop over ix (top-tested).
setp.ge.s32 %p_x_done, %ix, %lim_x;
@%p_x_done bra $L_svdiv_exit;
mov.u32 %iy, %iy_first;
$L_svdiv_y_head:
// Inner loop over iy (top-tested).
setp.ge.s32 %p_y_done, %iy, %lim_y;
@%p_y_done bra $L_svdiv_x_next;
// Load the source element.
mad.lo.s32 %src_idx, %ix, %src_stride, %iy;
mul.wide.s32 %src_off, %src_idx, 8;
add.s64 %src_addr, %src, %src_off;
ld.global.f64 %elem, [%src_addr];
.loc 3 3614 3
// scalar / element
div.rn.f64 %res, %scalar, %elem;
.loc 1 115 51
// Store to the destination element.
mad.lo.s32 %dst_idx, %ix, %dst_stride, %iy;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
st.global.f64 [%dst_addr], %res;
.loc 1 115 22
add.s32 %iy, %iy, %step_y;
bra.uni $L_svdiv_y_head;
$L_svdiv_x_next:
add.s32 %ix, %ix, %step_x;
bra.uni $L_svdiv_x_head;
$L_svdiv_exit:
.loc 1 115 2
ret;
}
// map2_mod_double: element-wise floating-point remainder of two strided
// f64 arrays, dst[ix*param_3 + iy] = rem(a[ix*param_5 + iy], b[ix*param_7 + iy]),
// where the result carries the sign of a.
// NOTE(review): the remainder body is tagged `.loc 2 458` (math_functions_dbl_ptx3.h)
// and looks like the inlined fmod() expansion - confirm against the .cu source.
// Parameters:
//   param_0  u32  exclusive bound for iy (index built from the y thread/block ids)
//   param_1  u32  exclusive bound for ix (index built from the x thread/block ids)
//   param_2  u64  destination pointer
//   param_3  u32  destination stride, multiplied by ix
//   param_4  u64  pointer to operand a (dividend)
//   param_5  u32  stride of a
//   param_6  u64  pointer to operand b (divisor)
//   param_7  u32  stride of b
// Both indices advance by nctaid*ntid per iteration (2-D grid-stride loop).
.visible .entry map2_mod_double(
.param .u32 map2_mod_double_param_0,
.param .u32 map2_mod_double_param_1,
.param .u64 map2_mod_double_param_2,
.param .u32 map2_mod_double_param_3,
.param .u64 map2_mod_double_param_4,
.param .u32 map2_mod_double_param_5,
.param .u64 map2_mod_double_param_6,
.param .u32 map2_mod_double_param_7
)
{
.reg .pred %p<18>;
.reg .s32 %r<66>;
.reg .s64 %rd<33>;
.reg .f64 %fd<20>;
ld.param.u32 %r16, [map2_mod_double_param_0];
ld.param.u32 %r17, [map2_mod_double_param_1];
ld.param.u64 %rd9, [map2_mod_double_param_2];
ld.param.u32 %r18, [map2_mod_double_param_3];
ld.param.u64 %rd10, [map2_mod_double_param_4];
ld.param.u32 %r19, [map2_mod_double_param_5];
ld.param.u64 %rd11, [map2_mod_double_param_6];
ld.param.u32 %r20, [map2_mod_double_param_7];
.loc 1 116 1
// %r61 = ix = ntid.x * ctaid.x + tid.x; leave immediately if out of range.
mov.u32 %r21, %ntid.x;
mov.u32 %r22, %ctaid.x;
mov.u32 %r23, %tid.x;
mad.lo.s32 %r61, %r21, %r22, %r23;
.loc 1 116 1
setp.ge.s32 %p1, %r61, %r17;
@%p1 bra BB59_19;
// %rd12 = a, %rd13 = b, %rd28 = dst, all converted to global addresses.
cvta.to.global.u64 %rd12, %rd10;
cvta.to.global.u64 %rd13, %rd11;
cvta.to.global.u64 %rd28, %rd9;
BB59_2:
.loc 1 116 1
// Outer-loop body: (re)compute the starting iy = ntid.y * ctaid.y + tid.y.
mov.u32 %r24, %ctaid.y;
mov.u32 %r25, %ntid.y;
mov.u32 %r26, %tid.y;
mad.lo.s32 %r62, %r25, %r24, %r26;
.loc 1 116 1
setp.ge.s32 %p2, %r62, %r16;
@%p2 bra BB59_18;
BB59_3:
.loc 1 116 1
// Inner-loop body: address of a[ix*stride_a + iy] ...
mad.lo.s32 %r31, %r61, %r19, %r62;
mul.wide.s32 %rd14, %r31, 8;
add.s64 %rd15, %rd12, %rd14;
.loc 1 116 1
// ... and of b[ix*stride_b + iy].
mad.lo.s32 %r32, %r61, %r20, %r62;
mul.wide.s32 %rd16, %r32, 8;
add.s64 %rd17, %rd13, %rd16;
.loc 1 116 1
// %fd19 = a (also the result register), %fd17 = |a|.
ld.global.f64 %fd19, [%rd15];
.loc 2 458 10
abs.f64 %fd17, %fd19;
.loc 1 116 1
// %fd3 = b, %fd18 = |b|.
ld.global.f64 %fd3, [%rd17];
.loc 2 458 10
abs.f64 %fd18, %fd3;
// "greater than +inf or unordered" is true only for NaN: either operand NaN
// -> BB59_16 (result = a + b).
setp.gtu.f64 %p3, %fd17, 0d7FF0000000000000;
setp.gtu.f64 %p4, %fd18, 0d7FF0000000000000;
or.pred %p5, %p3, %p4;
.loc 2 458 10
@%p5 bra BB59_16;
// |a| == +inf or |b| == 0 -> result is NaN (BB59_5).
setp.eq.f64 %p6, %fd17, 0d7FF0000000000000;
setp.eq.f64 %p7, %fd18, 0d0000000000000000;
or.pred %p8, %p6, %p7;
@!%p8 bra BB59_6;
bra.uni BB59_5;
BB59_5:
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB59_17;
BB59_6:
.loc 2 458 10
// |a| < |b|: the result is a itself, which %fd19 already holds.
setp.ltu.f64 %p9, %fd17, %fd18;
@%p9 bra BB59_17;
// Upper 32 bits >> 20 = exponent field (sign bit is clear after abs).
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd17;
}
shr.s32 %r63, %r33, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r34}, %fd18;
}
shr.s32 %r64, %r34, 20;
// Exponent field of |a| is 0 (subnormal): scale by 2^54 (0d4350000000000000)
// and compensate the exponent by -54.
setp.gt.s32 %p10, %r63, 0;
@%p10 bra BB59_9;
mul.f64 %fd17, %fd17, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r35}, %fd17;
}
shr.s32 %r36, %r35, 20;
add.s32 %r37, %r63, %r36;
add.s32 %r63, %r37, -54;
BB59_9:
// Same subnormal normalisation for |b|.
setp.gt.s32 %p11, %r64, 0;
@%p11 bra BB59_11;
mul.f64 %fd18, %fd18, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r38}, %fd18;
}
shr.s32 %r39, %r38, 20;
add.s32 %r40, %r64, %r39;
add.s32 %r64, %r40, -54;
BB59_11:
// Isolate the 52-bit mantissas (mask 2^52-1) and set the implicit leading
// one (2^52): %rd31 = mantissa of |a|, %rd2 = mantissa of |b|.
mov.b64 %rd18, %fd17;
and.b64 %rd19, %rd18, 4503599627370495;
or.b64 %rd31, %rd19, 4503599627370496;
mov.b64 %rd20, %fd18;
and.b64 %rd21, %rd20, 4503599627370495;
or.b64 %rd2, %rd21, 4503599627370496;
// Iteration count = exp(a) - exp(b) + 1.
add.s32 %r41, %r63, 1;
sub.s32 %r65, %r41, %r64;
BB59_12:
// Shift-and-subtract step: take (rem - mb) unless it went negative (sign
// seen in the upper 32 bits), then shift left by one for the next round.
// %rd4 keeps the un-shifted value of the current step.
sub.s64 %rd22, %rd31, %rd2;
mov.b64 %fd12, %rd22;
{
.reg .b32 %temp;
mov.b64 {%temp, %r42}, %fd12;
}
setp.lt.s32 %p12, %r42, 0;
selp.b64 %rd4, %rd31, %rd22, %p12;
shl.b64 %rd31, %rd4, 1;
add.s32 %r65, %r65, -1;
setp.gt.s32 %p13, %r65, 0;
@%p13 bra BB59_12;
// Final remainder mantissa with bit 63 cleared; zero needs no renormalising.
and.b64 %rd32, %rd4, 9223372036854775807;
setp.eq.s64 %p14, %rd32, 0;
@%p14 bra BB59_15;
// Renormalise: reinterpret the remainder bits as a double, scale by 2^54 and
// read the exponent field to derive the left-shift count %r46 = 55 - exp.
mov.b64 %fd13, %rd32;
mul.f64 %fd14, %fd13, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd14;
}
shr.s32 %r44, %r43, 20;
mov.u32 %r45, 55;
.loc 2 458 10
sub.s32 %r46, %r45, %r44;
// %r47 = divisor exponent minus the shift = exponent of the result.
sub.s32 %r47, %r64, %r46;
shl.b64 %rd23, %rd32, %r46;
// Result exponent < 1: subnormal result, shift the mantissa right instead.
setp.lt.s32 %p15, %r47, 1;
mov.u32 %r48, 1;
.loc 2 458 10
sub.s32 %r49, %r48, %r47;
shr.u64 %rd24, %rd23, %r49;
// Otherwise place the exponent at bit 52 and add the shifted mantissa.
// NOTE(review): the +4095 presumably cancels the leading 1 expected at
// bit 52 of %rd23 once the sum wraps modulo 2^64 - confirm.
add.s32 %r50, %r47, 4095;
cvt.u64.u32 %rd25, %r50;
shl.b64 %rd26, %rd25, 52;
add.s64 %rd27, %rd26, %rd23;
selp.b64 %rd32, %rd24, %rd27, %p15;
BB59_15:
// Reassemble the result: magnitude bits from %rd32, sign bit copied from a.
mov.b64 %fd15, %rd32;
{
.reg .b32 %temp;
mov.b64 {%r51, %temp}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd19;
}
and.b32 %r54, %r53, -2147483648;
or.b32 %r55, %r52, %r54;
mov.b64 %fd19, {%r51, %r55};
bra.uni BB59_17;
BB59_16:
.loc 2 458 10
// NaN operand: a + b propagates the NaN.
add.f64 %fd19, %fd19, %fd3;
BB59_17:
.loc 1 116 145
// Store the result to dst[ix*stride_dst + iy].
mad.lo.s32 %r56, %r61, %r18, %r62;
mul.wide.s32 %rd29, %r56, 8;
add.s64 %rd30, %rd28, %rd29;
.loc 1 116 145
st.global.f64 [%rd30], %fd19;
.loc 1 116 22
// iy += nctaid.y * ntid.y
mov.u32 %r58, %nctaid.y;
mad.lo.s32 %r62, %r58, %r25, %r62;
.loc 1 116 1
setp.lt.s32 %p16, %r62, %r16;
@%p16 bra BB59_3;
BB59_18:
.loc 1 116 22
// ix += nctaid.x * ntid.x
mov.u32 %r59, %nctaid.x;
mad.lo.s32 %r61, %r59, %r21, %r61;
.loc 1 116 1
setp.lt.s32 %p17, %r61, %r17;
@%p17 bra BB59_2;
BB59_19:
.loc 1 116 2
ret;
}
// map2_v_s_mod_double: element-wise floating-point remainder of a strided
// f64 array by a scalar, dst[ix*param_3 + iy] = rem(a[ix*param_5 + iy], s),
// where the result carries the sign of the array element.
// NOTE(review): the remainder body is tagged `.loc 2 458` (math_functions_dbl_ptx3.h)
// and looks like the inlined fmod() expansion - confirm against the .cu source.
// Parameters:
//   param_0  u32  exclusive bound for iy (index built from the y thread/block ids)
//   param_1  u32  exclusive bound for ix (index built from the x thread/block ids)
//   param_2  u64  destination pointer
//   param_3  u32  destination stride, multiplied by ix
//   param_4  u64  pointer to operand a (dividend)
//   param_5  u32  stride of a
//   param_6  f64  scalar divisor s
// Both indices advance by nctaid*ntid per iteration (2-D grid-stride loop).
.visible .entry map2_v_s_mod_double(
.param .u32 map2_v_s_mod_double_param_0,
.param .u32 map2_v_s_mod_double_param_1,
.param .u64 map2_v_s_mod_double_param_2,
.param .u32 map2_v_s_mod_double_param_3,
.param .u64 map2_v_s_mod_double_param_4,
.param .u32 map2_v_s_mod_double_param_5,
.param .f64 map2_v_s_mod_double_param_6
)
{
.reg .pred %p<18>;
.reg .s32 %r<64>;
.reg .s64 %rd<29>;
.reg .f64 %fd<20>;
ld.param.u32 %r16, [map2_v_s_mod_double_param_0];
ld.param.u32 %r17, [map2_v_s_mod_double_param_1];
ld.param.u64 %rd9, [map2_v_s_mod_double_param_2];
ld.param.u32 %r18, [map2_v_s_mod_double_param_3];
ld.param.u64 %rd10, [map2_v_s_mod_double_param_4];
ld.param.u32 %r19, [map2_v_s_mod_double_param_5];
ld.param.f64 %fd11, [map2_v_s_mod_double_param_6];
.loc 1 116 1
// %r59 = ix = ntid.x * ctaid.x + tid.x; leave immediately if out of range.
mov.u32 %r20, %ntid.x;
mov.u32 %r21, %ctaid.x;
mov.u32 %r22, %tid.x;
mad.lo.s32 %r59, %r20, %r21, %r22;
.loc 1 116 1
setp.ge.s32 %p1, %r59, %r17;
@%p1 bra BB60_20;
// %rd11 = a, %rd24 = dst, both converted to global addresses.
cvta.to.global.u64 %rd11, %rd10;
cvta.to.global.u64 %rd24, %rd9;
BB60_2:
.loc 1 116 1
// Outer-loop body: (re)compute the starting iy = ntid.y * ctaid.y + tid.y.
mov.u32 %r23, %ctaid.y;
mov.u32 %r24, %ntid.y;
mov.u32 %r25, %tid.y;
mad.lo.s32 %r60, %r24, %r23, %r25;
.loc 1 116 1
setp.ge.s32 %p2, %r60, %r16;
@%p2 bra BB60_19;
.loc 2 458 10
// Per-scalar values computed before the inner loop: %fd1 = |s| and
// %fd2 = |s| * 2^54 (used only if s turns out to be subnormal).
abs.f64 %fd1, %fd11;
mul.f64 %fd2, %fd1, 0d4350000000000000;
BB60_4:
.loc 1 116 1
// Inner-loop body: load a[ix*stride_a + iy] into %fd19 (also the result
// register); %fd17 = |a|.
mad.lo.s32 %r30, %r59, %r19, %r60;
mul.wide.s32 %rd12, %r30, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 1 116 1
ld.global.f64 %fd19, [%rd13];
.loc 2 458 10
abs.f64 %fd17, %fd19;
// "greater than +inf or unordered" is true only for NaN: a or s NaN
// -> BB60_17 (result = a + s).
setp.gtu.f64 %p3, %fd17, 0d7FF0000000000000;
setp.gtu.f64 %p4, %fd1, 0d7FF0000000000000;
or.pred %p5, %p3, %p4;
.loc 2 458 10
@%p5 bra BB60_17;
// |s| == 0 or |a| == +inf -> result is NaN (BB60_6).
setp.eq.f64 %p6, %fd1, 0d0000000000000000;
.loc 2 458 10
setp.eq.f64 %p7, %fd17, 0d7FF0000000000000;
or.pred %p8, %p7, %p6;
@!%p8 bra BB60_7;
bra.uni BB60_6;
BB60_6:
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB60_18;
BB60_7:
.loc 2 458 10
// |a| < |s|: the result is a itself, which %fd19 already holds.
setp.ltu.f64 %p9, %fd17, %fd1;
@%p9 bra BB60_18;
// Upper 32 bits >> 20 = exponent field (sign bit is clear after abs).
{
.reg .b32 %temp;
mov.b64 {%temp, %r31}, %fd17;
}
shr.s32 %r61, %r31, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r32}, %fd1;
}
shr.s32 %r62, %r32, 20;
// Exponent field of |a| is 0 (subnormal): scale by 2^54 and compensate the
// exponent by -54.
setp.gt.s32 %p10, %r61, 0;
@%p10 bra BB60_10;
mul.f64 %fd17, %fd17, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd17;
}
shr.s32 %r34, %r33, 20;
add.s32 %r35, %r61, %r34;
add.s32 %r61, %r35, -54;
BB60_10:
// Same for |s|, using the pre-scaled %fd2 instead of multiplying again;
// %fd18 ends up holding whichever form of |s| is normalised.
setp.gt.s32 %p11, %r62, 0;
mov.f64 %fd18, %fd1;
@%p11 bra BB60_12;
{
.reg .b32 %temp;
mov.b64 {%temp, %r36}, %fd2;
}
shr.s32 %r37, %r36, 20;
add.s32 %r38, %r62, %r37;
add.s32 %r62, %r38, -54;
mov.f64 %fd18, %fd2;
BB60_12:
.loc 2 458 10
// Isolate the 52-bit mantissas (mask 2^52-1) and set the implicit leading
// one (2^52): %rd27 = mantissa of |a|, %rd2 = mantissa of |s|.
mov.f64 %fd7, %fd18;
mov.b64 %rd14, %fd17;
and.b64 %rd15, %rd14, 4503599627370495;
or.b64 %rd27, %rd15, 4503599627370496;
mov.b64 %rd16, %fd7;
and.b64 %rd17, %rd16, 4503599627370495;
or.b64 %rd2, %rd17, 4503599627370496;
// Iteration count = exp(a) - exp(s) + 1.
add.s32 %r39, %r61, 1;
sub.s32 %r63, %r39, %r62;
BB60_13:
// Shift-and-subtract step: take (rem - ms) unless it went negative (sign
// seen in the upper 32 bits), then shift left by one for the next round.
// %rd4 keeps the un-shifted value of the current step.
sub.s64 %rd18, %rd27, %rd2;
mov.b64 %fd12, %rd18;
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd12;
}
setp.lt.s32 %p12, %r40, 0;
selp.b64 %rd4, %rd27, %rd18, %p12;
shl.b64 %rd27, %rd4, 1;
add.s32 %r63, %r63, -1;
setp.gt.s32 %p13, %r63, 0;
@%p13 bra BB60_13;
// Final remainder mantissa with bit 63 cleared; zero needs no renormalising.
and.b64 %rd28, %rd4, 9223372036854775807;
setp.eq.s64 %p14, %rd28, 0;
@%p14 bra BB60_16;
// Renormalise: reinterpret the remainder bits as a double, scale by 2^54 and
// read the exponent field to derive the left-shift count %r44 = 55 - exp.
mov.b64 %fd13, %rd28;
mul.f64 %fd14, %fd13, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r41}, %fd14;
}
shr.s32 %r42, %r41, 20;
mov.u32 %r43, 55;
.loc 2 458 10
sub.s32 %r44, %r43, %r42;
// %r45 = divisor exponent minus the shift = exponent of the result.
sub.s32 %r45, %r62, %r44;
shl.b64 %rd19, %rd28, %r44;
// Result exponent < 1: subnormal result, shift the mantissa right instead.
setp.lt.s32 %p15, %r45, 1;
mov.u32 %r46, 1;
.loc 2 458 10
sub.s32 %r47, %r46, %r45;
shr.u64 %rd20, %rd19, %r47;
// Otherwise place the exponent at bit 52 and add the shifted mantissa.
// NOTE(review): the +4095 presumably cancels the leading 1 expected at
// bit 52 of %rd19 once the sum wraps modulo 2^64 - confirm.
add.s32 %r48, %r45, 4095;
cvt.u64.u32 %rd21, %r48;
shl.b64 %rd22, %rd21, 52;
add.s64 %rd23, %rd22, %rd19;
selp.b64 %rd28, %rd20, %rd23, %p15;
BB60_16:
// Reassemble the result: magnitude bits from %rd28, sign bit copied from a.
mov.b64 %fd15, %rd28;
{
.reg .b32 %temp;
mov.b64 {%r49, %temp}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r50}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd19;
}
and.b32 %r52, %r51, -2147483648;
or.b32 %r53, %r50, %r52;
mov.b64 %fd19, {%r49, %r53};
bra.uni BB60_18;
BB60_17:
.loc 2 458 10
// NaN operand: a + s propagates the NaN.
add.f64 %fd19, %fd19, %fd11;
BB60_18:
.loc 1 116 89
// Store the result to dst[ix*stride_dst + iy].
mad.lo.s32 %r54, %r59, %r18, %r60;
mul.wide.s32 %rd25, %r54, 8;
add.s64 %rd26, %rd24, %rd25;
.loc 1 116 89
st.global.f64 [%rd26], %fd19;
.loc 1 116 22
// iy += nctaid.y * ntid.y
mov.u32 %r56, %nctaid.y;
mad.lo.s32 %r60, %r56, %r24, %r60;
.loc 1 116 1
setp.lt.s32 %p16, %r60, %r16;
@%p16 bra BB60_4;
BB60_19:
.loc 1 116 22
// ix += nctaid.x * ntid.x
mov.u32 %r57, %nctaid.x;
mad.lo.s32 %r59, %r57, %r20, %r59;
.loc 1 116 1
setp.lt.s32 %p17, %r59, %r17;
@%p17 bra BB60_2;
BB60_20:
.loc 1 116 2
ret;
}
// map2_s_v_mod_double: element-wise floating-point remainder of a scalar by
// a strided f64 array, dst[ix*param_3 + iy] = rem(s, b[ix*param_6 + iy]),
// where the result carries the sign of the scalar.
// NOTE(review): the remainder body is tagged `.loc 2 458` (math_functions_dbl_ptx3.h)
// and looks like the inlined fmod() expansion - confirm against the .cu source.
// Parameters:
//   param_0  u32  exclusive bound for iy (index built from the y thread/block ids)
//   param_1  u32  exclusive bound for ix (index built from the x thread/block ids)
//   param_2  u64  destination pointer
//   param_3  u32  destination stride, multiplied by ix
//   param_4  f64  scalar dividend s
//   param_5  u64  pointer to operand b (divisor)
//   param_6  u32  stride of b
// Both indices advance by nctaid*ntid per iteration (2-D grid-stride loop).
.visible .entry map2_s_v_mod_double(
.param .u32 map2_s_v_mod_double_param_0,
.param .u32 map2_s_v_mod_double_param_1,
.param .u64 map2_s_v_mod_double_param_2,
.param .u32 map2_s_v_mod_double_param_3,
.param .f64 map2_s_v_mod_double_param_4,
.param .u64 map2_s_v_mod_double_param_5,
.param .u32 map2_s_v_mod_double_param_6
)
{
.reg .pred %p<18>;
.reg .s32 %r<64>;
.reg .s64 %rd<29>;
.reg .f64 %fd<20>;
ld.param.u32 %r16, [map2_s_v_mod_double_param_0];
ld.param.u32 %r17, [map2_s_v_mod_double_param_1];
ld.param.u64 %rd9, [map2_s_v_mod_double_param_2];
ld.param.u32 %r18, [map2_s_v_mod_double_param_3];
ld.param.f64 %fd11, [map2_s_v_mod_double_param_4];
ld.param.u64 %rd10, [map2_s_v_mod_double_param_5];
ld.param.u32 %r19, [map2_s_v_mod_double_param_6];
.loc 1 116 1
// %r59 = ix = ntid.x * ctaid.x + tid.x; leave immediately if out of range.
mov.u32 %r20, %ntid.x;
mov.u32 %r21, %ctaid.x;
mov.u32 %r22, %tid.x;
mad.lo.s32 %r59, %r20, %r21, %r22;
.loc 1 116 1
setp.ge.s32 %p1, %r59, %r17;
@%p1 bra BB61_20;
// %rd11 = b, %rd24 = dst, both converted to global addresses.
cvta.to.global.u64 %rd11, %rd10;
cvta.to.global.u64 %rd24, %rd9;
BB61_2:
.loc 1 116 1
// Outer-loop body: (re)compute the starting iy = ntid.y * ctaid.y + tid.y.
mov.u32 %r23, %ctaid.y;
mov.u32 %r24, %ntid.y;
mov.u32 %r25, %tid.y;
mad.lo.s32 %r60, %r24, %r23, %r25;
.loc 1 116 1
setp.ge.s32 %p2, %r60, %r16;
@%p2 bra BB61_19;
.loc 2 458 10
// Per-scalar values computed before the inner loop: %fd1 = |s| and
// %fd2 = |s| * 2^54 (used only if s turns out to be subnormal).
abs.f64 %fd1, %fd11;
mul.f64 %fd2, %fd1, 0d4350000000000000;
BB61_4:
.loc 1 116 1
// Inner-loop body: load b[ix*stride_b + iy] into %fd3; %fd18 = |b|.
mad.lo.s32 %r30, %r59, %r19, %r60;
mul.wide.s32 %rd12, %r30, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 1 116 1
ld.global.f64 %fd3, [%rd13];
.loc 2 458 10
abs.f64 %fd18, %fd3;
// "greater than +inf or unordered" is true only for NaN: s or b NaN
// -> BB61_17 (result = b + s).
setp.gtu.f64 %p3, %fd18, 0d7FF0000000000000;
setp.gtu.f64 %p4, %fd1, 0d7FF0000000000000;
or.pred %p5, %p4, %p3;
.loc 2 458 10
@%p5 bra BB61_17;
// |s| == +inf or |b| == 0 -> result is NaN (BB61_6).
setp.eq.f64 %p6, %fd1, 0d7FF0000000000000;
.loc 2 458 10
setp.eq.f64 %p7, %fd18, 0d0000000000000000;
or.pred %p8, %p6, %p7;
@!%p8 bra BB61_7;
bra.uni BB61_6;
BB61_6:
mov.f64 %fd16, 0dFFF8000000000000;
.loc 2 458 10
mov.f64 %fd19, %fd16;
bra.uni BB61_18;
BB61_7:
.loc 2 458 10
// |s| < |b|: the result is the scalar itself.
setp.ltu.f64 %p9, %fd1, %fd18;
mov.f64 %fd19, %fd11;
@%p9 bra BB61_18;
// Upper 32 bits >> 20 = exponent field (sign bit is clear after abs).
{
.reg .b32 %temp;
mov.b64 {%temp, %r31}, %fd1;
}
shr.s32 %r61, %r31, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r32}, %fd18;
}
shr.s32 %r62, %r32, 20;
// Exponent field of |s| is 0 (subnormal): switch to the pre-scaled %fd2 and
// compensate the exponent by -54. %fd17 holds the normalised |s|.
setp.gt.s32 %p10, %r61, 0;
mov.f64 %fd17, %fd1;
@%p10 bra BB61_10;
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd2;
}
shr.s32 %r34, %r33, 20;
add.s32 %r35, %r61, %r34;
add.s32 %r61, %r35, -54;
mov.f64 %fd17, %fd2;
BB61_10:
.loc 2 458 10
mov.f64 %fd5, %fd17;
// Exponent field of |b| is 0 (subnormal): scale by 2^54 and compensate.
setp.gt.s32 %p11, %r62, 0;
@%p11 bra BB61_12;
mul.f64 %fd18, %fd18, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r36}, %fd18;
}
shr.s32 %r37, %r36, 20;
add.s32 %r38, %r62, %r37;
add.s32 %r62, %r38, -54;
BB61_12:
// Isolate the 52-bit mantissas (mask 2^52-1) and set the implicit leading
// one (2^52): %rd27 = mantissa of |s|, %rd2 = mantissa of |b|.
mov.b64 %rd14, %fd5;
and.b64 %rd15, %rd14, 4503599627370495;
or.b64 %rd27, %rd15, 4503599627370496;
mov.b64 %rd16, %fd18;
and.b64 %rd17, %rd16, 4503599627370495;
or.b64 %rd2, %rd17, 4503599627370496;
// Iteration count = exp(s) - exp(b) + 1.
add.s32 %r39, %r61, 1;
sub.s32 %r63, %r39, %r62;
BB61_13:
// Shift-and-subtract step: take (rem - mb) unless it went negative (sign
// seen in the upper 32 bits), then shift left by one for the next round.
// %rd4 keeps the un-shifted value of the current step.
sub.s64 %rd18, %rd27, %rd2;
mov.b64 %fd12, %rd18;
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd12;
}
setp.lt.s32 %p12, %r40, 0;
selp.b64 %rd4, %rd27, %rd18, %p12;
shl.b64 %rd27, %rd4, 1;
add.s32 %r63, %r63, -1;
setp.gt.s32 %p13, %r63, 0;
@%p13 bra BB61_13;
// Final remainder mantissa with bit 63 cleared; zero needs no renormalising.
and.b64 %rd28, %rd4, 9223372036854775807;
setp.eq.s64 %p14, %rd28, 0;
@%p14 bra BB61_16;
// Renormalise: reinterpret the remainder bits as a double, scale by 2^54 and
// read the exponent field to derive the left-shift count %r44 = 55 - exp.
mov.b64 %fd13, %rd28;
mul.f64 %fd14, %fd13, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r41}, %fd14;
}
shr.s32 %r42, %r41, 20;
mov.u32 %r43, 55;
.loc 2 458 10
sub.s32 %r44, %r43, %r42;
// %r45 = divisor exponent minus the shift = exponent of the result.
sub.s32 %r45, %r62, %r44;
shl.b64 %rd19, %rd28, %r44;
// Result exponent < 1: subnormal result, shift the mantissa right instead.
setp.lt.s32 %p15, %r45, 1;
mov.u32 %r46, 1;
.loc 2 458 10
sub.s32 %r47, %r46, %r45;
shr.u64 %rd20, %rd19, %r47;
// Otherwise place the exponent at bit 52 and add the shifted mantissa.
// NOTE(review): the +4095 presumably cancels the leading 1 expected at
// bit 52 of %rd19 once the sum wraps modulo 2^64 - confirm.
add.s32 %r48, %r45, 4095;
cvt.u64.u32 %rd21, %r48;
shl.b64 %rd22, %rd21, 52;
add.s64 %rd23, %rd22, %rd19;
selp.b64 %rd28, %rd20, %rd23, %p15;
BB61_16:
// Reassemble the result: magnitude bits from %rd28, sign bit copied from
// the scalar dividend (%fd11).
mov.b64 %fd15, %rd28;
{
.reg .b32 %temp;
mov.b64 {%r49, %temp}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r50}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
and.b32 %r52, %r51, -2147483648;
or.b32 %r53, %r50, %r52;
mov.b64 %fd8, {%r49, %r53};
mov.f64 %fd19, %fd8;
bra.uni BB61_18;
BB61_17:
.loc 2 458 10
// NaN operand: b + s propagates the NaN.
add.f64 %fd9, %fd3, %fd11;
mov.f64 %fd19, %fd9;
BB61_18:
.loc 2 458 10
mov.f64 %fd10, %fd19;
.loc 1 116 89
// Store the result to dst[ix*stride_dst + iy].
mad.lo.s32 %r54, %r59, %r18, %r60;
mul.wide.s32 %rd25, %r54, 8;
add.s64 %rd26, %rd24, %rd25;
.loc 1 116 89
st.global.f64 [%rd26], %fd10;
.loc 1 116 22
// iy += nctaid.y * ntid.y
mov.u32 %r56, %nctaid.y;
mad.lo.s32 %r60, %r56, %r24, %r60;
.loc 1 116 1
setp.lt.s32 %p16, %r60, %r16;
@%p16 bra BB61_4;
BB61_19:
.loc 1 116 22
// ix += nctaid.x * ntid.x
mov.u32 %r57, %nctaid.x;
mad.lo.s32 %r59, %r57, %r20, %r59;
.loc 1 116 1
setp.lt.s32 %p17, %r59, %r17;
@%p17 bra BB61_2;
BB61_20:
.loc 1 116 2
ret;
}
// map2_pow_double: element-wise power of two strided f64 arrays,
//   dst[ix*param_3 + iy] = pow(x[ix*param_5 + iy], y[ix*param_7 + iy]).
// The special-case ladder below is tagged `.loc 2 328` (math_functions_dbl_ptx3.h);
// the general case is delegated to __internal_accurate_pow(|x|, y).
// Parameters:
//   param_0  u32  exclusive bound for iy (index built from the y thread/block ids)
//   param_1  u32  exclusive bound for ix (index built from the x thread/block ids)
//   param_2  u64  destination pointer
//   param_3  u32  destination stride, multiplied by ix
//   param_4  u64  pointer to the base x
//   param_5  u32  stride of x
//   param_6  u64  pointer to the exponent y
//   param_7  u32  stride of y
// Both indices advance by nctaid*ntid per iteration (2-D grid-stride loop).
.visible .entry map2_pow_double(
.param .u32 map2_pow_double_param_0,
.param .u32 map2_pow_double_param_1,
.param .u64 map2_pow_double_param_2,
.param .u32 map2_pow_double_param_3,
.param .u64 map2_pow_double_param_4,
.param .u32 map2_pow_double_param_5,
.param .u64 map2_pow_double_param_6,
.param .u32 map2_pow_double_param_7
)
{
.reg .pred %p<26>;
.reg .s32 %r<52>;
.reg .s64 %rd<13>;
.reg .f64 %fd<20>;
ld.param.u32 %r10, [map2_pow_double_param_0];
ld.param.u32 %r11, [map2_pow_double_param_1];
ld.param.u64 %rd1, [map2_pow_double_param_2];
ld.param.u32 %r12, [map2_pow_double_param_3];
ld.param.u64 %rd2, [map2_pow_double_param_4];
ld.param.u32 %r13, [map2_pow_double_param_5];
ld.param.u64 %rd3, [map2_pow_double_param_6];
ld.param.u32 %r14, [map2_pow_double_param_7];
.loc 1 117 1
// %r50 = ix = ntid.x * ctaid.x + tid.x; leave immediately if out of range.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r50, %r15, %r16, %r17;
.loc 1 117 1
setp.ge.s32 %p1, %r50, %r11;
@%p1 bra BB62_21;
.loc 1 117 1
mov.u32 %r18, %ntid.y;
.loc 1 117 22
// %r2 = nctaid.y * ntid.y, the iy step.
mov.u32 %r19, %nctaid.y;
mul.lo.s32 %r2, %r19, %r18;
// %rd4 = x, %rd5 = y, %rd10 = dst, all converted to global addresses.
cvta.to.global.u64 %rd4, %rd2;
cvta.to.global.u64 %rd5, %rd3;
cvta.to.global.u64 %rd10, %rd1;
BB62_2:
.loc 1 117 1
// Outer-loop body: (re)compute the starting iy = ntid.y * ctaid.y + tid.y.
mov.u32 %r20, %ctaid.y;
mov.u32 %r22, %tid.y;
mad.lo.s32 %r51, %r18, %r20, %r22;
.loc 1 117 1
setp.ge.s32 %p2, %r51, %r10;
@%p2 bra BB62_20;
BB62_3:
.loc 1 117 1
// Inner-loop body: load x into %fd1 and y into %fd2; %r6 / %r7 receive the
// upper 32 bits (sign + exponent + high mantissa) of x / y.
mad.lo.s32 %r27, %r50, %r13, %r51;
mul.wide.s32 %rd6, %r27, 8;
add.s64 %rd7, %rd4, %rd6;
mad.lo.s32 %r28, %r50, %r14, %r51;
mul.wide.s32 %rd8, %r28, 8;
add.s64 %rd9, %rd5, %rd8;
ld.global.f64 %fd1, [%rd7];
.loc 2 328 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r6}, %fd1;
}
.loc 1 117 1
ld.global.f64 %fd2, [%rd9];
.loc 2 328 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r7}, %fd2;
}
// %fd3 = |y - 2*trunc(y/2)|; it equals 1.0 exactly when y is an odd integer
// (tested below with setp.eq against 1.0).
mul.f64 %fd13, %fd2, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd14, %fd13;
fma.rn.f64 %fd15, %fd14, 0dC000000000000000, %fd2;
abs.f64 %fd3, %fd15;
// x == 1 or y == 0 -> result is 1.0.
setp.eq.f64 %p3, %fd1, 0d3FF0000000000000;
setp.eq.f64 %p4, %fd2, 0d0000000000000000;
or.pred %p5, %p3, %p4;
@!%p5 bra BB62_5;
bra.uni BB62_4;
BB62_4:
mov.f64 %fd19, 0d3FF0000000000000;
bra.uni BB62_19;
BB62_5:
.loc 2 328 10
// x or y NaN ("> +inf or unordered") -> BB62_18 (result = x + y).
abs.f64 %fd4, %fd1;
setp.gtu.f64 %p6, %fd4, 0d7FF0000000000000;
@%p6 bra BB62_18;
abs.f64 %fd5, %fd2;
setp.gtu.f64 %p7, %fd5, 0d7FF0000000000000;
@%p7 bra BB62_18;
// |y| == inf -> BB62_17; |x| == inf -> BB62_16; x == 0 -> BB62_15.
setp.eq.f64 %p8, %fd5, 0d7FF0000000000000;
@%p8 bra BB62_17;
setp.eq.f64 %p9, %fd4, 0d7FF0000000000000;
@%p9 bra BB62_16;
setp.eq.f64 %p10, %fd1, 0d0000000000000000;
@%p10 bra BB62_15;
// Sign bit of x clear (upper word >= 0) -> general case.
setp.gt.s32 %p11, %r6, -1;
@%p11 bra BB62_13;
// Negative x: a non-integer y yields NaN.
cvt.rzi.f64.f64 %fd16, %fd2;
setp.eq.f64 %p12, %fd2, %fd16;
@%p12 bra BB62_13;
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB62_19;
BB62_13:
// General case: %fd19 = __internal_accurate_pow(|x|, y).
setp.lt.s32 %p13, %r6, 0;
// Callseq Start 16
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd4;
.param .b64 param1;
st.param.f64 [param1+0], %fd2;
.param .b64 retval0;
.loc 2 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd19, [retval0+0];
}
// Callseq End 16
// Negative x with odd integer y: flip the sign of the result (BB62_14).
setp.eq.f64 %p14, %fd3, 0d3FF0000000000000;
.loc 2 328 10
and.pred %p15, %p13, %p14;
@!%p15 bra BB62_19;
bra.uni BB62_14;
BB62_14:
{
.reg .b32 %temp;
mov.b64 {%temp, %r29}, %fd19;
}
xor.b32 %r30, %r29, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r31, %temp}, %fd19;
}
mov.b64 %fd19, {%r31, %r30};
bra.uni BB62_19;
BB62_15:
// x == +-0: the upper word starts as that of x when y is odd (keeps the sign
// of zero), else 0; if y is negative the exponent bits 0x7FF00000 are OR-ed
// in, turning the zero into an infinity. The lower word is 0.
setp.eq.f64 %p16, %fd3, 0d3FF0000000000000;
.loc 2 328 10
selp.b32 %r32, %r6, 0, %p16;
mov.u32 %r33, 0;
.loc 2 328 10
or.b32 %r34, %r32, 2146435072;
setp.lt.s32 %p17, %r7, 0;
selp.b32 %r35, %r34, %r32, %p17;
mov.b64 %fd19, {%r33, %r35};
bra.uni BB62_19;
BB62_16:
// |x| == inf: the upper word is 0x7FF00000 (+inf) for y >= 0 and wraps to 0
// for y < 0 (0x80100000 + 0x7FF00000 overflows); the sign bit is set when x
// is negative and y is an odd integer.
setp.eq.f64 %p18, %fd3, 0d3FF0000000000000;
.loc 2 328 10
shr.s32 %r36, %r7, 31;
and.b32 %r37, %r36, -2146435072;
add.s32 %r38, %r37, 2146435072;
setp.lt.s32 %p19, %r6, 0;
mov.u32 %r39, 0;
.loc 2 328 10
and.pred %p20, %p19, %p18;
or.b32 %r40, %r38, -2147483648;
selp.b32 %r41, %r40, %r38, %p20;
mov.b64 %fd19, {%r39, %r41};
bra.uni BB62_19;
BB62_17:
.loc 2 328 10
// |y| == inf: +inf when |x| > 1, else 0; swapped (xor 0x7FF00000) when y is
// negative; x == -1 yields 1.0 (upper word 0x3FF00000).
setp.gt.f64 %p21, %fd4, 0d3FF0000000000000;
selp.b32 %r42, 2146435072, 0, %p21;
mov.u32 %r43, 0;
.loc 2 328 10
xor.b32 %r44, %r42, 2146435072;
setp.lt.s32 %p22, %r7, 0;
selp.b32 %r45, %r44, %r42, %p22;
setp.eq.f64 %p23, %fd1, 0dBFF0000000000000;
selp.b32 %r46, 1072693248, %r45, %p23;
mov.b64 %fd19, {%r43, %r46};
bra.uni BB62_19;
BB62_18:
.loc 2 328 10
// NaN operand: x + y propagates the NaN.
add.f64 %fd19, %fd1, %fd2;
BB62_19:
.loc 1 117 42
// Store the result to dst[ix*stride_dst + iy].
mad.lo.s32 %r47, %r50, %r12, %r51;
mul.wide.s32 %rd11, %r47, 8;
add.s64 %rd12, %rd10, %rd11;
st.global.f64 [%rd12], %fd19;
.loc 1 117 22
// iy += nctaid.y * ntid.y
add.s32 %r51, %r2, %r51;
.loc 1 117 1
setp.lt.s32 %p24, %r51, %r10;
@%p24 bra BB62_3;
BB62_20:
.loc 1 117 22
// ix += nctaid.x * ntid.x
mov.u32 %r48, %nctaid.x;
mad.lo.s32 %r50, %r48, %r15, %r50;
.loc 1 117 1
setp.lt.s32 %p25, %r50, %r11;
@%p25 bra BB62_2;
BB62_21:
.loc 1 117 2
ret;
}
// map2_v_s_pow_double: dst[i] = pow(src[i], scalar) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source pointer (f64 elements)
//   param_5 (u32): source stride, multiplied by the outer index
//   param_6 (f64): scalar exponent
// Element offset is outer * stride + inner. Both loops advance by
// nctaid * ntid of their dimension. The pow special cases are handled inline;
// only the general case calls __internal_accurate_pow.
.visible .entry map2_v_s_pow_double(
.param .u32 map2_v_s_pow_double_param_0,
.param .u32 map2_v_s_pow_double_param_1,
.param .u64 map2_v_s_pow_double_param_2,
.param .u32 map2_v_s_pow_double_param_3,
.param .u64 map2_v_s_pow_double_param_4,
.param .u32 map2_v_s_pow_double_param_5,
.param .f64 map2_v_s_pow_double_param_6
)
{
.reg .pred %p<26>;
.reg .s32 %r<49>;
.reg .s64 %rd<9>;
.reg .f64 %fd<20>;
ld.param.u32 %r10, [map2_v_s_pow_double_param_0];
ld.param.u32 %r11, [map2_v_s_pow_double_param_1];
ld.param.u64 %rd1, [map2_v_s_pow_double_param_2];
ld.param.u32 %r12, [map2_v_s_pow_double_param_3];
ld.param.u64 %rd2, [map2_v_s_pow_double_param_4];
ld.param.u32 %r13, [map2_v_s_pow_double_param_5];
ld.param.f64 %fd13, [map2_v_s_pow_double_param_6];
.loc 1 117 1
// %r47 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r47, %r14, %r15, %r16;
.loc 1 117 1
setp.ge.s32 %p1, %r47, %r11;
@%p1 bra BB63_22;
.loc 2 328 10
// %fd2 = |b - 2*trunc(b/2)| for the scalar exponent b; it equals 1.0
// exactly when b is an odd integer (used for sign handling below).
mul.f64 %fd1, %fd13, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd14, %fd1;
fma.rn.f64 %fd15, %fd14, 0dC000000000000000, %fd13;
abs.f64 %fd2, %fd15;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
BB63_2:
.loc 1 117 1
// %r48 = inner index = ntid.y * ctaid.y + tid.y (recomputed per outer step)
mov.u32 %r17, %ctaid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r48, %r18, %r17, %r19;
.loc 1 117 1
setp.ge.s32 %p2, %r48, %r10;
@%p2 bra BB63_21;
.loc 2 328 10
// %r3 = high 32 bits of the exponent (carries its sign bit)
{
.reg .b32 %temp;
mov.b64 {%temp, %r3}, %fd13;
}
// %r4 = 0x7FF00000 (high word of +inf) when the exponent is non-negative,
// 0 when it is negative; consumed by the |base| == inf case.
shr.s32 %r21, %r3, 31;
and.b32 %r22, %r21, -2146435072;
add.s32 %r4, %r22, 2146435072;
BB63_4:
.loc 1 117 1
// Load the base: %fd3 = src[outer * src_stride + inner]
mad.lo.s32 %r26, %r47, %r13, %r48;
mul.wide.s32 %rd4, %r26, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd3, [%rd5];
.loc 2 328 10
// %r7 = high 32 bits of the base (carries its sign bit)
{
.reg .b32 %temp;
mov.b64 {%temp, %r7}, %fd3;
}
// base == 1.0 or exponent == 0.0 -> result is 1.0
setp.eq.f64 %p3, %fd3, 0d3FF0000000000000;
setp.eq.f64 %p4, %fd13, 0d0000000000000000;
.loc 2 328 10
or.pred %p5, %p3, %p4;
@!%p5 bra BB63_6;
bra.uni BB63_5;
BB63_5:
mov.f64 %fd19, 0d3FF0000000000000;
bra.uni BB63_20;
BB63_6:
.loc 2 328 10
// gtu against +inf is true only for NaN: either operand NaN -> base + exponent
abs.f64 %fd4, %fd3;
setp.gtu.f64 %p6, %fd4, 0d7FF0000000000000;
@%p6 bra BB63_19;
abs.f64 %fd5, %fd13;
setp.gtu.f64 %p7, %fd5, 0d7FF0000000000000;
@%p7 bra BB63_19;
// |exponent| == inf
setp.eq.f64 %p8, %fd5, 0d7FF0000000000000;
@%p8 bra BB63_18;
// |base| == inf
setp.eq.f64 %p9, %fd4, 0d7FF0000000000000;
@%p9 bra BB63_17;
// base == 0
setp.eq.f64 %p10, %fd3, 0d0000000000000000;
@%p10 bra BB63_16;
// Negative base with a non-integer exponent yields NaN.
setp.gt.s32 %p11, %r7, -1;
@%p11 bra BB63_14;
cvt.rzi.f64.f64 %fd16, %fd13;
setp.eq.f64 %p12, %fd16, %fd13;
@%p12 bra BB63_14;
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB63_20;
BB63_14:
// General case: pow(|base|, exponent); the sign is fixed up afterwards.
setp.lt.s32 %p13, %r7, 0;
// Callseq Start 17
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd4;
.param .b64 param1;
st.param.f64 [param1+0], %fd13;
.param .b64 retval0;
.loc 2 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd19, [retval0+0];
}
// Callseq End 17
setp.eq.f64 %p14, %fd2, 0d3FF0000000000000;
.loc 2 328 10
// Negate only when the base is negative and the exponent is an odd integer.
and.pred %p15, %p13, %p14;
@!%p15 bra BB63_20;
bra.uni BB63_15;
BB63_15:
// Flip the sign bit in the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd19;
}
xor.b32 %r28, %r27, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r29, %temp}, %fd19;
}
mov.b64 %fd19, {%r29, %r28};
bra.uni BB63_20;
BB63_16:
// base == 0: high word starts as the base's high word if the exponent is
// odd (keeps the zero's sign), else 0; the infinity exponent bits are ORed
// in when the exponent is negative.
setp.lt.s32 %p16, %r3, 0;
mov.u32 %r30, 0;
setp.eq.f64 %p17, %fd2, 0d3FF0000000000000;
.loc 2 328 10
selp.b32 %r31, %r7, 0, %p17;
or.b32 %r32, %r31, 2146435072;
selp.b32 %r33, %r32, %r31, %p16;
mov.b64 %fd19, {%r30, %r33};
bra.uni BB63_20;
BB63_17:
// |base| == inf: magnitude comes from %r4; the sign bit is set when the
// base is negative and the exponent is an odd integer.
setp.eq.f64 %p18, %fd2, 0d3FF0000000000000;
.loc 2 328 10
setp.lt.s32 %p19, %r7, 0;
mov.u32 %r34, 0;
.loc 2 328 10
and.pred %p20, %p19, %p18;
or.b32 %r35, %r4, -2147483648;
selp.b32 %r36, %r35, %r4, %p20;
mov.b64 %fd19, {%r34, %r36};
bra.uni BB63_20;
BB63_18:
// |exponent| == inf: +inf if |base| > 1 else 0, swapped when the exponent
// is negative; base == -1.0 yields 1.0 (high word 0x3FF00000).
setp.lt.s32 %p21, %r3, 0;
mov.u32 %r37, 0;
.loc 2 328 10
setp.gt.f64 %p22, %fd4, 0d3FF0000000000000;
selp.b32 %r38, 2146435072, 0, %p22;
xor.b32 %r39, %r38, 2146435072;
selp.b32 %r40, %r39, %r38, %p21;
setp.eq.f64 %p23, %fd3, 0dBFF0000000000000;
selp.b32 %r41, 1072693248, %r40, %p23;
mov.b64 %fd19, {%r37, %r41};
bra.uni BB63_20;
BB63_19:
.loc 2 328 10
// NaN operand: the addition propagates the NaN.
add.f64 %fd19, %fd3, %fd13;
BB63_20:
.loc 1 117 42
// Store: dst[outer * dst_stride + inner] = result
mad.lo.s32 %r42, %r47, %r12, %r48;
mul.wide.s32 %rd7, %r42, 8;
add.s64 %rd8, %rd6, %rd7;
st.global.f64 [%rd8], %fd19;
.loc 1 117 22
// inner += nctaid.y * ntid.y
mov.u32 %r44, %nctaid.y;
mad.lo.s32 %r48, %r44, %r18, %r48;
.loc 1 117 1
setp.lt.s32 %p24, %r48, %r10;
@%p24 bra BB63_4;
BB63_21:
.loc 1 117 22
// outer += nctaid.x * ntid.x
mov.u32 %r45, %nctaid.x;
mad.lo.s32 %r47, %r45, %r14, %r47;
.loc 1 117 1
setp.lt.s32 %p25, %r47, %r11;
@%p25 bra BB63_2;
BB63_22:
.loc 1 117 2
ret;
}
// map2_s_v_pow_double: dst[i] = pow(scalar, src[i]) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (f64): scalar base
//   param_5 (u64): source pointer (f64 elements, used as exponents)
//   param_6 (u32): source stride, multiplied by the outer index
// Element offset is outer * stride + inner. Both loops advance by
// nctaid * ntid of their dimension. The pow special cases are handled inline;
// only the general case calls __internal_accurate_pow.
.visible .entry map2_s_v_pow_double(
.param .u32 map2_s_v_pow_double_param_0,
.param .u32 map2_s_v_pow_double_param_1,
.param .u64 map2_s_v_pow_double_param_2,
.param .u32 map2_s_v_pow_double_param_3,
.param .f64 map2_s_v_pow_double_param_4,
.param .u64 map2_s_v_pow_double_param_5,
.param .u32 map2_s_v_pow_double_param_6
)
{
.reg .pred %p<26>;
.reg .s32 %r<51>;
.reg .s64 %rd<9>;
.reg .f64 %fd<20>;
ld.param.u32 %r11, [map2_s_v_pow_double_param_0];
ld.param.u32 %r12, [map2_s_v_pow_double_param_1];
ld.param.u64 %rd1, [map2_s_v_pow_double_param_2];
ld.param.u32 %r13, [map2_s_v_pow_double_param_3];
ld.param.f64 %fd12, [map2_s_v_pow_double_param_4];
ld.param.u64 %rd2, [map2_s_v_pow_double_param_5];
ld.param.u32 %r14, [map2_s_v_pow_double_param_6];
.loc 1 117 1
// %r49 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r49, %r15, %r16, %r17;
.loc 1 117 1
setp.ge.s32 %p1, %r49, %r12;
@%p1 bra BB64_22;
.loc 1 117 1
mov.u32 %r18, %ntid.y;
.loc 1 117 22
// %r2 = inner-loop step = nctaid.y * ntid.y
mov.u32 %r19, %nctaid.y;
mul.lo.s32 %r2, %r19, %r18;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
BB64_2:
.loc 1 117 1
// %r50 = inner index = ntid.y * ctaid.y + tid.y (recomputed per outer step)
mov.u32 %r20, %ctaid.y;
mov.u32 %r22, %tid.y;
mad.lo.s32 %r50, %r18, %r20, %r22;
.loc 1 117 1
setp.ge.s32 %p2, %r50, %r11;
@%p2 bra BB64_21;
.loc 1 117 1
// %r4 = outer * src_stride
mul.lo.s32 %r4, %r49, %r14;
.loc 2 328 10
// %r5 = high 32 bits of the scalar base (carries its sign bit)
{
.reg .b32 %temp;
mov.b64 {%temp, %r5}, %fd12;
}
BB64_4:
.loc 1 117 1
// Load the exponent: %fd1 = src[outer * src_stride + inner]
add.s32 %r27, %r50, %r4;
mul.wide.s32 %rd4, %r27, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 2 328 10
// %r8 = high 32 bits of the exponent (carries its sign bit)
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
// %fd2 = |e - 2*trunc(e/2)| for exponent e; equals 1.0 exactly when e is
// an odd integer (used for sign handling below).
mul.f64 %fd13, %fd1, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd14, %fd13;
fma.rn.f64 %fd15, %fd14, 0dC000000000000000, %fd1;
abs.f64 %fd2, %fd15;
// exponent == 0.0 or base == 1.0 -> result is 1.0
setp.eq.f64 %p3, %fd1, 0d0000000000000000;
setp.eq.f64 %p4, %fd12, 0d3FF0000000000000;
.loc 2 328 10
or.pred %p5, %p4, %p3;
@!%p5 bra BB64_6;
bra.uni BB64_5;
BB64_5:
mov.f64 %fd19, 0d3FF0000000000000;
bra.uni BB64_20;
BB64_6:
.loc 2 328 10
// gtu against +inf is true only for NaN: either operand NaN -> exponent + base
abs.f64 %fd3, %fd12;
setp.gtu.f64 %p6, %fd3, 0d7FF0000000000000;
@%p6 bra BB64_19;
abs.f64 %fd4, %fd1;
setp.gtu.f64 %p7, %fd4, 0d7FF0000000000000;
@%p7 bra BB64_19;
// |exponent| == inf
setp.eq.f64 %p8, %fd4, 0d7FF0000000000000;
@%p8 bra BB64_18;
// |base| == inf
setp.eq.f64 %p9, %fd3, 0d7FF0000000000000;
@%p9 bra BB64_17;
// base == 0
setp.eq.f64 %p10, %fd12, 0d0000000000000000;
.loc 2 328 10
@%p10 bra BB64_16;
// Negative base with a non-integer exponent yields NaN.
setp.gt.s32 %p11, %r5, -1;
@%p11 bra BB64_14;
cvt.rzi.f64.f64 %fd16, %fd1;
setp.eq.f64 %p12, %fd1, %fd16;
@%p12 bra BB64_14;
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB64_20;
BB64_14:
// General case: pow(|base|, exponent); the sign is fixed up afterwards.
setp.eq.f64 %p13, %fd2, 0d3FF0000000000000;
// Callseq Start 18
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd3;
.param .b64 param1;
st.param.f64 [param1+0], %fd1;
.param .b64 retval0;
.loc 2 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd19, [retval0+0];
}
// Callseq End 18
setp.lt.s32 %p14, %r5, 0;
.loc 2 328 10
// Negate only when the base is negative and the exponent is an odd integer.
and.pred %p15, %p14, %p13;
@!%p15 bra BB64_20;
bra.uni BB64_15;
BB64_15:
// Flip the sign bit in the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd19;
}
xor.b32 %r29, %r28, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r30, %temp}, %fd19;
}
mov.b64 %fd19, {%r30, %r29};
bra.uni BB64_20;
BB64_16:
// base == 0: high word starts as the base's high word if the exponent is
// odd (keeps the zero's sign), else 0; the infinity exponent bits are ORed
// in when the exponent is negative.
setp.eq.f64 %p16, %fd2, 0d3FF0000000000000;
.loc 2 328 10
selp.b32 %r31, %r5, 0, %p16;
mov.u32 %r32, 0;
.loc 2 328 10
or.b32 %r33, %r31, 2146435072;
setp.lt.s32 %p17, %r8, 0;
selp.b32 %r34, %r33, %r31, %p17;
mov.b64 %fd19, {%r32, %r34};
bra.uni BB64_20;
BB64_17:
// |base| == inf: %r37 is 0x7FF00000 (+inf high word) for a non-negative
// exponent and 0 for a negative one; the sign bit is set when the base is
// negative and the exponent is an odd integer.
setp.eq.f64 %p18, %fd2, 0d3FF0000000000000;
.loc 2 328 10
shr.s32 %r35, %r8, 31;
and.b32 %r36, %r35, -2146435072;
add.s32 %r37, %r36, 2146435072;
setp.lt.s32 %p19, %r5, 0;
mov.u32 %r38, 0;
.loc 2 328 10
and.pred %p20, %p19, %p18;
or.b32 %r39, %r37, -2147483648;
selp.b32 %r40, %r39, %r37, %p20;
mov.b64 %fd19, {%r38, %r40};
bra.uni BB64_20;
BB64_18:
// |exponent| == inf: +inf if |base| > 1 else 0, swapped when the exponent
// is negative; base == -1.0 yields 1.0 (high word 0x3FF00000).
setp.eq.f64 %p21, %fd12, 0dBFF0000000000000;
.loc 2 328 10
setp.gt.f64 %p22, %fd3, 0d3FF0000000000000;
selp.b32 %r41, 2146435072, 0, %p22;
mov.u32 %r42, 0;
.loc 2 328 10
xor.b32 %r43, %r41, 2146435072;
setp.lt.s32 %p23, %r8, 0;
selp.b32 %r44, %r43, %r41, %p23;
selp.b32 %r45, 1072693248, %r44, %p21;
mov.b64 %fd19, {%r42, %r45};
bra.uni BB64_20;
BB64_19:
.loc 2 328 10
// NaN operand: the addition propagates the NaN.
add.f64 %fd19, %fd1, %fd12;
BB64_20:
.loc 1 117 42
// Store: dst[outer * dst_stride + inner] = result
mad.lo.s32 %r46, %r49, %r13, %r50;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd6, %rd7;
st.global.f64 [%rd8], %fd19;
.loc 1 117 22
// inner += nctaid.y * ntid.y
add.s32 %r50, %r2, %r50;
.loc 1 117 1
setp.lt.s32 %p24, %r50, %r11;
@%p24 bra BB64_4;
BB64_21:
.loc 1 117 22
// outer += nctaid.x * ntid.x
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r15, %r49;
.loc 1 117 1
setp.lt.s32 %p25, %r49, %r12;
@%p25 bra BB64_2;
BB64_22:
.loc 1 117 2
ret;
}
// map2_max_double: dst[i] = max(a[i], b[i]) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): first source pointer (a)
//   param_5 (u32): stride for a
//   param_6 (u64): second source pointer (b)
//   param_7 (u32): stride for b
// Element offset is outer * stride + inner; both loops are grid-stride
// (step = nctaid * ntid of the corresponding dimension).
.visible .entry map2_max_double(
.param .u32 map2_max_double_param_0,
.param .u32 map2_max_double_param_1,
.param .u64 map2_max_double_param_2,
.param .u32 map2_max_double_param_3,
.param .u64 map2_max_double_param_4,
.param .u32 map2_max_double_param_5,
.param .u64 map2_max_double_param_6,
.param .u32 map2_max_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_max_double_param_0];
ld.param.u32 %r14, [map2_max_double_param_1];
ld.param.u64 %rd4, [map2_max_double_param_2];
ld.param.u32 %r15, [map2_max_double_param_3];
ld.param.u64 %rd5, [map2_max_double_param_4];
ld.param.u32 %r16, [map2_max_double_param_5];
ld.param.u64 %rd6, [map2_max_double_param_6];
ld.param.u32 %r17, [map2_max_double_param_7];
// %rd1 = dst, %rd3 = a, %rd2 = b (as global-space addresses)
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 1 118 1
// %r28 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 1 118 1
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB65_6;
.loc 1 118 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 1 118 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 1 118 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
BB65_2:
.loc 1 118 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB65_5;
.loc 1 118 1
// Per-outer-index base offsets for a, b and dst.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
.loc 1 118 145
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
BB65_4:
.loc 1 118 1
mov.u32 %r10, %r29;
// %rd8 = &a[outer * a_stride + inner]
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 1 118 1
// %rd10 = &b[outer * b_stride + inner]
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 1 118 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
.loc 3 2785 10
max.f64 %fd3, %fd2, %fd1;
.loc 1 118 145
// %rd12 = &dst[outer * dst_stride + inner]
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 1 118 145
st.global.f64 [%rd12], %fd3;
.loc 1 118 22
add.s32 %r11, %r5, %r10;
.loc 1 118 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB65_4;
BB65_5:
.loc 1 118 22
add.s32 %r28, %r4, %r28;
.loc 1 118 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB65_2;
BB65_6:
.loc 1 118 2
ret;
}
// map2_v_s_max_double: dst[i] = max(src[i], scalar) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source pointer
//   param_5 (u32): source stride
//   param_6 (f64): scalar operand
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_v_s_max_double(
.param .u32 map2_v_s_max_double_param_0,
.param .u32 map2_v_s_max_double_param_1,
.param .u64 map2_v_s_max_double_param_2,
.param .u32 map2_v_s_max_double_param_3,
.param .u64 map2_v_s_max_double_param_4,
.param .u32 map2_v_s_max_double_param_5,
.param .f64 map2_v_s_max_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_max_double_param_0];
ld.param.u32 %r13, [map2_v_s_max_double_param_1];
ld.param.u64 %rd3, [map2_v_s_max_double_param_2];
ld.param.u32 %r14, [map2_v_s_max_double_param_3];
ld.param.u64 %rd4, [map2_v_s_max_double_param_4];
ld.param.u32 %r15, [map2_v_s_max_double_param_5];
ld.param.f64 %fd1, [map2_v_s_max_double_param_6];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 118 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 118 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB66_6;
.loc 1 118 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 118 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 118 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
BB66_2:
.loc 1 118 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB66_5;
.loc 1 118 1
// Per-outer-index base offsets for src and dst.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 118 89
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
BB66_4:
.loc 1 118 1
mov.u32 %r9, %r26;
// %rd6 = &src[outer * src_stride + inner]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 118 1
ld.global.f64 %fd2, [%rd6];
.loc 3 2785 10
max.f64 %fd3, %fd2, %fd1;
.loc 1 118 89
// %rd8 = &dst[outer * dst_stride + inner]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 118 89
st.global.f64 [%rd8], %fd3;
.loc 1 118 22
add.s32 %r10, %r5, %r9;
.loc 1 118 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB66_4;
BB66_5:
.loc 1 118 22
add.s32 %r25, %r4, %r25;
.loc 1 118 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB66_2;
BB66_6:
.loc 1 118 2
ret;
}
// map2_s_v_max_double: dst[i] = max(scalar, src[i]) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (f64): scalar operand
//   param_5 (u64): source pointer
//   param_6 (u32): source stride
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_s_v_max_double(
.param .u32 map2_s_v_max_double_param_0,
.param .u32 map2_s_v_max_double_param_1,
.param .u64 map2_s_v_max_double_param_2,
.param .u32 map2_s_v_max_double_param_3,
.param .f64 map2_s_v_max_double_param_4,
.param .u64 map2_s_v_max_double_param_5,
.param .u32 map2_s_v_max_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_max_double_param_0];
ld.param.u32 %r13, [map2_s_v_max_double_param_1];
ld.param.u64 %rd3, [map2_s_v_max_double_param_2];
ld.param.u32 %r14, [map2_s_v_max_double_param_3];
ld.param.f64 %fd1, [map2_s_v_max_double_param_4];
ld.param.u64 %rd4, [map2_s_v_max_double_param_5];
ld.param.u32 %r15, [map2_s_v_max_double_param_6];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 118 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 118 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB67_6;
.loc 1 118 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 118 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 118 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
BB67_2:
.loc 1 118 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB67_5;
.loc 1 118 1
// Per-outer-index base offsets for src and dst.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 118 89
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
BB67_4:
.loc 1 118 1
mov.u32 %r9, %r26;
// %rd6 = &src[outer * src_stride + inner]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 118 1
ld.global.f64 %fd2, [%rd6];
.loc 3 2785 10
max.f64 %fd3, %fd1, %fd2;
.loc 1 118 89
// %rd8 = &dst[outer * dst_stride + inner]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 118 89
st.global.f64 [%rd8], %fd3;
.loc 1 118 22
add.s32 %r10, %r5, %r9;
.loc 1 118 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB67_4;
BB67_5:
.loc 1 118 22
add.s32 %r25, %r4, %r25;
.loc 1 118 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB67_2;
BB67_6:
.loc 1 118 2
ret;
}
// map2_min_double: dst[i] = min(a[i], b[i]) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): first source pointer (a)
//   param_5 (u32): stride for a
//   param_6 (u64): second source pointer (b)
//   param_7 (u32): stride for b
// Element offset is outer * stride + inner; both loops are grid-stride
// (step = nctaid * ntid of the corresponding dimension).
.visible .entry map2_min_double(
.param .u32 map2_min_double_param_0,
.param .u32 map2_min_double_param_1,
.param .u64 map2_min_double_param_2,
.param .u32 map2_min_double_param_3,
.param .u64 map2_min_double_param_4,
.param .u32 map2_min_double_param_5,
.param .u64 map2_min_double_param_6,
.param .u32 map2_min_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_min_double_param_0];
ld.param.u32 %r14, [map2_min_double_param_1];
ld.param.u64 %rd4, [map2_min_double_param_2];
ld.param.u32 %r15, [map2_min_double_param_3];
ld.param.u64 %rd5, [map2_min_double_param_4];
ld.param.u32 %r16, [map2_min_double_param_5];
ld.param.u64 %rd6, [map2_min_double_param_6];
ld.param.u32 %r17, [map2_min_double_param_7];
// %rd1 = dst, %rd3 = a, %rd2 = b (as global-space addresses)
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 1 119 1
// %r28 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 1 119 1
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB68_6;
.loc 1 119 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 1 119 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 1 119 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
BB68_2:
.loc 1 119 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB68_5;
.loc 1 119 1
// Per-outer-index base offsets for a, b and dst.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
.loc 1 119 145
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
BB68_4:
.loc 1 119 1
mov.u32 %r10, %r29;
// %rd8 = &a[outer * a_stride + inner]
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 1 119 1
// %rd10 = &b[outer * b_stride + inner]
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 1 119 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
.loc 3 2780 10
min.f64 %fd3, %fd2, %fd1;
.loc 1 119 145
// %rd12 = &dst[outer * dst_stride + inner]
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 1 119 145
st.global.f64 [%rd12], %fd3;
.loc 1 119 22
add.s32 %r11, %r5, %r10;
.loc 1 119 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB68_4;
BB68_5:
.loc 1 119 22
add.s32 %r28, %r4, %r28;
.loc 1 119 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB68_2;
BB68_6:
.loc 1 119 2
ret;
}
// map2_v_s_min_double: dst[i] = min(src[i], scalar) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): source pointer
//   param_5 (u32): source stride
//   param_6 (f64): scalar operand
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_v_s_min_double(
.param .u32 map2_v_s_min_double_param_0,
.param .u32 map2_v_s_min_double_param_1,
.param .u64 map2_v_s_min_double_param_2,
.param .u32 map2_v_s_min_double_param_3,
.param .u64 map2_v_s_min_double_param_4,
.param .u32 map2_v_s_min_double_param_5,
.param .f64 map2_v_s_min_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_min_double_param_0];
ld.param.u32 %r13, [map2_v_s_min_double_param_1];
ld.param.u64 %rd3, [map2_v_s_min_double_param_2];
ld.param.u32 %r14, [map2_v_s_min_double_param_3];
ld.param.u64 %rd4, [map2_v_s_min_double_param_4];
ld.param.u32 %r15, [map2_v_s_min_double_param_5];
ld.param.f64 %fd1, [map2_v_s_min_double_param_6];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 119 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 119 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB69_6;
.loc 1 119 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 119 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 119 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
BB69_2:
.loc 1 119 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB69_5;
.loc 1 119 1
// Per-outer-index base offsets for src and dst.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 119 90
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
BB69_4:
.loc 1 119 1
mov.u32 %r9, %r26;
// %rd6 = &src[outer * src_stride + inner]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 119 1
ld.global.f64 %fd2, [%rd6];
.loc 3 2780 10
min.f64 %fd3, %fd2, %fd1;
.loc 1 119 90
// %rd8 = &dst[outer * dst_stride + inner]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 119 90
st.global.f64 [%rd8], %fd3;
.loc 1 119 22
add.s32 %r10, %r5, %r9;
.loc 1 119 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB69_4;
BB69_5:
.loc 1 119 22
add.s32 %r25, %r4, %r25;
.loc 1 119 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB69_2;
BB69_6:
.loc 1 119 2
ret;
}
// map2_s_v_min_double: dst[i] = min(scalar, src[i]) over a 2-D index space.
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (f64): scalar operand
//   param_5 (u64): source pointer
//   param_6 (u32): source stride
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_s_v_min_double(
.param .u32 map2_s_v_min_double_param_0,
.param .u32 map2_s_v_min_double_param_1,
.param .u64 map2_s_v_min_double_param_2,
.param .u32 map2_s_v_min_double_param_3,
.param .f64 map2_s_v_min_double_param_4,
.param .u64 map2_s_v_min_double_param_5,
.param .u32 map2_s_v_min_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_min_double_param_0];
ld.param.u32 %r13, [map2_s_v_min_double_param_1];
ld.param.u64 %rd3, [map2_s_v_min_double_param_2];
ld.param.u32 %r14, [map2_s_v_min_double_param_3];
ld.param.f64 %fd1, [map2_s_v_min_double_param_4];
ld.param.u64 %rd4, [map2_s_v_min_double_param_5];
ld.param.u32 %r15, [map2_s_v_min_double_param_6];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 119 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 119 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB70_6;
.loc 1 119 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 119 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 119 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
BB70_2:
.loc 1 119 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB70_5;
.loc 1 119 1
// Per-outer-index base offsets for src and dst.
mul.lo.s32 %r7, %r25, %r15;
.loc 1 119 90
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
BB70_4:
.loc 1 119 1
mov.u32 %r9, %r26;
// %rd6 = &src[outer * src_stride + inner]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 119 1
ld.global.f64 %fd2, [%rd6];
.loc 3 2780 10
min.f64 %fd3, %fd1, %fd2;
.loc 1 119 90
// %rd8 = &dst[outer * dst_stride + inner]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 119 90
st.global.f64 [%rd8], %fd3;
.loc 1 119 22
add.s32 %r10, %r5, %r9;
.loc 1 119 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB70_4;
BB70_5:
.loc 1 119 22
add.s32 %r25, %r4, %r25;
.loc 1 119 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB70_2;
BB70_6:
.loc 1 119 2
ret;
}
// map2_set_double: dst[i] = b[i] over a 2-D index space (plain copy of the
// second source operand).
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): declared but never loaded by this kernel
//   param_5 (u32): declared but never loaded by this kernel
//   param_6 (u64): source pointer that is copied from
//   param_7 (u32): source stride
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_set_double(
.param .u32 map2_set_double_param_0,
.param .u32 map2_set_double_param_1,
.param .u64 map2_set_double_param_2,
.param .u32 map2_set_double_param_3,
.param .u64 map2_set_double_param_4,
.param .u32 map2_set_double_param_5,
.param .u64 map2_set_double_param_6,
.param .u32 map2_set_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<2>;
ld.param.u32 %r12, [map2_set_double_param_0];
ld.param.u32 %r13, [map2_set_double_param_1];
ld.param.u64 %rd3, [map2_set_double_param_2];
ld.param.u32 %r14, [map2_set_double_param_3];
ld.param.u64 %rd4, [map2_set_double_param_6];
ld.param.u32 %r15, [map2_set_double_param_7];
// %rd1 = dst, %rd2 = source (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 120 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 120 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB71_6;
.loc 1 120 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 120 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 120 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
BB71_2:
.loc 1 120 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB71_5;
.loc 1 120 1
// Per-outer-index base offsets for source and dst.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
BB71_4:
.loc 1 120 1
mov.u32 %r9, %r26;
// %rd6 = &source[outer * src_stride + inner]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 120 1
// %rd8 = &dst[outer * dst_stride + inner]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 120 1
ld.global.f64 %fd1, [%rd6];
st.global.f64 [%rd8], %fd1;
.loc 1 120 22
add.s32 %r10, %r5, %r9;
.loc 1 120 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB71_4;
BB71_5:
.loc 1 120 22
add.s32 %r25, %r4, %r25;
.loc 1 120 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB71_2;
BB71_6:
.loc 1 120 2
ret;
}
// map2_v_s_set_double: dst[i] = scalar over a 2-D index space (fill).
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (u64): declared but never loaded by this kernel
//   param_5 (u32): declared but never loaded by this kernel
//   param_6 (f64): scalar value written to every element
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_v_s_set_double(
.param .u32 map2_v_s_set_double_param_0,
.param .u32 map2_v_s_set_double_param_1,
.param .u64 map2_v_s_set_double_param_2,
.param .u32 map2_v_s_set_double_param_3,
.param .u64 map2_v_s_set_double_param_4,
.param .u32 map2_v_s_set_double_param_5,
.param .f64 map2_v_s_set_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<24>;
.reg .s64 %rd<5>;
.reg .f64 %fd<2>;
ld.param.u32 %r11, [map2_v_s_set_double_param_0];
ld.param.u32 %r12, [map2_v_s_set_double_param_1];
ld.param.u64 %rd2, [map2_v_s_set_double_param_2];
ld.param.u32 %r13, [map2_v_s_set_double_param_3];
ld.param.f64 %fd1, [map2_v_s_set_double_param_6];
cvta.to.global.u64 %rd1, %rd2;
.loc 1 120 1
// %r22 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r14, %ctaid.x;
mov.u32 %r15, %tid.x;
mad.lo.s32 %r22, %r1, %r14, %r15;
.loc 1 120 1
setp.ge.s32 %p1, %r22, %r12;
@%p1 bra BB72_6;
.loc 1 120 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r16, %tid.y;
mov.u32 %r17, %ntid.y;
mov.u32 %r18, %ctaid.y;
mad.lo.s32 %r3, %r17, %r18, %r16;
.loc 1 120 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r19, %nctaid.x;
mul.lo.s32 %r4, %r19, %r1;
.loc 1 120 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r5, %r20, %r17;
BB72_2:
.loc 1 120 1
setp.ge.s32 %p2, %r3, %r11;
@%p2 bra BB72_5;
.loc 1 120 1
// %r7 = outer * dst_stride
mul.lo.s32 %r7, %r22, %r13;
mov.u32 %r23, %r3;
BB72_4:
.loc 1 120 1
mov.u32 %r8, %r23;
// %rd4 = &dst[outer * dst_stride + inner]
add.s32 %r21, %r8, %r7;
mul.wide.s32 %rd3, %r21, 8;
add.s64 %rd4, %rd1, %rd3;
.loc 1 120 1
st.global.f64 [%rd4], %fd1;
.loc 1 120 22
add.s32 %r9, %r5, %r8;
.loc 1 120 1
setp.lt.s32 %p3, %r9, %r11;
mov.u32 %r23, %r9;
@%p3 bra BB72_4;
BB72_5:
.loc 1 120 22
add.s32 %r22, %r4, %r22;
.loc 1 120 1
setp.lt.s32 %p4, %r22, %r12;
@%p4 bra BB72_2;
BB72_6:
.loc 1 120 2
ret;
}
// map2_s_v_set_double: dst[i] = src[i] over a 2-D index space (plain copy of
// the vector operand).
//   param_0 (u32): exclusive bound of the inner (tid.y-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.x-derived) index
//   param_2 (u64): destination pointer (f64 elements)
//   param_3 (u32): destination stride, multiplied by the outer index
//   param_4 (f64): scalar operand; declared but never loaded by this kernel
//   param_5 (u64): source pointer that is copied from
//   param_6 (u32): source stride
// Element offset is outer * stride + inner; both loops are grid-stride.
.visible .entry map2_s_v_set_double(
.param .u32 map2_s_v_set_double_param_0,
.param .u32 map2_s_v_set_double_param_1,
.param .u64 map2_s_v_set_double_param_2,
.param .u32 map2_s_v_set_double_param_3,
.param .f64 map2_s_v_set_double_param_4,
.param .u64 map2_s_v_set_double_param_5,
.param .u32 map2_s_v_set_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<2>;
ld.param.u32 %r12, [map2_s_v_set_double_param_0];
ld.param.u32 %r13, [map2_s_v_set_double_param_1];
ld.param.u64 %rd3, [map2_s_v_set_double_param_2];
ld.param.u32 %r14, [map2_s_v_set_double_param_3];
ld.param.u64 %rd4, [map2_s_v_set_double_param_5];
ld.param.u32 %r15, [map2_s_v_set_double_param_6];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 120 1
// %r25 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 1 120 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB73_6;
.loc 1 120 1
// %r3 = initial inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 1 120 22
// %r4 = outer step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 1 120 22
// %r5 = inner step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
BB73_2:
.loc 1 120 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB73_5;
.loc 1 120 1
// Per-outer-index base offsets for src and dst.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
BB73_4:
.loc 1 120 1
mov.u32 %r9, %r26;
// %rd6 = &src[outer * src_stride + inner]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 120 1
// %rd8 = &dst[outer * dst_stride + inner]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 120 1
ld.global.f64 %fd1, [%rd6];
st.global.f64 [%rd8], %fd1;
.loc 1 120 22
add.s32 %r10, %r5, %r9;
.loc 1 120 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB73_4;
BB73_5:
.loc 1 120 22
add.s32 %r25, %r4, %r25;
.loc 1 120 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB73_2;
BB73_6:
.loc 1 120 2
ret;
}
// reduce_add_double: sum of a strided 2-D f64 array, reduced per warp.
//   param_0 (u32): exclusive bound of the inner (tid.x-derived) index
//   param_1 (u32): exclusive bound of the outer (tid.y-derived) index
//   param_2 (u64): destination pointer for the partial sums
//   param_3 (u64): source pointer (f64 elements)
//   param_4 (u32): source stride, multiplied by the outer index
// Phase 1: each thread accumulates src[outer * stride + inner] over a 2-D
// grid-stride loop. Phase 2: partial sums are combined with shfl.down for
// offsets 1, 2, 4, ... while offset < ntid.x. Phase 3: threads whose
// tid.x & 31 == 0 store their sum to dst[nctaid.y * ctaid.x + ctaid.y].
// The debug vprintf("aaa\n") call that used to run on every shuffle step has
// been removed; it had no effect on the computed result.
// NOTE(review): only the low 5 bits of the shfl offset are used by the
// hardware, so this loop looks correct only for ntid.x <= 32 -- confirm
// against the launch configuration.
.visible .entry reduce_add_double(
.param .u32 reduce_add_double_param_0,
.param .u32 reduce_add_double_param_1,
.param .u64 reduce_add_double_param_2,
.param .u64 reduce_add_double_param_3,
.param .u32 reduce_add_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<51>;
.reg .s64 %rd<11>;
.reg .f64 %fd<15>;
ld.param.u32 %r16, [reduce_add_double_param_0];
ld.param.u32 %r17, [reduce_add_double_param_1];
ld.param.u64 %rd4, [reduce_add_double_param_2];
ld.param.u64 %rd5, [reduce_add_double_param_3];
ld.param.u32 %r18, [reduce_add_double_param_4];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 222 1
// %r48 = outer index = ntid.y * ctaid.y + tid.y
mov.u32 %r1, %ntid.y;
mov.u32 %r19, %ctaid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r48, %r1, %r19, %r20;
.loc 1 222 1
setp.lt.s32 %p1, %r48, %r17;
.loc 1 222 1
mov.u32 %r3, %ntid.x;
.loc 1 222 1
@%p1 bra BB74_2;
// Out-of-range threads contribute the additive identity but still take part
// in the barrier and the shuffle below.
mov.f64 %fd14, 0d0000000000000000;
bra.uni BB74_7;
BB74_2:
.loc 1 222 1
// %r4 = initial inner index = ntid.x * ctaid.x + tid.x
mov.u32 %r21, %tid.x;
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r4, %r3, %r22, %r21;
.loc 1 222 22
// %r5 = outer step = nctaid.y * ntid.y
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r1;
.loc 1 222 22
// %r6 = inner step = nctaid.x * ntid.x
mov.u32 %r24, %nctaid.x;
mul.lo.s32 %r6, %r24, %r3;
mov.f64 %fd14, 0d0000000000000000;
BB74_3:
.loc 1 222 1
setp.ge.s32 %p2, %r4, %r16;
@%p2 bra BB74_6;
.loc 1 222 1
mul.lo.s32 %r8, %r48, %r18;
mov.u32 %r49, %r4;
BB74_5:
.loc 1 222 1
// acc += src[outer * stride + inner]
mov.u32 %r9, %r49;
add.s32 %r25, %r9, %r8;
mul.wide.s32 %rd6, %r25, 8;
add.s64 %rd7, %rd2, %rd6;
ld.global.f64 %fd11, [%rd7];
add.f64 %fd14, %fd14, %fd11;
.loc 1 222 22
add.s32 %r10, %r6, %r9;
.loc 1 222 1
setp.lt.s32 %p3, %r10, %r16;
mov.u32 %r49, %r10;
@%p3 bra BB74_5;
BB74_6:
.loc 1 222 22
add.s32 %r48, %r5, %r48;
.loc 1 222 1
setp.lt.s32 %p4, %r48, %r17;
@%p4 bra BB74_3;
BB74_7:
.loc 1 222 1
bar.sync 0;
.loc 1 222 1
// No shuffle steps are needed when ntid.x < 2.
setp.lt.u32 %p5, %r3, 2;
@%p5 bra BB74_10;
.loc 1 222 1
// %r13 = shfl control operand: ((32 - WARP_SZ) << 8) | 31
mov.u32 %r27, WARP_SZ;
mov.u32 %r28, 32;
.loc 4 109 1
sub.s32 %r29, %r28, %r27;
shl.b32 %r30, %r29, 8;
or.b32 %r13, %r30, 31;
mov.u32 %r50, 1;
BB74_9:
.loc 1 126 1
// Split the f64 accumulator into two 32-bit halves, shuffle each half down by
// the current offset, reassemble, and add.
// inline asm
mov.b64 { %r31, %r32 }, %fd14;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r33, %r32, %r50, %r13;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r37, %r31, %r50, %r13;
// inline asm
.loc 3 3330 10
mov.b64 %fd13, {%r37, %r33};
.loc 1 222 1
add.f64 %fd14, %fd14, %fd13;
.loc 1 222 40
// offset <<= 1
shl.b32 %r50, %r50, 1;
.loc 1 222 1
setp.lt.u32 %p6, %r50, %r3;
@%p6 bra BB74_9;
BB74_10:
.loc 1 222 1
// Only threads with (tid.x & 31) == 0 write the result.
mov.u32 %r42, %tid.x;
and.b32 %r43, %r42, 31;
setp.ne.s32 %p7, %r43, 0;
@%p7 bra BB74_12;
.loc 1 222 1
// dst[nctaid.y * ctaid.x + ctaid.y] = acc
mov.u32 %r44, %nctaid.y;
mov.u32 %r45, %ctaid.x;
mad.lo.s32 %r47, %r44, %r45, %r19;
mul.wide.u32 %rd9, %r47, 8;
add.s64 %rd10, %rd1, %rd9;
st.global.f64 [%rd10], %fd14;
BB74_12:
.loc 1 222 2
ret;
}
// reduce_col_add_double: for each outer index c, sums src[c * stride + i]
// over the inner index i and stores the total to dst[c].
//   param_0 (u32): exclusive bound of the inner (tid.x-derived) index
//   param_1 (u32): exclusive bound of the outer index
//   param_2 (u64): destination pointer (one f64 per outer index)
//   param_3 (u64): source pointer (f64 elements)
//   param_4 (u32): source stride, multiplied by the outer index
// The outer index starts at ntid.y * ctaid.x + tid.y and advances by
// nctaid.x * ntid.y. The inner index starts at tid.x and advances by ntid.x.
// Per-thread partial sums are combined with shfl.down for offsets
// 1, 2, 4, ... while offset < ntid.x; threads with (tid.x & 31) == 0 store.
// The debug vprintf("aaa\n") call that used to run on every shuffle step has
// been removed; it had no effect on the computed result.
// NOTE(review): bar.sync sits inside the outer loop, whose trip count depends
// on tid.y, and threads that fail the entry bound check never reach it --
// confirm the launch configuration keeps the barrier uniform across the CTA.
.visible .entry reduce_col_add_double(
.param .u32 reduce_col_add_double_param_0,
.param .u32 reduce_col_add_double_param_1,
.param .u64 reduce_col_add_double_param_2,
.param .u64 reduce_col_add_double_param_3,
.param .u32 reduce_col_add_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<41>;
.reg .s64 %rd<11>;
.reg .f64 %fd<13>;
ld.param.u32 %r15, [reduce_col_add_double_param_0];
ld.param.u32 %r16, [reduce_col_add_double_param_1];
ld.param.u64 %rd3, [reduce_col_add_double_param_2];
ld.param.u64 %rd4, [reduce_col_add_double_param_3];
ld.param.u32 %r17, [reduce_col_add_double_param_4];
// %rd1 = src (as a global-space address)
cvta.to.global.u64 %rd1, %rd4;
.loc 1 222 1
// %r38 = outer index = ntid.y * ctaid.x + tid.y
mov.u32 %r1, %ntid.y;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r38, %r1, %r18, %r19;
.loc 1 222 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB75_12;
.loc 1 222 1
mov.u32 %r3, %tid.x;
.loc 1 222 1
mov.u32 %r4, %ntid.x;
.loc 1 222 1
// %r5 = tid.x & 31, used to pick the writing thread
and.b32 %r5, %r3, 31;
.loc 1 222 22
// %r6 = outer step = nctaid.x * ntid.y
mov.u32 %r20, %nctaid.x;
mul.lo.s32 %r6, %r20, %r1;
.loc 1 222 1
// %r7 = shfl control operand: ((32 - WARP_SZ) << 8) | 31
mov.u32 %r21, WARP_SZ;
mov.u32 %r22, 32;
.loc 4 109 1
sub.s32 %r23, %r22, %r21;
shl.b32 %r24, %r23, 8;
or.b32 %r7, %r24, 31;
// %rd8 = dst (as a global-space address)
cvta.to.global.u64 %rd8, %rd3;
BB75_2:
setp.lt.s32 %p2, %r3, %r15;
.loc 1 222 1
@%p2 bra BB75_4;
// tid.x is already past the inner bound: contribute the additive identity.
mov.f64 %fd12, 0d0000000000000000;
bra.uni BB75_6;
BB75_4:
.loc 1 222 1
mul.lo.s32 %r9, %r38, %r17;
mov.f64 %fd12, 0d0000000000000000;
mov.u32 %r39, %r3;
BB75_5:
.loc 1 222 1
// acc += src[outer * stride + inner]
mov.u32 %r10, %r39;
add.s32 %r25, %r10, %r9;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd1, %rd5;
ld.global.f64 %fd9, [%rd6];
add.f64 %fd12, %fd12, %fd9;
.loc 1 222 22
add.s32 %r11, %r4, %r10;
.loc 1 222 1
setp.lt.s32 %p3, %r11, %r15;
mov.u32 %r39, %r11;
@%p3 bra BB75_5;
BB75_6:
.loc 1 222 1
bar.sync 0;
.loc 1 222 1
// No shuffle steps are needed when ntid.x < 2.
setp.lt.u32 %p4, %r4, 2;
@%p4 bra BB75_9;
mov.u32 %r40, 1;
BB75_8:
.loc 1 126 1
// Split the f64 accumulator into two 32-bit halves, shuffle each half down by
// the current offset, reassemble, and add.
// inline asm
mov.b64 { %r27, %r28 }, %fd12;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r29, %r28, %r40, %r7;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r33, %r27, %r40, %r7;
// inline asm
.loc 3 3330 10
mov.b64 %fd11, {%r33, %r29};
.loc 1 222 1
add.f64 %fd12, %fd12, %fd11;
.loc 1 222 40
// offset <<= 1
shl.b32 %r40, %r40, 1;
.loc 1 222 1
setp.lt.u32 %p5, %r40, %r4;
@%p5 bra BB75_8;
BB75_9:
.loc 1 222 1
// Only threads with (tid.x & 31) == 0 write the result.
setp.ne.s32 %p6, %r5, 0;
@%p6 bra BB75_11;
.loc 1 222 1
// dst[outer] = acc
mul.wide.s32 %rd9, %r38, 8;
add.s64 %rd10, %rd8, %rd9;
st.global.f64 [%rd10], %fd12;
BB75_11:
.loc 1 222 22
add.s32 %r38, %r6, %r38;
.loc 1 222 1
setp.lt.s32 %p7, %r38, %r16;
@%p7 bra BB75_2;
BB75_12:
.loc 1 222 2
ret;
}
// reduce_row_add_double: for each index r handled by a thread, sums
// src[k * stride + r] for k in [0, param_1) and stores the total to dst[r].
//   param_0 (u32): exclusive bound of the per-thread index r
//   param_1 (u32): number of terms summed for each r
//   param_2 (u64): destination pointer (one f64 per r)
//   param_3 (u64): source pointer (f64 elements)
//   param_4 (u32): source stride, multiplied by the summation index k
// r starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x.
// Each thread does its own serial sum; no barrier or shuffle is used.
.visible .entry reduce_row_add_double(
.param .u32 reduce_row_add_double_param_0,
.param .u32 reduce_row_add_double_param_1,
.param .u64 reduce_row_add_double_param_2,
.param .u64 reduce_row_add_double_param_3,
.param .u32 reduce_row_add_double_param_4
)
{
.reg .pred %p<5>;
.reg .s32 %r<18>;
.reg .s64 %rd<9>;
.reg .f64 %fd<8>;
ld.param.u32 %r7, [reduce_row_add_double_param_0];
ld.param.u32 %r8, [reduce_row_add_double_param_1];
ld.param.u64 %rd3, [reduce_row_add_double_param_2];
ld.param.u64 %rd4, [reduce_row_add_double_param_3];
ld.param.u32 %r9, [reduce_row_add_double_param_4];
// %rd1 = dst, %rd2 = src (as global-space addresses)
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 1 222 1
// %r1 = step for r = nctaid.x * ntid.x
mov.u32 %r10, %nctaid.x;
mov.u32 %r11, %ntid.x;
mul.lo.s32 %r1, %r10, %r11;
.loc 1 222 1
// %r16 = r = ntid.x * ctaid.x + tid.x
mov.u32 %r12, %ctaid.x;
mov.u32 %r13, %tid.x;
mad.lo.s32 %r16, %r11, %r12, %r13;
.loc 1 222 1
setp.ge.s32 %p1, %r16, %r7;
@%p1 bra BB76_5;
BB76_1:
// Start each sum at 0.0; when param_1 <= 0 the loop is skipped and 0.0 is
// stored.
mov.f64 %fd7, 0d0000000000000000;
setp.gt.s32 %p2, %r8, 0;
.loc 1 222 1
@%p2 bra BB76_2;
bra.uni BB76_4;
BB76_2:
mov.u32 %r17, 0;
BB76_3:
.loc 1 222 1
// acc += src[k * stride + r]
mad.lo.s32 %r15, %r17, %r9, %r16;
mul.wide.s32 %rd5, %r15, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 1 222 1
ld.global.f64 %fd6, [%rd6];
add.f64 %fd7, %fd7, %fd6;
.loc 1 222 22
add.s32 %r17, %r17, 1;
.loc 1 222 1
setp.lt.s32 %p3, %r17, %r8;
@%p3 bra BB76_3;
BB76_4:
// dst[r] = acc
mul.wide.s32 %rd7, %r16, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 1 222 1
st.global.f64 [%rd8], %fd7;
.loc 1 222 22
add.s32 %r16, %r16, %r1;
.loc 1 222 1
setp.lt.s32 %p4, %r16, %r7;
@%p4 bra BB76_1;
BB76_5:
.loc 1 222 2
ret;
}
// reduce_max_double: maximum of the f64 elements of a strided 2-D region.
//   param_0 (u32): exclusive bound for the x index
//   param_1 (u32): exclusive bound for the y index
//   param_2 (u64): output pointer (f64)
//   param_3 (u64): input pointer (f64)
//   param_4 (u32): element stride applied to the y index
// Each thread folds the elements at y * param_4 + x, with x starting at
// ntid.x * ctaid.x + tid.x (step nctaid.x * ntid.x) and y starting at
// ntid.y * ctaid.y + tid.y (step nctaid.y * ntid.y). Partial maxima are then
// combined with shfl.down, and every thread with (tid.x & 31) == 0 stores its
// value to param_2[nctaid.y * ctaid.x + ctaid.y].
// The per-iteration debug vprintf("aaa\n") that used to sit inside the shuffle
// loop has been removed; it had no effect on the result.
.visible .entry reduce_max_double(
.param .u32 reduce_max_double_param_0,
.param .u32 reduce_max_double_param_1,
.param .u64 reduce_max_double_param_2,
.param .u64 reduce_max_double_param_3,
.param .u32 reduce_max_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<51>;
.reg .s64 %rd<11>;
.reg .f64 %fd<15>;
ld.param.u32 %r16, [reduce_max_double_param_0];
ld.param.u32 %r17, [reduce_max_double_param_1];
ld.param.u64 %rd4, [reduce_max_double_param_2];
ld.param.u64 %rd5, [reduce_max_double_param_3];
ld.param.u32 %r18, [reduce_max_double_param_4];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 223 1
// %r48 = first y index owned by this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r19, %ctaid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r48, %r1, %r19, %r20;
.loc 1 223 1
setp.lt.s32 %p1, %r48, %r17;
.loc 1 223 1
mov.u32 %r3, %ntid.x;
.loc 1 223 1
@%p1 bra BB77_2;
// No y index in range: contribute -infinity, the identity for max.
mov.f64 %fd14, 0dFFF0000000000000;
bra.uni BB77_7;
BB77_2:
.loc 1 223 1
// %r4 = first x index, %r5 = y step, %r6 = x step.
mov.u32 %r21, %tid.x;
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r4, %r3, %r22, %r21;
.loc 1 223 22
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r1;
.loc 1 223 22
mov.u32 %r24, %nctaid.x;
mul.lo.s32 %r6, %r24, %r3;
mov.f64 %fd14, 0dFFF0000000000000;
BB77_3:
.loc 1 223 1
// Outer loop over y; skip the inner loop when this thread has no x in range.
setp.ge.s32 %p2, %r4, %r16;
@%p2 bra BB77_6;
.loc 1 223 1
mul.lo.s32 %r8, %r48, %r18;
mov.u32 %r49, %r4;
BB77_5:
.loc 1 223 1
// Inner loop over x: load element y * stride + x and fold it in.
mov.u32 %r9, %r49;
add.s32 %r25, %r9, %r8;
mul.wide.s32 %rd6, %r25, 8;
add.s64 %rd7, %rd2, %rd6;
ld.global.f64 %fd11, [%rd7];
.loc 3 2785 10
max.f64 %fd14, %fd14, %fd11;
.loc 1 223 22
add.s32 %r10, %r6, %r9;
.loc 1 223 1
setp.lt.s32 %p3, %r10, %r16;
mov.u32 %r49, %r10;
@%p3 bra BB77_5;
BB77_6:
.loc 1 223 22
add.s32 %r48, %r5, %r48;
.loc 1 223 1
setp.lt.s32 %p4, %r48, %r17;
@%p4 bra BB77_3;
BB77_7:
.loc 1 223 1
bar.sync 0;
.loc 1 223 1
// With fewer than two threads in x there is nothing to combine.
setp.lt.u32 %p5, %r3, 2;
@%p5 bra BB77_10;
.loc 1 223 1
// %r13 = shuffle control word: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r27, WARP_SZ;
mov.u32 %r28, 32;
.loc 4 109 1
sub.s32 %r29, %r28, %r27;
shl.b32 %r30, %r29, 8;
or.b32 %r13, %r30, 31;
mov.u32 %r50, 1;
BB77_9:
.loc 1 126 1
// Shuffle step with offset %r50 (1, 2, 4, ... while below ntid.x): split the
// f64 into 32-bit halves, fetch both halves from the lane %r50 above, rebuild.
// Per the PTX shfl definition an out-of-range source lane yields the thread's
// own value, which leaves a max unchanged.
// inline asm
mov.b64 { %r31, %r32 }, %fd14;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r33, %r32, %r50, %r13;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r37, %r31, %r50, %r13;
// inline asm
.loc 3 3330 10
mov.b64 %fd13, {%r37, %r33};
.loc 3 2785 10
max.f64 %fd14, %fd14, %fd13;
.loc 1 223 40
shl.b32 %r50, %r50, 1;
.loc 1 223 1
setp.lt.u32 %p6, %r50, %r3;
@%p6 bra BB77_9;
BB77_10:
.loc 1 223 1
// Only threads whose tid.x is a multiple of 32 write a result.
mov.u32 %r42, %tid.x;
and.b32 %r43, %r42, 31;
setp.ne.s32 %p7, %r43, 0;
@%p7 bra BB77_12;
.loc 1 223 1
// Output slot = nctaid.y * ctaid.x + ctaid.y.
// NOTE(review): every writing thread of a block targets the same slot;
// presumably launched with one warp per block - confirm against the caller.
mov.u32 %r44, %nctaid.y;
mov.u32 %r45, %ctaid.x;
mad.lo.s32 %r47, %r44, %r45, %r19;
mul.wide.u32 %rd9, %r47, 8;
add.s64 %rd10, %rd1, %rd9;
st.global.f64 [%rd10], %fd14;
BB77_12:
.loc 1 223 2
ret;
}
// reduce_col_max_double: for each index j < param_1, takes the maximum of the
// f64 elements at j * param_4 + x (x < param_0) and stores it to param_2[j].
//   param_0 (u32): exclusive bound for the x index
//   param_1 (u32): exclusive bound for the output index j
//   param_2 (u64): output pointer (f64)
//   param_3 (u64): input pointer (f64)
//   param_4 (u32): element stride applied to j
// j starts at ntid.y * ctaid.x + tid.y and advances by nctaid.x * ntid.y.
// Within one j, a thread scans x = tid.x, tid.x + ntid.x, ...; partial maxima
// are combined with shfl.down and threads with (tid.x & 31) == 0 store.
// The per-iteration debug vprintf("aaa\n") that used to sit inside the shuffle
// loop has been removed; it had no effect on the result.
.visible .entry reduce_col_max_double(
.param .u32 reduce_col_max_double_param_0,
.param .u32 reduce_col_max_double_param_1,
.param .u64 reduce_col_max_double_param_2,
.param .u64 reduce_col_max_double_param_3,
.param .u32 reduce_col_max_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<41>;
.reg .s64 %rd<11>;
.reg .f64 %fd<13>;
ld.param.u32 %r15, [reduce_col_max_double_param_0];
ld.param.u32 %r16, [reduce_col_max_double_param_1];
ld.param.u64 %rd3, [reduce_col_max_double_param_2];
ld.param.u64 %rd4, [reduce_col_max_double_param_3];
ld.param.u32 %r17, [reduce_col_max_double_param_4];
cvta.to.global.u64 %rd1, %rd4;
.loc 1 223 1
// %r38 = first output index j owned by this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r38, %r1, %r18, %r19;
.loc 1 223 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB78_12;
.loc 1 223 1
mov.u32 %r3, %tid.x;
.loc 1 223 1
mov.u32 %r4, %ntid.x;
.loc 1 223 1
// %r5 = tid.x & 31, used below to pick the storing threads.
and.b32 %r5, %r3, 31;
.loc 1 223 22
// %r6 = step between successive j values of one thread.
mov.u32 %r20, %nctaid.x;
mul.lo.s32 %r6, %r20, %r1;
.loc 1 223 1
// %r7 = shuffle control word: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r21, WARP_SZ;
mov.u32 %r22, 32;
.loc 4 109 1
sub.s32 %r23, %r22, %r21;
shl.b32 %r24, %r23, 8;
or.b32 %r7, %r24, 31;
cvta.to.global.u64 %rd8, %rd3;
BB78_2:
setp.lt.s32 %p2, %r3, %r15;
.loc 1 223 1
@%p2 bra BB78_4;
// No x in range for this thread: contribute -infinity, the identity for max.
mov.f64 %fd12, 0dFFF0000000000000;
bra.uni BB78_6;
BB78_4:
.loc 1 223 1
mul.lo.s32 %r9, %r38, %r17;
mov.f64 %fd12, 0dFFF0000000000000;
mov.u32 %r39, %r3;
BB78_5:
.loc 1 223 1
// Scan x in steps of ntid.x, folding element j * stride + x into %fd12.
mov.u32 %r10, %r39;
add.s32 %r25, %r10, %r9;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd1, %rd5;
ld.global.f64 %fd9, [%rd6];
.loc 3 2785 10
max.f64 %fd12, %fd12, %fd9;
.loc 1 223 22
add.s32 %r11, %r4, %r10;
.loc 1 223 1
setp.lt.s32 %p3, %r11, %r15;
mov.u32 %r39, %r11;
@%p3 bra BB78_5;
BB78_6:
.loc 1 223 1
// NOTE(review): this barrier is inside the per-thread j loop and after the
// early exit above, so threads whose tid.y gives them a different trip count
// reach it a different number of times - confirm the launch shape keeps the
// trip count uniform within a block.
bar.sync 0;
.loc 1 223 1
// With fewer than two threads in x there is nothing to combine.
setp.lt.u32 %p4, %r4, 2;
@%p4 bra BB78_9;
mov.u32 %r40, 1;
BB78_8:
.loc 1 126 1
// Shuffle step with offset %r40 (1, 2, 4, ... while below ntid.x): split the
// f64 into 32-bit halves, fetch both halves from the lane %r40 above, rebuild.
// inline asm
mov.b64 { %r27, %r28 }, %fd12;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r29, %r28, %r40, %r7;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r33, %r27, %r40, %r7;
// inline asm
.loc 3 3330 10
mov.b64 %fd11, {%r33, %r29};
.loc 3 2785 10
max.f64 %fd12, %fd12, %fd11;
.loc 1 223 40
shl.b32 %r40, %r40, 1;
.loc 1 223 1
setp.lt.u32 %p5, %r40, %r4;
@%p5 bra BB78_8;
BB78_9:
.loc 1 223 1
// Only threads whose tid.x is a multiple of 32 store param_2[j].
setp.ne.s32 %p6, %r5, 0;
@%p6 bra BB78_11;
.loc 1 223 1
mul.wide.s32 %rd9, %r38, 8;
add.s64 %rd10, %rd8, %rd9;
st.global.f64 [%rd10], %fd12;
BB78_11:
.loc 1 223 22
add.s32 %r38, %r6, %r38;
.loc 1 223 1
setp.lt.s32 %p7, %r38, %r16;
@%p7 bra BB78_2;
BB78_12:
.loc 1 223 2
ret;
}
// reduce_row_max_double: for every output index i < param_0, takes the maximum
// of param_1 f64 values read from param_3 at element offsets k * param_4 + i
// (k = 0 .. param_1 - 1) and stores it to param_2[i].
//   param_0 (u32): exclusive bound for the output index i
//   param_1 (u32): number of values compared per output
//   param_2 (u64): output pointer (f64)
//   param_3 (u64): input pointer (f64)
//   param_4 (u32): element stride between consecutive values
// Each thread starts at i = ntid.x * ctaid.x + tid.x and advances by
// nctaid.x * ntid.x. A non-positive param_1 stores -infinity.
.visible .entry reduce_row_max_double(
.param .u32 reduce_row_max_double_param_0,
.param .u32 reduce_row_max_double_param_1,
.param .u64 reduce_row_max_double_param_2,
.param .u64 reduce_row_max_double_param_3,
.param .u32 reduce_row_max_double_param_4
)
{
.reg .pred %past_end, %no_terms, %more_terms, %more_out;
.reg .s32 %n_out, %n_terms, %stride, %blk_cnt, %blk_size, %step, %blk_x, %thr_x, %out_i, %k, %elem;
.reg .s64 %dst_gen, %src_gen, %dst, %src, %in_off, %in_addr, %out_off, %out_addr;
.reg .f64 %best, %cand;
ld.param.u64 %dst_gen, [reduce_row_max_double_param_2];
ld.param.u64 %src_gen, [reduce_row_max_double_param_3];
ld.param.u32 %n_out, [reduce_row_max_double_param_0];
ld.param.u32 %n_terms, [reduce_row_max_double_param_1];
ld.param.u32 %stride, [reduce_row_max_double_param_4];
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %src, %src_gen;
.loc 1 223 1
// Distance between successive outputs handled by the same thread.
mov.u32 %blk_size, %ntid.x;
mov.u32 %blk_cnt, %nctaid.x;
mul.lo.s32 %step, %blk_cnt, %blk_size;
.loc 1 223 1
// First output index owned by this thread.
mov.u32 %thr_x, %tid.x;
mov.u32 %blk_x, %ctaid.x;
mad.lo.s32 %out_i, %blk_size, %blk_x, %thr_x;
.loc 1 223 1
setp.ge.s32 %past_end, %out_i, %n_out;
@%past_end bra BB79_5;
BB79_1:
// Start of one output: seed with -infinity, skip the loop if there are no values.
mov.f64 %best, 0dFFF0000000000000;
setp.le.s32 %no_terms, %n_terms, 0;
.loc 1 223 1
@%no_terms bra BB79_4;
mov.u32 %k, 0;
BB79_3:
.loc 1 223 1
// Value k lives at element k * stride + i (8 bytes per f64).
mad.lo.s32 %elem, %k, %stride, %out_i;
mul.wide.s32 %in_off, %elem, 8;
add.s64 %in_addr, %src, %in_off;
.loc 1 223 1
ld.global.f64 %cand, [%in_addr];
.loc 3 2785 10
max.f64 %best, %best, %cand;
.loc 1 223 22
add.s32 %k, %k, 1;
.loc 1 223 1
setp.lt.s32 %more_terms, %k, %n_terms;
@%more_terms bra BB79_3;
BB79_4:
// Publish the result for index i, then move on to this thread's next output.
mul.wide.s32 %out_off, %out_i, 8;
add.s64 %out_addr, %dst, %out_off;
.loc 1 223 1
st.global.f64 [%out_addr], %best;
.loc 1 223 22
add.s32 %out_i, %out_i, %step;
.loc 1 223 1
setp.lt.s32 %more_out, %out_i, %n_out;
@%more_out bra BB79_1;
BB79_5:
.loc 1 223 2
ret;
}
// reduce_min_double: minimum of the f64 elements of a strided 2-D region.
//   param_0 (u32): exclusive bound for the x index
//   param_1 (u32): exclusive bound for the y index
//   param_2 (u64): output pointer (f64)
//   param_3 (u64): input pointer (f64)
//   param_4 (u32): element stride applied to the y index
// Each thread folds the elements at y * param_4 + x, with x starting at
// ntid.x * ctaid.x + tid.x (step nctaid.x * ntid.x) and y starting at
// ntid.y * ctaid.y + tid.y (step nctaid.y * ntid.y). Partial minima are then
// combined with shfl.down, and every thread with (tid.x & 31) == 0 stores its
// value to param_2[nctaid.y * ctaid.x + ctaid.y].
// The per-iteration debug vprintf("aaa\n") that used to sit inside the shuffle
// loop has been removed; it had no effect on the result.
.visible .entry reduce_min_double(
.param .u32 reduce_min_double_param_0,
.param .u32 reduce_min_double_param_1,
.param .u64 reduce_min_double_param_2,
.param .u64 reduce_min_double_param_3,
.param .u32 reduce_min_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<51>;
.reg .s64 %rd<11>;
.reg .f64 %fd<15>;
ld.param.u32 %r16, [reduce_min_double_param_0];
ld.param.u32 %r17, [reduce_min_double_param_1];
ld.param.u64 %rd4, [reduce_min_double_param_2];
ld.param.u64 %rd5, [reduce_min_double_param_3];
ld.param.u32 %r18, [reduce_min_double_param_4];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 224 1
// %r48 = first y index owned by this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r19, %ctaid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r48, %r1, %r19, %r20;
.loc 1 224 1
setp.lt.s32 %p1, %r48, %r17;
.loc 1 224 1
mov.u32 %r3, %ntid.x;
.loc 1 224 1
@%p1 bra BB80_2;
// No y index in range: contribute +infinity, the identity for min.
mov.f64 %fd14, 0d7FF0000000000000;
bra.uni BB80_7;
BB80_2:
.loc 1 224 1
// %r4 = first x index, %r5 = y step, %r6 = x step.
mov.u32 %r21, %tid.x;
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r4, %r3, %r22, %r21;
.loc 1 224 22
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r1;
.loc 1 224 22
mov.u32 %r24, %nctaid.x;
mul.lo.s32 %r6, %r24, %r3;
mov.f64 %fd14, 0d7FF0000000000000;
BB80_3:
.loc 1 224 1
// Outer loop over y; skip the inner loop when this thread has no x in range.
setp.ge.s32 %p2, %r4, %r16;
@%p2 bra BB80_6;
.loc 1 224 1
mul.lo.s32 %r8, %r48, %r18;
mov.u32 %r49, %r4;
BB80_5:
.loc 1 224 1
// Inner loop over x: load element y * stride + x and fold it in.
mov.u32 %r9, %r49;
add.s32 %r25, %r9, %r8;
mul.wide.s32 %rd6, %r25, 8;
add.s64 %rd7, %rd2, %rd6;
ld.global.f64 %fd11, [%rd7];
.loc 3 2780 10
min.f64 %fd14, %fd14, %fd11;
.loc 1 224 22
add.s32 %r10, %r6, %r9;
.loc 1 224 1
setp.lt.s32 %p3, %r10, %r16;
mov.u32 %r49, %r10;
@%p3 bra BB80_5;
BB80_6:
.loc 1 224 22
add.s32 %r48, %r5, %r48;
.loc 1 224 1
setp.lt.s32 %p4, %r48, %r17;
@%p4 bra BB80_3;
BB80_7:
.loc 1 224 1
bar.sync 0;
.loc 1 224 1
// With fewer than two threads in x there is nothing to combine.
setp.lt.u32 %p5, %r3, 2;
@%p5 bra BB80_10;
.loc 1 224 1
// %r13 = shuffle control word: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r27, WARP_SZ;
mov.u32 %r28, 32;
.loc 4 109 1
sub.s32 %r29, %r28, %r27;
shl.b32 %r30, %r29, 8;
or.b32 %r13, %r30, 31;
mov.u32 %r50, 1;
BB80_9:
.loc 1 126 1
// Shuffle step with offset %r50 (1, 2, 4, ... while below ntid.x): split the
// f64 into 32-bit halves, fetch both halves from the lane %r50 above, rebuild.
// Per the PTX shfl definition an out-of-range source lane yields the thread's
// own value, which leaves a min unchanged.
// inline asm
mov.b64 { %r31, %r32 }, %fd14;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r33, %r32, %r50, %r13;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r37, %r31, %r50, %r13;
// inline asm
.loc 3 3330 10
mov.b64 %fd13, {%r37, %r33};
.loc 3 2780 10
min.f64 %fd14, %fd14, %fd13;
.loc 1 224 40
shl.b32 %r50, %r50, 1;
.loc 1 224 1
setp.lt.u32 %p6, %r50, %r3;
@%p6 bra BB80_9;
BB80_10:
.loc 1 224 1
// Only threads whose tid.x is a multiple of 32 write a result.
mov.u32 %r42, %tid.x;
and.b32 %r43, %r42, 31;
setp.ne.s32 %p7, %r43, 0;
@%p7 bra BB80_12;
.loc 1 224 1
// Output slot = nctaid.y * ctaid.x + ctaid.y.
// NOTE(review): every writing thread of a block targets the same slot;
// presumably launched with one warp per block - confirm against the caller.
mov.u32 %r44, %nctaid.y;
mov.u32 %r45, %ctaid.x;
mad.lo.s32 %r47, %r44, %r45, %r19;
mul.wide.u32 %rd9, %r47, 8;
add.s64 %rd10, %rd1, %rd9;
st.global.f64 [%rd10], %fd14;
BB80_12:
.loc 1 224 2
ret;
}
// reduce_col_min_double: for each index j < param_1, takes the minimum of the
// f64 elements at j * param_4 + x (x < param_0) and stores it to param_2[j].
//   param_0 (u32): exclusive bound for the x index
//   param_1 (u32): exclusive bound for the output index j
//   param_2 (u64): output pointer (f64)
//   param_3 (u64): input pointer (f64)
//   param_4 (u32): element stride applied to j
// j starts at ntid.y * ctaid.x + tid.y and advances by nctaid.x * ntid.y.
// Within one j, a thread scans x = tid.x, tid.x + ntid.x, ...; partial minima
// are combined with shfl.down and threads with (tid.x & 31) == 0 store.
// The per-iteration debug vprintf("aaa\n") that used to sit inside the shuffle
// loop has been removed; it had no effect on the result.
.visible .entry reduce_col_min_double(
.param .u32 reduce_col_min_double_param_0,
.param .u32 reduce_col_min_double_param_1,
.param .u64 reduce_col_min_double_param_2,
.param .u64 reduce_col_min_double_param_3,
.param .u32 reduce_col_min_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<41>;
.reg .s64 %rd<11>;
.reg .f64 %fd<13>;
ld.param.u32 %r15, [reduce_col_min_double_param_0];
ld.param.u32 %r16, [reduce_col_min_double_param_1];
ld.param.u64 %rd3, [reduce_col_min_double_param_2];
ld.param.u64 %rd4, [reduce_col_min_double_param_3];
ld.param.u32 %r17, [reduce_col_min_double_param_4];
cvta.to.global.u64 %rd1, %rd4;
.loc 1 224 1
// %r38 = first output index j owned by this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r38, %r1, %r18, %r19;
.loc 1 224 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB81_12;
.loc 1 224 1
mov.u32 %r3, %tid.x;
.loc 1 224 1
mov.u32 %r4, %ntid.x;
.loc 1 224 1
// %r5 = tid.x & 31, used below to pick the storing threads.
and.b32 %r5, %r3, 31;
.loc 1 224 22
// %r6 = step between successive j values of one thread.
mov.u32 %r20, %nctaid.x;
mul.lo.s32 %r6, %r20, %r1;
.loc 1 224 1
// %r7 = shuffle control word: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r21, WARP_SZ;
mov.u32 %r22, 32;
.loc 4 109 1
sub.s32 %r23, %r22, %r21;
shl.b32 %r24, %r23, 8;
or.b32 %r7, %r24, 31;
cvta.to.global.u64 %rd8, %rd3;
BB81_2:
setp.lt.s32 %p2, %r3, %r15;
.loc 1 224 1
@%p2 bra BB81_4;
// No x in range for this thread: contribute +infinity, the identity for min.
mov.f64 %fd12, 0d7FF0000000000000;
bra.uni BB81_6;
BB81_4:
.loc 1 224 1
mul.lo.s32 %r9, %r38, %r17;
mov.f64 %fd12, 0d7FF0000000000000;
mov.u32 %r39, %r3;
BB81_5:
.loc 1 224 1
// Scan x in steps of ntid.x, folding element j * stride + x into %fd12.
mov.u32 %r10, %r39;
add.s32 %r25, %r10, %r9;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd1, %rd5;
ld.global.f64 %fd9, [%rd6];
.loc 3 2780 10
min.f64 %fd12, %fd12, %fd9;
.loc 1 224 22
add.s32 %r11, %r4, %r10;
.loc 1 224 1
setp.lt.s32 %p3, %r11, %r15;
mov.u32 %r39, %r11;
@%p3 bra BB81_5;
BB81_6:
.loc 1 224 1
// NOTE(review): this barrier is inside the per-thread j loop and after the
// early exit above, so threads whose tid.y gives them a different trip count
// reach it a different number of times - confirm the launch shape keeps the
// trip count uniform within a block.
bar.sync 0;
.loc 1 224 1
// With fewer than two threads in x there is nothing to combine.
setp.lt.u32 %p4, %r4, 2;
@%p4 bra BB81_9;
mov.u32 %r40, 1;
BB81_8:
.loc 1 126 1
// Shuffle step with offset %r40 (1, 2, 4, ... while below ntid.x): split the
// f64 into 32-bit halves, fetch both halves from the lane %r40 above, rebuild.
// inline asm
mov.b64 { %r27, %r28 }, %fd12;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r29, %r28, %r40, %r7;
// inline asm
.loc 4 110 1
// inline asm
shfl.down.b32 %r33, %r27, %r40, %r7;
// inline asm
.loc 3 3330 10
mov.b64 %fd11, {%r33, %r29};
.loc 3 2780 10
min.f64 %fd12, %fd12, %fd11;
.loc 1 224 40
shl.b32 %r40, %r40, 1;
.loc 1 224 1
setp.lt.u32 %p5, %r40, %r4;
@%p5 bra BB81_8;
BB81_9:
.loc 1 224 1
// Only threads whose tid.x is a multiple of 32 store param_2[j].
setp.ne.s32 %p6, %r5, 0;
@%p6 bra BB81_11;
.loc 1 224 1
mul.wide.s32 %rd9, %r38, 8;
add.s64 %rd10, %rd8, %rd9;
st.global.f64 [%rd10], %fd12;
BB81_11:
.loc 1 224 22
add.s32 %r38, %r6, %r38;
.loc 1 224 1
setp.lt.s32 %p7, %r38, %r16;
@%p7 bra BB81_2;
BB81_12:
.loc 1 224 2
ret;
}
// reduce_row_min_double: for every output index i < param_0, takes the minimum
// of param_1 f64 values read from param_3 at element offsets k * param_4 + i
// (k = 0 .. param_1 - 1) and stores it to param_2[i].
//   param_0 (u32): exclusive bound for the output index i
//   param_1 (u32): number of values compared per output
//   param_2 (u64): output pointer (f64)
//   param_3 (u64): input pointer (f64)
//   param_4 (u32): element stride between consecutive values
// Each thread starts at i = ntid.x * ctaid.x + tid.x and advances by
// nctaid.x * ntid.x. A non-positive param_1 stores +infinity.
.visible .entry reduce_row_min_double(
.param .u32 reduce_row_min_double_param_0,
.param .u32 reduce_row_min_double_param_1,
.param .u64 reduce_row_min_double_param_2,
.param .u64 reduce_row_min_double_param_3,
.param .u32 reduce_row_min_double_param_4
)
{
.reg .pred %past_end, %no_terms, %more_terms, %more_out;
.reg .s32 %n_out, %n_terms, %stride, %blk_cnt, %blk_size, %step, %blk_x, %thr_x, %out_i, %k, %elem;
.reg .s64 %dst_gen, %src_gen, %dst, %src, %in_off, %in_addr, %out_off, %out_addr;
.reg .f64 %best, %cand;
ld.param.u64 %dst_gen, [reduce_row_min_double_param_2];
ld.param.u64 %src_gen, [reduce_row_min_double_param_3];
ld.param.u32 %n_out, [reduce_row_min_double_param_0];
ld.param.u32 %n_terms, [reduce_row_min_double_param_1];
ld.param.u32 %stride, [reduce_row_min_double_param_4];
cvta.to.global.u64 %dst, %dst_gen;
cvta.to.global.u64 %src, %src_gen;
.loc 1 224 1
// Distance between successive outputs handled by the same thread.
mov.u32 %blk_size, %ntid.x;
mov.u32 %blk_cnt, %nctaid.x;
mul.lo.s32 %step, %blk_cnt, %blk_size;
.loc 1 224 1
// First output index owned by this thread.
mov.u32 %thr_x, %tid.x;
mov.u32 %blk_x, %ctaid.x;
mad.lo.s32 %out_i, %blk_size, %blk_x, %thr_x;
.loc 1 224 1
setp.ge.s32 %past_end, %out_i, %n_out;
@%past_end bra BB82_5;
BB82_1:
// Start of one output: seed with +infinity, skip the loop if there are no values.
mov.f64 %best, 0d7FF0000000000000;
setp.le.s32 %no_terms, %n_terms, 0;
.loc 1 224 1
@%no_terms bra BB82_4;
mov.u32 %k, 0;
BB82_3:
.loc 1 224 1
// Value k lives at element k * stride + i (8 bytes per f64).
mad.lo.s32 %elem, %k, %stride, %out_i;
mul.wide.s32 %in_off, %elem, 8;
add.s64 %in_addr, %src, %in_off;
.loc 1 224 1
ld.global.f64 %cand, [%in_addr];
.loc 3 2780 10
min.f64 %best, %best, %cand;
.loc 1 224 22
add.s32 %k, %k, 1;
.loc 1 224 1
setp.lt.s32 %more_terms, %k, %n_terms;
@%more_terms bra BB82_3;
BB82_4:
// Publish the result for index i, then move on to this thread's next output.
mul.wide.s32 %out_off, %out_i, 8;
add.s64 %out_addr, %dst, %out_off;
.loc 1 224 1
st.global.f64 [%out_addr], %best;
.loc 1 224 22
add.s32 %out_i, %out_i, %step;
.loc 1 224 1
setp.lt.s32 %more_out, %out_i, %n_out;
@%more_out bra BB82_1;
BB82_5:
.loc 1 224 2
ret;
}
// vec_addf: param_1[i] = param_2[i] + param_3[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_addf(
.param .u64 vec_addf_param_0,
.param .u64 vec_addf_param_1,
.param .u64 vec_addf_param_2,
.param .u64 vec_addf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_addf_param_0];
ld.param.u64 %rd5, [vec_addf_param_1];
ld.param.u64 %rd6, [vec_addf_param_2];
ld.param.u64 %rd7, [vec_addf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 234 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 235 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB83_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 237 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
add.f64 %fd3, %fd2, %fd1;
add.s64 %rd12, %rd1, %rd9;
.loc 1 237 1
st.global.f64 [%rd12], %fd3;
BB83_2:
.loc 1 239 2
ret;
}
// vec_subf: param_1[i] = param_2[i] - param_3[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer (minuend)
//   param_3 (u64): right operand pointer (subtrahend)
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_subf(
.param .u64 vec_subf_param_0,
.param .u64 vec_subf_param_1,
.param .u64 vec_subf_param_2,
.param .u64 vec_subf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_subf_param_0];
ld.param.u64 %rd5, [vec_subf_param_1];
ld.param.u64 %rd6, [vec_subf_param_2];
ld.param.u64 %rd7, [vec_subf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 245 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 246 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB84_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 248 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
sub.f64 %fd3, %fd2, %fd1;
add.s64 %rd12, %rd1, %rd9;
.loc 1 248 1
st.global.f64 [%rd12], %fd3;
BB84_2:
.loc 1 250 2
ret;
}
// vec_mulf: param_1[i] = param_2[i] * param_3[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_mulf(
.param .u64 vec_mulf_param_0,
.param .u64 vec_mulf_param_1,
.param .u64 vec_mulf_param_2,
.param .u64 vec_mulf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_mulf_param_0];
ld.param.u64 %rd5, [vec_mulf_param_1];
ld.param.u64 %rd6, [vec_mulf_param_2];
ld.param.u64 %rd7, [vec_mulf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 256 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 257 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB85_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 259 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
mul.f64 %fd3, %fd2, %fd1;
add.s64 %rd12, %rd1, %rd9;
.loc 1 259 1
st.global.f64 [%rd12], %fd3;
BB85_2:
.loc 1 261 2
ret;
}
// vec_divf: param_1[i] = param_2[i] / param_3[i] (f64, round-to-nearest) for
// every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): dividend pointer
//   param_3 (u64): divisor pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_divf(
.param .u64 vec_divf_param_0,
.param .u64 vec_divf_param_1,
.param .u64 vec_divf_param_2,
.param .u64 vec_divf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_divf_param_0];
ld.param.u64 %rd5, [vec_divf_param_1];
ld.param.u64 %rd6, [vec_divf_param_2];
ld.param.u64 %rd7, [vec_divf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 267 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 268 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB86_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 270 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
.loc 3 3614 3
div.rn.f64 %fd3, %fd2, %fd1;
add.s64 %rd12, %rd1, %rd9;
.loc 1 270 61
st.global.f64 [%rd12], %fd3;
BB86_2:
.loc 1 272 2
ret;
}
// vec_negatef: param_1[i] = -param_2[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): source pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements (which would negate an in-place element twice) nor go negative.
.visible .entry vec_negatef(
.param .u64 vec_negatef_param_0,
.param .u64 vec_negatef_param_1,
.param .u64 vec_negatef_param_2
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<3>;
ld.param.u64 %rd3, [vec_negatef_param_0];
ld.param.u64 %rd4, [vec_negatef_param_1];
ld.param.u64 %rd5, [vec_negatef_param_2];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 277 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 278 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB87_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 280 1
ld.global.f64 %fd1, [%rd8];
neg.f64 %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 280 1
st.global.f64 [%rd9], %fd2;
BB87_2:
.loc 1 282 2
ret;
}
// vec_addScalarf: param_1[i] = param_2[i] + param_3 (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): source pointer
//   param_3 (f64): scalar addend
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_addScalarf(
.param .u64 vec_addScalarf_param_0,
.param .u64 vec_addScalarf_param_1,
.param .u64 vec_addScalarf_param_2,
.param .f64 vec_addScalarf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_addScalarf_param_0];
ld.param.u64 %rd4, [vec_addScalarf_param_1];
ld.param.u64 %rd5, [vec_addScalarf_param_2];
ld.param.f64 %fd1, [vec_addScalarf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 292 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 293 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB88_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 295 1
ld.global.f64 %fd2, [%rd8];
add.f64 %fd3, %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 295 1
st.global.f64 [%rd9], %fd3;
BB88_2:
.loc 1 297 2
ret;
}
// vec_subScalarf: param_1[i] = param_2[i] - param_3 (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): source pointer (minuend)
//   param_3 (f64): scalar subtrahend
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_subScalarf(
.param .u64 vec_subScalarf_param_0,
.param .u64 vec_subScalarf_param_1,
.param .u64 vec_subScalarf_param_2,
.param .f64 vec_subScalarf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_subScalarf_param_0];
ld.param.u64 %rd4, [vec_subScalarf_param_1];
ld.param.u64 %rd5, [vec_subScalarf_param_2];
ld.param.f64 %fd1, [vec_subScalarf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 303 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 304 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB89_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 306 1
ld.global.f64 %fd2, [%rd8];
sub.f64 %fd3, %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 306 1
st.global.f64 [%rd9], %fd3;
BB89_2:
.loc 1 308 2
ret;
}
// vec_mulScalarf: param_1[i] = param_2[i] * param_3 (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): source pointer
//   param_3 (f64): scalar factor
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_mulScalarf(
.param .u64 vec_mulScalarf_param_0,
.param .u64 vec_mulScalarf_param_1,
.param .u64 vec_mulScalarf_param_2,
.param .f64 vec_mulScalarf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_mulScalarf_param_0];
ld.param.u64 %rd4, [vec_mulScalarf_param_1];
ld.param.u64 %rd5, [vec_mulScalarf_param_2];
ld.param.f64 %fd1, [vec_mulScalarf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 314 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 315 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB90_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 317 1
ld.global.f64 %fd2, [%rd8];
mul.f64 %fd3, %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 317 1
st.global.f64 [%rd9], %fd3;
BB90_2:
.loc 1 319 2
ret;
}
// vec_divScalarf: param_1[i] = param_2[i] / param_3 (f64, round-to-nearest)
// for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): source pointer (dividend)
//   param_3 (f64): scalar divisor
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_divScalarf(
.param .u64 vec_divScalarf_param_0,
.param .u64 vec_divScalarf_param_1,
.param .u64 vec_divScalarf_param_2,
.param .f64 vec_divScalarf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_divScalarf_param_0];
ld.param.u64 %rd4, [vec_divScalarf_param_1];
ld.param.u64 %rd5, [vec_divScalarf_param_2];
ld.param.f64 %fd1, [vec_divScalarf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 325 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 326 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB91_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 328 1
ld.global.f64 %fd2, [%rd8];
.loc 3 3614 3
div.rn.f64 %fd3, %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 328 61
st.global.f64 [%rd9], %fd3;
BB91_2:
.loc 1 330 2
ret;
}
// vec_scalarAddf: param_1[i] = param_2 + param_3[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (f64): scalar addend
//   param_3 (u64): source pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_scalarAddf(
.param .u64 vec_scalarAddf_param_0,
.param .u64 vec_scalarAddf_param_1,
.param .f64 vec_scalarAddf_param_2,
.param .u64 vec_scalarAddf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_scalarAddf_param_0];
ld.param.u64 %rd4, [vec_scalarAddf_param_1];
ld.param.f64 %fd1, [vec_scalarAddf_param_2];
ld.param.u64 %rd5, [vec_scalarAddf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 338 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 339 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB92_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 341 1
ld.global.f64 %fd2, [%rd8];
add.f64 %fd3, %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 341 1
st.global.f64 [%rd9], %fd3;
BB92_2:
.loc 1 343 2
ret;
}
// vec_scalarSubf: param_1[i] = param_2 - param_3[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (f64): scalar minuend
//   param_3 (u64): source pointer (subtrahend)
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_scalarSubf(
.param .u64 vec_scalarSubf_param_0,
.param .u64 vec_scalarSubf_param_1,
.param .f64 vec_scalarSubf_param_2,
.param .u64 vec_scalarSubf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_scalarSubf_param_0];
ld.param.u64 %rd4, [vec_scalarSubf_param_1];
ld.param.f64 %fd1, [vec_scalarSubf_param_2];
ld.param.u64 %rd5, [vec_scalarSubf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 349 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 350 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB93_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 352 1
ld.global.f64 %fd2, [%rd8];
// Scalar on the left: scalar - element.
sub.f64 %fd3, %fd1, %fd2;
add.s64 %rd9, %rd1, %rd7;
.loc 1 352 1
st.global.f64 [%rd9], %fd3;
BB93_2:
.loc 1 354 2
ret;
}
// vec_scalarMulf: param_1[i] = param_2 * param_3[i] (f64) for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (f64): scalar factor
//   param_3 (u64): source pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_scalarMulf(
.param .u64 vec_scalarMulf_param_0,
.param .u64 vec_scalarMulf_param_1,
.param .f64 vec_scalarMulf_param_2,
.param .u64 vec_scalarMulf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_scalarMulf_param_0];
ld.param.u64 %rd4, [vec_scalarMulf_param_1];
ld.param.f64 %fd1, [vec_scalarMulf_param_2];
ld.param.u64 %rd5, [vec_scalarMulf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 360 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 361 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB94_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 363 1
ld.global.f64 %fd2, [%rd8];
mul.f64 %fd3, %fd2, %fd1;
add.s64 %rd9, %rd1, %rd7;
.loc 1 363 1
st.global.f64 [%rd9], %fd3;
BB94_2:
.loc 1 365 2
ret;
}
// vec_scalarDivf: param_1[i] = param_2 / param_3[i] (f64, round-to-nearest)
// for every i < param_0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (f64): scalar dividend
//   param_3 (u64): source pointer (divisor)
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_scalarDivf(
.param .u64 vec_scalarDivf_param_0,
.param .u64 vec_scalarDivf_param_1,
.param .f64 vec_scalarDivf_param_2,
.param .u64 vec_scalarDivf_param_3
)
{
.reg .pred %p<2>;
.reg .s32 %r<5>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_scalarDivf_param_0];
ld.param.u64 %rd4, [vec_scalarDivf_param_1];
ld.param.f64 %fd1, [vec_scalarDivf_param_2];
ld.param.u64 %rd5, [vec_scalarDivf_param_3];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 371 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd6, %r2, %r3;
cvt.u64.u32 %rd10, %r4;
add.s64 %rd6, %rd6, %rd10;
.loc 1 372 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd6, %rd3;
@%p1 bra BB95_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd7, %rd6, 3;
add.s64 %rd8, %rd2, %rd7;
.loc 1 374 1
ld.global.f64 %fd2, [%rd8];
.loc 3 3614 3
// Scalar on the left: scalar / element.
div.rn.f64 %fd3, %fd1, %fd2;
add.s64 %rd9, %rd1, %rd7;
.loc 1 374 61
st.global.f64 [%rd9], %fd3;
BB95_2:
.loc 1 376 2
ret;
}
// vec_ltf: param_1[i] = (param_2[i] < param_3[i]) ? 1.0 : 0.0 (f64) for every
// i < param_0. The compare is ordered, so a NaN operand gives 0.0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_ltf(
.param .u64 vec_ltf_param_0,
.param .u64 vec_ltf_param_1,
.param .u64 vec_ltf_param_2,
.param .u64 vec_ltf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_ltf_param_0];
ld.param.u64 %rd5, [vec_ltf_param_1];
ld.param.u64 %rd6, [vec_ltf_param_2];
ld.param.u64 %rd7, [vec_ltf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 393 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 394 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB96_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 396 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
setp.lt.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd12, %rd1, %rd9;
.loc 1 396 1
st.global.f64 [%rd12], %fd3;
BB96_2:
.loc 1 398 2
ret;
}
// vec_ltef: param_1[i] = (param_2[i] <= param_3[i]) ? 1.0 : 0.0 (f64) for every
// i < param_0. Implemented as the negation of "greater or unordered", so a NaN
// operand gives 0.0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_ltef(
.param .u64 vec_ltef_param_0,
.param .u64 vec_ltef_param_1,
.param .u64 vec_ltef_param_2,
.param .u64 vec_ltef_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_ltef_param_0];
ld.param.u64 %rd5, [vec_ltef_param_1];
ld.param.u64 %rd6, [vec_ltef_param_2];
ld.param.u64 %rd7, [vec_ltef_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 404 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 405 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB97_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 407 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
// %p2 = left > right or unordered; the select picks 0.0 when it holds.
setp.gtu.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d0000000000000000, 0d3FF0000000000000, %p2;
add.s64 %rd12, %rd1, %rd9;
.loc 1 407 1
st.global.f64 [%rd12], %fd3;
BB97_2:
.loc 1 409 2
ret;
}
// vec_eqf: param_1[i] = (param_2[i] == param_3[i]) ? 1.0 : 0.0 (f64) for every
// i < param_0. The compare is ordered, so a NaN operand gives 0.0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_eqf(
.param .u64 vec_eqf_param_0,
.param .u64 vec_eqf_param_1,
.param .u64 vec_eqf_param_2,
.param .u64 vec_eqf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_eqf_param_0];
ld.param.u64 %rd5, [vec_eqf_param_1];
ld.param.u64 %rd6, [vec_eqf_param_2];
ld.param.u64 %rd7, [vec_eqf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 415 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 416 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB98_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 418 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
setp.eq.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd12, %rd1, %rd9;
.loc 1 418 1
st.global.f64 [%rd12], %fd3;
BB98_2:
.loc 1 420 2
ret;
}
// vec_gtef: param_1[i] = (param_2[i] >= param_3[i]) ? 1.0 : 0.0 (f64) for every
// i < param_0. Implemented as the negation of "less or unordered", so a NaN
// operand gives 0.0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_gtef(
.param .u64 vec_gtef_param_0,
.param .u64 vec_gtef_param_1,
.param .u64 vec_gtef_param_2,
.param .u64 vec_gtef_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_gtef_param_0];
ld.param.u64 %rd5, [vec_gtef_param_1];
ld.param.u64 %rd6, [vec_gtef_param_2];
ld.param.u64 %rd7, [vec_gtef_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 426 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 427 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB99_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 429 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
// %p2 = left < right or unordered; the select picks 0.0 when it holds.
setp.ltu.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d0000000000000000, 0d3FF0000000000000, %p2;
add.s64 %rd12, %rd1, %rd9;
.loc 1 429 1
st.global.f64 [%rd12], %fd3;
BB99_2:
.loc 1 431 2
ret;
}
// vec_gtf: param_1[i] = (param_2[i] > param_3[i]) ? 1.0 : 0.0 (f64) for every
// i < param_0. The compare is ordered, so a NaN operand gives 0.0.
//   param_0 (u64): element count
//   param_1 (u64): destination pointer
//   param_2 (u64): left operand pointer
//   param_3 (u64): right operand pointer
// One thread handles one element, i = ntid.x * ctaid.x + tid.x. The index is
// built in 64 bits so launches covering 2^31 or more threads neither wrap onto
// low elements nor go negative.
.visible .entry vec_gtf(
.param .u64 vec_gtf_param_0,
.param .u64 vec_gtf_param_1,
.param .u64 vec_gtf_param_2,
.param .u64 vec_gtf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<5>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_gtf_param_0];
ld.param.u64 %rd5, [vec_gtf_param_1];
ld.param.u64 %rd6, [vec_gtf_param_2];
ld.param.u64 %rd7, [vec_gtf_param_3];
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 437 1
mov.u32 %r2, %ntid.x;
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %tid.x;
// Widen first: block offset as u32 x u32 -> u64, then add the thread index.
mul.wide.u32 %rd8, %r2, %r3;
cvt.u64.u32 %rd13, %r4;
add.s64 %rd8, %rd8, %rd13;
.loc 1 438 1
// Threads at or past the element count do nothing.
setp.ge.u64 %p1, %rd8, %rd4;
@%p1 bra BB100_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd9, %rd8, 3;
add.s64 %rd10, %rd3, %rd9;
add.s64 %rd11, %rd2, %rd9;
.loc 1 440 1
ld.global.f64 %fd1, [%rd11];
ld.global.f64 %fd2, [%rd10];
setp.gt.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd12, %rd1, %rd9;
.loc 1 440 1
st.global.f64 [%rd12], %fd3;
BB100_2:
.loc 1 442 2
ret;
}
// vec_nef: element-wise "not equal" of two f64 arrays in global memory.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 left-hand input array
//   param_3 (u64): generic address of the f64 right-hand input array
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when left[i] != right[i], otherwise 0.0 (1.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_nef(
.param .u64 vec_nef_param_0,
.param .u64 vec_nef_param_1,
.param .u64 vec_nef_param_2,
.param .u64 vec_nef_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<14>;
.reg .f64 %fd<4>;
ld.param.u64 %rd4, [vec_nef_param_0];
ld.param.u64 %rd5, [vec_nef_param_1];
ld.param.u64 %rd6, [vec_nef_param_2];
ld.param.u64 %rd7, [vec_nef_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = right, %rd3 = left.
cvta.to.global.u64 %rd1, %rd5;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd6;
.loc 1 449 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd8, %r3;
mad.wide.u32 %rd9, %r1, %r2, %rd8;
.loc 1 450 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd9, %rd4;
@%p1 bra BB101_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd10, %rd9, 3;
add.s64 %rd11, %rd3, %rd10;
add.s64 %rd12, %rd2, %rd10;
.loc 1 452 1
ld.global.f64 %fd1, [%rd12];
ld.global.f64 %fd2, [%rd11];
// neu is true for "not equal" or unordered, so NaN operands yield 1.0.
setp.neu.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd13, %rd1, %rd10;
.loc 1 452 1
st.global.f64 [%rd13], %fd3;
BB101_2:
.loc 1 454 2
ret;
}
// vec_ltScalarf: element-wise "less than" of an f64 array against an f64 scalar.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 input array
//   param_3 (f64): scalar right-hand operand
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when in[i] < scalar, otherwise 0.0 (0.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_ltScalarf(
.param .u64 vec_ltScalarf_param_0,
.param .u64 vec_ltScalarf_param_1,
.param .u64 vec_ltScalarf_param_2,
.param .f64 vec_ltScalarf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_ltScalarf_param_0];
ld.param.u64 %rd4, [vec_ltScalarf_param_1];
ld.param.u64 %rd5, [vec_ltScalarf_param_2];
ld.param.f64 %fd1, [vec_ltScalarf_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = input.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 464 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd6, %r3;
mad.wide.u32 %rd7, %r1, %r2, %rd6;
.loc 1 465 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd7, %rd3;
@%p1 bra BB102_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd8, %rd7, 3;
add.s64 %rd9, %rd2, %rd8;
.loc 1 467 1
ld.global.f64 %fd2, [%rd9];
// Ordered compare: false (-> 0.0) when either operand is NaN.
setp.lt.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd10, %rd1, %rd8;
.loc 1 467 1
st.global.f64 [%rd10], %fd3;
BB102_2:
.loc 1 469 2
ret;
}
// vec_lteScalarf: element-wise "less than or equal" of an f64 array against an f64 scalar.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 input array
//   param_3 (f64): scalar right-hand operand
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when in[i] <= scalar, otherwise 0.0 (0.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_lteScalarf(
.param .u64 vec_lteScalarf_param_0,
.param .u64 vec_lteScalarf_param_1,
.param .u64 vec_lteScalarf_param_2,
.param .f64 vec_lteScalarf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_lteScalarf_param_0];
ld.param.u64 %rd4, [vec_lteScalarf_param_1];
ld.param.u64 %rd5, [vec_lteScalarf_param_2];
ld.param.f64 %fd1, [vec_lteScalarf_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = input.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 475 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd6, %r3;
mad.wide.u32 %rd7, %r1, %r2, %rd6;
.loc 1 476 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd7, %rd3;
@%p1 bra BB103_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd8, %rd7, 3;
add.s64 %rd9, %rd2, %rd8;
.loc 1 478 1
ld.global.f64 %fd2, [%rd9];
// gtu is true for "greater than" or unordered; the select is inverted, so the result is
// 1.0 exactly when in[i] <= scalar with both operands ordered.
setp.gtu.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d0000000000000000, 0d3FF0000000000000, %p2;
add.s64 %rd10, %rd1, %rd8;
.loc 1 478 1
st.global.f64 [%rd10], %fd3;
BB103_2:
.loc 1 480 2
ret;
}
// vec_eqScalarf: element-wise "equal" of an f64 array against an f64 scalar.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 input array
//   param_3 (f64): scalar right-hand operand
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when in[i] == scalar, otherwise 0.0 (0.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_eqScalarf(
.param .u64 vec_eqScalarf_param_0,
.param .u64 vec_eqScalarf_param_1,
.param .u64 vec_eqScalarf_param_2,
.param .f64 vec_eqScalarf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_eqScalarf_param_0];
ld.param.u64 %rd4, [vec_eqScalarf_param_1];
ld.param.u64 %rd5, [vec_eqScalarf_param_2];
ld.param.f64 %fd1, [vec_eqScalarf_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = input.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 486 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd6, %r3;
mad.wide.u32 %rd7, %r1, %r2, %rd6;
.loc 1 487 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd7, %rd3;
@%p1 bra BB104_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd8, %rd7, 3;
add.s64 %rd9, %rd2, %rd8;
.loc 1 489 1
ld.global.f64 %fd2, [%rd9];
// Ordered compare: false (-> 0.0) when either operand is NaN.
setp.eq.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd10, %rd1, %rd8;
.loc 1 489 1
st.global.f64 [%rd10], %fd3;
BB104_2:
.loc 1 491 2
ret;
}
// vec_gteScalarf: element-wise "greater than or equal" of an f64 array against an f64 scalar.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 input array
//   param_3 (f64): scalar right-hand operand
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when in[i] >= scalar, otherwise 0.0 (0.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_gteScalarf(
.param .u64 vec_gteScalarf_param_0,
.param .u64 vec_gteScalarf_param_1,
.param .u64 vec_gteScalarf_param_2,
.param .f64 vec_gteScalarf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_gteScalarf_param_0];
ld.param.u64 %rd4, [vec_gteScalarf_param_1];
ld.param.u64 %rd5, [vec_gteScalarf_param_2];
ld.param.f64 %fd1, [vec_gteScalarf_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = input.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 497 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd6, %r3;
mad.wide.u32 %rd7, %r1, %r2, %rd6;
.loc 1 498 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd7, %rd3;
@%p1 bra BB105_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd8, %rd7, 3;
add.s64 %rd9, %rd2, %rd8;
.loc 1 500 1
ld.global.f64 %fd2, [%rd9];
// ltu is true for "less than" or unordered; the select is inverted, so the result is
// 1.0 exactly when in[i] >= scalar with both operands ordered.
setp.ltu.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d0000000000000000, 0d3FF0000000000000, %p2;
add.s64 %rd10, %rd1, %rd8;
.loc 1 500 1
st.global.f64 [%rd10], %fd3;
BB105_2:
.loc 1 502 2
ret;
}
// vec_gtScalarf: element-wise "greater than" of an f64 array against an f64 scalar.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 input array
//   param_3 (f64): scalar right-hand operand
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when in[i] > scalar, otherwise 0.0 (0.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_gtScalarf(
.param .u64 vec_gtScalarf_param_0,
.param .u64 vec_gtScalarf_param_1,
.param .u64 vec_gtScalarf_param_2,
.param .f64 vec_gtScalarf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_gtScalarf_param_0];
ld.param.u64 %rd4, [vec_gtScalarf_param_1];
ld.param.u64 %rd5, [vec_gtScalarf_param_2];
ld.param.f64 %fd1, [vec_gtScalarf_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = input.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 508 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd6, %r3;
mad.wide.u32 %rd7, %r1, %r2, %rd6;
.loc 1 509 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd7, %rd3;
@%p1 bra BB106_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd8, %rd7, 3;
add.s64 %rd9, %rd2, %rd8;
.loc 1 511 1
ld.global.f64 %fd2, [%rd9];
// Ordered compare: false (-> 0.0) when either operand is NaN.
setp.gt.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd10, %rd1, %rd8;
.loc 1 511 1
st.global.f64 [%rd10], %fd3;
BB106_2:
.loc 1 513 2
ret;
}
// vec_neScalarf: element-wise "not equal" of an f64 array against an f64 scalar.
//   param_0 (u64): index bound; threads whose global index is >= this value do nothing
//   param_1 (u64): generic address of the f64 output array
//   param_2 (u64): generic address of the f64 input array
//   param_3 (f64): scalar right-hand operand
// Each thread handles one element i = %ntid.x * %ctaid.x + %tid.x (x dimension only) and
// stores 1.0 when in[i] != scalar, otherwise 0.0 (1.0 also when either value is NaN).
// The index is formed in 64 bits so that it cannot wrap when the launch has 2^31 or more
// threads; the bound in param_0 is 64-bit as well.
.visible .entry vec_neScalarf(
.param .u64 vec_neScalarf_param_0,
.param .u64 vec_neScalarf_param_1,
.param .u64 vec_neScalarf_param_2,
.param .f64 vec_neScalarf_param_3
)
{
.reg .pred %p<3>;
.reg .s32 %r<4>;
.reg .s64 %rd<11>;
.reg .f64 %fd<4>;
ld.param.u64 %rd3, [vec_neScalarf_param_0];
ld.param.u64 %rd4, [vec_neScalarf_param_1];
ld.param.u64 %rd5, [vec_neScalarf_param_2];
ld.param.f64 %fd1, [vec_neScalarf_param_3];
// Convert the generic pointers to global-space addresses: %rd1 = output, %rd2 = input.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 1 519 1
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
// 64-bit global index: full 32x32->64 product plus the zero-extended thread id.
cvt.u64.u32 %rd6, %r3;
mad.wide.u32 %rd7, %r1, %r2, %rd6;
.loc 1 520 1
// Bounds guard: skip the body when index >= bound (unsigned compare).
setp.ge.u64 %p1, %rd7, %rd3;
@%p1 bra BB107_2;
// Byte offset of element i (8 bytes per f64).
shl.b64 %rd8, %rd7, 3;
add.s64 %rd9, %rd2, %rd8;
.loc 1 522 1
ld.global.f64 %fd2, [%rd9];
// neu is true for "not equal" or unordered, so NaN operands yield 1.0.
setp.neu.f64 %p2, %fd2, %fd1;
selp.f64 %fd3, 0d3FF0000000000000, 0d0000000000000000, %p2;
add.s64 %rd10, %rd1, %rd8;
.loc 1 522 1
st.global.f64 [%rd10], %fd3;
BB107_2:
.loc 1 524 2
ret;
}
// __internal_trig_reduction_slowpathd: device helper taking an f64 (param_0) and a 64-bit
// address (param_1). It multiplies the argument's 64-bit significand by a window of 64-bit
// words from the constant table __cudart_i2opi_d, writes a signed 32-bit integer through
// the address in param_1, and returns an f64 rebuilt from the remaining fraction bits.
// NOTE(review): judging by the function and table names this is the large-argument
// reduction for sin/cos (table = bits of 2/pi, stored integer = quadrant) - confirm
// against the CUDA math headers.
.func (.param .b64 func_retval0) __internal_trig_reduction_slowpathd(
.param .b64 __internal_trig_reduction_slowpathd_param_0,
.param .b64 __internal_trig_reduction_slowpathd_param_1
)
{
// 40-byte local scratch area (five 64-bit words) that receives the partial products.
.local .align 8 .b8 __local_depot108[40];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<8>;
.reg .s32 %r<48>;
.reg .s64 %rd<90>;
.reg .f64 %fd<3>;
mov.u64 %SPL, __local_depot108;
ld.param.f64 %fd1, [__internal_trig_reduction_slowpathd_param_0];
ld.param.u64 %rd30, [__internal_trig_reduction_slowpathd_param_1];
// %rd31 = base address of the local scratch area.
add.u64 %rd31, %SPL, 0;
// %r1 = upper 32 bits of the argument (sign, exponent, top of the mantissa).
{
.reg .b32 %temp;
mov.b64 {%temp, %r1}, %fd1;
}
// %r46 = sign bit only; %r16 = 11-bit biased exponent; %r17 = exponent - 1024.
and.b32 %r46, %r1, -2147483648;
shr.u32 %r3, %r1, 20;
and.b32 %r16, %r3, 2047;
add.s32 %r17, %r16, -1024;
// %rd2 = argument bits shifted left by 11 with bit 63 forced on
// (the significand with its leading 1 made explicit, left-aligned in 64 bits).
mov.b64 %rd32, %fd1;
shl.b64 %rd33, %rd32, 11;
or.b64 %rd2, %rd33, -9223372036854775808;
// %r18 = (exponent - 1024) / 64 selects which table words are used.
shr.u32 %r18, %r17, 6;
mov.u32 %r19, 16;
sub.s32 %r4, %r19, %r18;
// Loop counter %r45 starts at 15 - %r18 and runs up to %r6 = min(18, 19 - %r18).
mov.u32 %r20, 15;
sub.s32 %r45, %r20, %r18;
mov.u32 %r21, 19;
sub.s32 %r22, %r21, %r18;
mov.u32 %r23, 18;
min.s32 %r6, %r23, %r22;
setp.lt.s32 %p1, %r45, %r6;
@%p1 bra BB108_2;
// Empty range: no multiplications are performed and the carry word is zero.
mov.u64 %rd84, 0;
bra.uni BB108_4;
BB108_2:
// %rd83 = address of the first table word; %rd84 = running carry; %rd82 = store cursor.
sub.s32 %r29, %r20, %r18;
mul.wide.s32 %rd36, %r29, 8;
mov.u64 %rd37, __cudart_i2opi_d;
add.s64 %rd83, %rd37, %rd36;
mov.u64 %rd84, 0;
mov.u64 %rd82, %rd31;
BB108_3:
.pragma "nounroll";
mov.u64 %rd4, %rd82;
ld.const.u64 %rd40, [%rd83];
// 64x64 -> 128-bit multiply-accumulate built from 32-bit halves:
// {%rd39:%rd38} = %rd40 * %rd2 + %rd84 (low word in %rd38, high word in %rd39).
// inline asm
{
.reg .u32 r0, r1, r2, r3, alo, ahi, blo, bhi, clo, chi;
mov.b64 {alo,ahi}, %rd40;
mov.b64 {blo,bhi}, %rd2;
mov.b64 {clo,chi}, %rd84;
mad.lo.cc.u32 r0, alo, blo, clo;
madc.hi.cc.u32 r1, alo, blo, chi;
madc.hi.u32 r2, alo, bhi, 0;
mad.lo.cc.u32 r1, alo, bhi, r1;
madc.hi.cc.u32 r2, ahi, blo, r2;
madc.hi.u32 r3, ahi, bhi, 0;
mad.lo.cc.u32 r1, ahi, blo, r1;
madc.lo.cc.u32 r2, ahi, bhi, r2;
addc.u32 r3, r3, 0;
mov.b64 %rd38, {r0,r1};
mov.b64 %rd39, {r2,r3};
}
// inline asm
// Store the low word, advance both cursors, and carry the high word into the next step.
st.local.u64 [%rd4], %rd38;
add.s64 %rd83, %rd83, 8;
add.s64 %rd9, %rd4, 8;
add.s32 %r45, %r45, 1;
setp.lt.s32 %p2, %r45, %r6;
mov.u64 %rd84, %rd39;
mov.u64 %rd82, %rd9;
@%p2 bra BB108_3;
BB108_4:
// Store the final carry at scratch index (1 - %r4 + %r45), i.e. right after the last
// product word written by the loop.
mov.u32 %r30, 1;
sub.s32 %r31, %r30, %r4;
add.s32 %r32, %r31, %r45;
mul.wide.s32 %rd43, %r32, 8;
add.s64 %rd44, %rd31, %rd43;
st.local.u64 [%rd44], %rd84;
// Pick up scratch words 2 and 3 (byte offsets 16 and 24) as a 128-bit value {%rd86:%rd85}.
ld.local.u64 %rd85, [%rd31+16];
ld.local.u64 %rd86, [%rd31+24];
// %r10 = low 6 bits of the exponent field = residual bit shift within a 64-bit word.
and.b32 %r10, %r3, 63;
setp.eq.s32 %p3, %r10, 0;
@%p3 bra BB108_6;
// Shift the 128-bit value left by %r10 bits, pulling in the top bits of scratch word 1.
mov.u32 %r33, 64;
sub.s32 %r34, %r33, %r10;
shl.b64 %rd45, %rd86, %r10;
shr.u64 %rd46, %rd85, %r34;
or.b64 %rd86, %rd45, %rd46;
shl.b64 %rd47, %rd85, %r10;
ld.local.u64 %rd48, [%rd31+8];
shr.u64 %rd49, %rd48, %r34;
or.b64 %rd85, %rd49, %rd47;
BB108_6:
// %r35 = top two bits of the high word; {%rd88:%rd87} = the value shifted left by 2
// (those two bits removed).
shr.u64 %rd50, %rd86, 62;
cvt.u32.u64 %r35, %rd50;
shr.u64 %rd51, %rd85, 62;
shl.b64 %rd52, %rd86, 2;
or.b64 %rd88, %rd52, %rd51;
shl.b64 %rd87, %rd85, 2;
// %r37 = the next bit down (bit 61); it is added to %r35 as a round-to-nearest step.
shr.u64 %rd53, %rd86, 61;
cvt.u32.u64 %r36, %rd53;
and.b32 %r37, %r36, 1;
add.s32 %r38, %r37, %r35;
// Negate the integer when the argument's sign bit was set, then write it through the
// caller-supplied address (generic-space store).
neg.s32 %r39, %r38;
setp.eq.s32 %p4, %r46, 0;
selp.b32 %r40, %r38, %r39, %p4;
st.u32 [%rd30], %r40;
setp.eq.s32 %p5, %r37, 0;
@%p5 bra BB108_8;
// Rounded up: replace the 128-bit fraction by its two's-complement negation (0 - value)
// and flip the sign bit of the result.
mov.u64 %rd57, 0;
// inline asm
{
.reg .u32 r0, r1, r2, r3, a0, a1, a2, a3, b0, b1, b2, b3;
mov.b64 {a0,a1}, %rd57;
mov.b64 {a2,a3}, %rd57;
mov.b64 {b0,b1}, %rd87;
mov.b64 {b2,b3}, %rd88;
sub.cc.u32 r0, a0, b0;
subc.cc.u32 r1, a1, b1;
subc.cc.u32 r2, a2, b2;
subc.u32 r3, a3, b3;
mov.b64 %rd54, {r0,r1};
mov.b64 %rd55, {r2,r3};
}
// inline asm
xor.b32 %r46, %r46, -2147483648;
mov.u64 %rd88, %rd55;
mov.u64 %rd87, %rd54;
BB108_8:
// Normalize: %r47 = number of leading zero bits in the high word; shift them out,
// refilling from the low word.
clz.b64 %r47, %rd88;
setp.eq.s32 %p6, %r47, 0;
@%p6 bra BB108_10;
shl.b64 %rd60, %rd88, %r47;
mov.u32 %r41, 64;
sub.s32 %r42, %r41, %r47;
shr.u64 %rd61, %rd87, %r42;
or.b64 %rd88, %rd61, %rd60;
BB108_10:
// Multiply the normalized fraction by a 64-bit fixed-point constant, keeping a 128-bit
// product {%rd63:%rd62}.
// NOTE(review): the constant equals 0xC90FDAA22168C235 as an unsigned value, which looks
// like the leading bits of pi (pi/2 scaling) - confirm.
mov.u64 %rd65, -3958705157555305931;
// inline asm
{
.reg .u32 r0, r1, r2, r3, alo, ahi, blo, bhi;
mov.b64 {alo,ahi}, %rd88;
mov.b64 {blo,bhi}, %rd65;
mul.lo.u32 r0, alo, blo;
mul.hi.u32 r1, alo, blo;
mad.lo.cc.u32 r1, alo, bhi, r1;
madc.hi.u32 r2, alo, bhi, 0;
mad.lo.cc.u32 r1, ahi, blo, r1;
madc.hi.cc.u32 r2, ahi, blo, r2;
madc.hi.u32 r3, ahi, bhi, 0;
mad.lo.cc.u32 r2, ahi, bhi, r2;
addc.u32 r3, r3, 0;
mov.b64 %rd62, {r0,r1};
mov.b64 %rd63, {r2,r3};
}
// inline asm
// If the high word of the product is positive as a signed value (bit 63 clear and
// non-zero), double the 128-bit product (add it to itself) and bump the shift count.
setp.lt.s64 %p7, %rd63, 1;
mov.u64 %rd89, %rd63;
@%p7 bra BB108_12;
// inline asm
{
.reg .u32 r0, r1, r2, r3, a0, a1, a2, a3, b0, b1, b2, b3;
mov.b64 {a0,a1}, %rd62;
mov.b64 {a2,a3}, %rd63;
mov.b64 {b0,b1}, %rd62;
mov.b64 {b2,b3}, %rd63;
add.cc.u32 r0, a0, b0;
addc.cc.u32 r1, a1, b1;
addc.cc.u32 r2, a2, b2;
addc.u32 r3, a3, b3;
mov.b64 %rd66, {r0,r1};
mov.b64 %rd67, {r2,r3};
}
// inline asm
add.s32 %r47, %r47, 1;
mov.u64 %rd89, %rd67;
BB108_12:
// Assemble the returned f64 bit pattern:
//   sign     = %r46 moved into bit 63,
//   exponent = (1022 - %r47) placed at bit 52,
//   mantissa = high product word, rounded while being shifted right by 11 in total;
// the mantissa is added (not OR-ed) to the exponent field.
cvt.u64.u32 %rd72, %r46;
shl.b64 %rd73, %rd72, 32;
mov.u32 %r43, 1022;
sub.s32 %r44, %r43, %r47;
cvt.u64.u32 %rd74, %r44;
shl.b64 %rd75, %rd74, 52;
add.s64 %rd76, %rd89, 1;
shr.u64 %rd77, %rd76, 10;
add.s64 %rd78, %rd77, 1;
shr.u64 %rd79, %rd78, 1;
add.s64 %rd80, %rd75, %rd79;
or.b64 %rd81, %rd80, %rd73;
mov.b64 %fd2, %rd81;
st.param.f64 [func_retval0+0], %fd2;
ret;
}
// __internal_accurate_pow: device helper taking two f64 values (param_0 = %fd14,
// param_1 = %fd15) and returning one f64.
// Visible structure: (1) split %fd14 into exponent and a mantissa rescaled to a fixed
// range, (2) evaluate a polynomial in extended (head + tail) precision and combine it
// with exponent * constant, (3) multiply that head/tail pair by %fd15, (4) evaluate a
// second polynomial and scale by a power of two, (5) apply a final tail correction.
// NOTE(review): by name and constants this is pow(a, b) = exp(b * log(a)) for the
// magnitude of a; special cases and sign handling are presumably done by the callers,
// which are not visible here - confirm.
.func (.param .b64 func_retval0) __internal_accurate_pow(
.param .b64 __internal_accurate_pow_param_0,
.param .b64 __internal_accurate_pow_param_1
)
{
.reg .pred %p<11>;
.reg .f32 %f<5>;
.reg .s32 %r<39>;
.reg .f64 %fd<144>;
ld.param.f64 %fd14, [__internal_accurate_pow_param_0];
ld.param.f64 %fd15, [__internal_accurate_pow_param_1];
// %r35 = upper 32 bits of the first argument, %r34 = lower 32 bits.
{
.reg .b32 %temp;
mov.b64 {%temp, %r35}, %fd14;
}
{
.reg .b32 %temp;
mov.b64 {%r34, %temp}, %fd14;
}
// %r36 = 11-bit biased exponent field.
shr.u32 %r18, %r35, 20;
and.b32 %r36, %r18, 2047;
setp.ne.s32 %p1, %r36, 0;
@%p1 bra BB109_2;
// Exponent field is zero (zero or subnormal input): scale by 2^54
// (0d4350000000000000), re-split, and compensate with -54 in the exponent.
mul.f64 %fd16, %fd14, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r35}, %fd16;
}
{
.reg .b32 %temp;
mov.b64 {%r34, %temp}, %fd16;
}
shr.u32 %r19, %r35, 20;
and.b32 %r20, %r19, 2047;
add.s32 %r36, %r20, -54;
BB109_2:
// %r37 = unbiased exponent. %fd141 = mantissa with the exponent field forced to 0x3FF
// (the mask 0x800FFFFF keeps the sign bit and the mantissa bits), i.e. magnitude in [1, 2).
add.s32 %r37, %r36, -1023;
and.b32 %r21, %r35, -2146435073;
or.b32 %r22, %r21, 1072693248;
mov.b64 %fd141, {%r34, %r22};
// If the high word is at or above 0x3FF6A09F (about sqrt(2)), halve the mantissa by
// lowering its exponent field by one and use exponent + 1 instead.
setp.lt.u32 %p2, %r22, 1073127583;
@%p2 bra BB109_4;
{
.reg .b32 %temp;
mov.b64 {%r23, %temp}, %fd141;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r24}, %fd141;
}
add.s32 %r25, %r24, -1048576;
mov.b64 %fd141, {%r23, %r25};
add.s32 %r37, %r36, -1022;
BB109_4:
// %fd17 = m + 1. Its reciprocal is seeded from a single-precision approximation
// (%fd18) and refined with FMA steps into %fd23.
add.f64 %fd17, %fd141, 0d3FF0000000000000;
mov.f64 %fd19, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f1,%fd17;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd18,%f2;
// inline asm
neg.f64 %fd20, %fd17;
fma.rn.f64 %fd21, %fd20, %fd18, %fd19;
fma.rn.f64 %fd22, %fd21, %fd21, %fd21;
fma.rn.f64 %fd23, %fd22, %fd18, %fd18;
// %fd24 = m - 1; %fd26 = 2 * (m - 1) / (m + 1) (product plus an FMA of the same product);
// %fd27 = %fd26 squared.
add.f64 %fd24, %fd141, 0dBFF0000000000000;
mul.f64 %fd25, %fd24, %fd23;
fma.rn.f64 %fd26, %fd24, %fd23, %fd25;
mul.f64 %fd27, %fd26, %fd26;
// Horner evaluation of a polynomial in %fd27 with hard-coded coefficients.
mov.f64 %fd28, 0d3ED0F5D241AD3B5A;
mov.f64 %fd29, 0d3EB0F5FF7D2CAFE2;
fma.rn.f64 %fd30, %fd29, %fd27, %fd28;
mov.f64 %fd31, 0d3EF3B20A75488A3F;
fma.rn.f64 %fd32, %fd30, %fd27, %fd31;
mov.f64 %fd33, 0d3F1745CDE4FAECD5;
fma.rn.f64 %fd34, %fd32, %fd27, %fd33;
mov.f64 %fd35, 0d3F3C71C7258A578B;
fma.rn.f64 %fd36, %fd34, %fd27, %fd35;
mov.f64 %fd37, 0d3F6249249242B910;
fma.rn.f64 %fd38, %fd36, %fd27, %fd37;
mov.f64 %fd39, 0d3F89999999999DFB;
fma.rn.f64 %fd40, %fd38, %fd27, %fd39;
// %fd45 = correction term (tail) for the quotient %fd26.
sub.f64 %fd41, %fd24, %fd26;
add.f64 %fd42, %fd41, %fd41;
neg.f64 %fd43, %fd26;
fma.rn.f64 %fd44, %fd43, %fd24, %fd42;
mul.f64 %fd45, %fd23, %fd44;
// Last polynomial step kept as a head/tail pair: head %fd52, tail %fd54.
fma.rn.f64 %fd46, %fd40, %fd27, 0d3FB5555555555555;
mov.f64 %fd47, 0d3FB5555555555555;
sub.f64 %fd48, %fd47, %fd46;
fma.rn.f64 %fd49, %fd40, %fd27, %fd48;
add.f64 %fd50, %fd49, 0d0000000000000000;
add.f64 %fd51, %fd50, 0dBC46A4CB00B9E7B0;
add.f64 %fd52, %fd46, %fd51;
sub.f64 %fd53, %fd46, %fd52;
add.f64 %fd54, %fd53, %fd51;
// Three successive head/tail multiplications by the quotient (head %fd26, tail %fd45);
// each one recovers the rounding error of the product with an FMA and renormalizes.
mul.rn.f64 %fd55, %fd52, %fd26;
neg.f64 %fd56, %fd55;
fma.rn.f64 %fd57, %fd52, %fd26, %fd56;
fma.rn.f64 %fd58, %fd52, %fd45, %fd57;
fma.rn.f64 %fd59, %fd54, %fd26, %fd58;
add.f64 %fd60, %fd55, %fd59;
sub.f64 %fd61, %fd55, %fd60;
add.f64 %fd62, %fd61, %fd59;
mul.rn.f64 %fd63, %fd60, %fd26;
neg.f64 %fd64, %fd63;
fma.rn.f64 %fd65, %fd60, %fd26, %fd64;
fma.rn.f64 %fd66, %fd60, %fd45, %fd65;
fma.rn.f64 %fd67, %fd62, %fd26, %fd66;
add.f64 %fd68, %fd63, %fd67;
sub.f64 %fd69, %fd63, %fd68;
add.f64 %fd70, %fd69, %fd67;
mul.rn.f64 %fd71, %fd68, %fd26;
neg.f64 %fd72, %fd71;
fma.rn.f64 %fd73, %fd68, %fd26, %fd72;
fma.rn.f64 %fd74, %fd68, %fd45, %fd73;
fma.rn.f64 %fd75, %fd70, %fd26, %fd74;
add.f64 %fd76, %fd71, %fd75;
sub.f64 %fd77, %fd71, %fd76;
add.f64 %fd78, %fd77, %fd75;
// Add the quotient itself back in: head %fd84, tail %fd86.
add.f64 %fd79, %fd26, %fd76;
sub.f64 %fd80, %fd26, %fd79;
add.f64 %fd81, %fd80, %fd76;
add.f64 %fd82, %fd81, %fd78;
add.f64 %fd83, %fd82, %fd45;
add.f64 %fd84, %fd79, %fd83;
sub.f64 %fd85, %fd79, %fd84;
add.f64 %fd86, %fd85, %fd83;
// Exponent contribution: %r37 times a constant split into a high part
// (0d3FE62E42FEFA3000, about 0.6931 - presumably ln 2) and a small low part, summed
// with the pair above into head %fd97 / tail %fd99.
cvt.rn.f64.s32 %fd87, %r37;
mov.f64 %fd88, 0d3FE62E42FEFA3000;
mul.rn.f64 %fd89, %fd87, %fd88;
mov.f64 %fd90, 0d3D53DE6AF278ECE6;
mul.rn.f64 %fd91, %fd87, %fd90;
add.f64 %fd92, %fd89, %fd84;
sub.f64 %fd93, %fd89, %fd92;
add.f64 %fd94, %fd93, %fd84;
add.f64 %fd95, %fd94, %fd86;
add.f64 %fd96, %fd95, %fd91;
add.f64 %fd97, %fd92, %fd96;
sub.f64 %fd98, %fd92, %fd97;
add.f64 %fd99, %fd98, %fd96;
// When |second argument| exceeds 0d7F0D2A1BE4048F90 it is pre-scaled by 2^-13
// (0d3F20000000000000) before the multiplication below.
abs.f64 %fd100, %fd15;
setp.gt.f64 %p3, %fd100, 0d7F0D2A1BE4048F90;
mul.f64 %fd101, %fd15, 0d3F20000000000000;
selp.f64 %fd102, %fd101, %fd15, %p3;
// Product of the head/tail pair with the (possibly scaled) second argument:
// head %fd4, tail %fd5.
mul.rn.f64 %fd103, %fd97, %fd102;
neg.f64 %fd104, %fd103;
fma.rn.f64 %fd105, %fd97, %fd102, %fd104;
fma.rn.f64 %fd106, %fd99, %fd102, %fd105;
add.f64 %fd4, %fd103, %fd106;
sub.f64 %fd107, %fd103, %fd4;
add.f64 %fd5, %fd107, %fd106;
// Range test on the high word of %fd4: the polynomial path at BB109_6 is taken when the
// word is below 0x40862E43 as unsigned, or below 0xC0874911 as signed.
{
.reg .b32 %temp;
mov.b64 {%temp, %r13}, %fd4;
}
setp.lt.u32 %p4, %r13, 1082535491;
setp.lt.s32 %p5, %r13, -1064875759;
or.pred %p6, %p4, %p5;
@%p6 bra BB109_6;
// Out of range: result is 0.0 when %fd4 is negative, +infinity when it is positive;
// if %fd4 is NaN (|x| compares unordered-or-greater than infinity), return %fd4 + %fd4.
setp.lt.s32 %p7, %r13, 0;
selp.f64 %fd108, 0d0000000000000000, 0d7FF0000000000000, %p7;
abs.f64 %fd109, %fd4;
setp.gtu.f64 %p8, %fd109, 0d7FF0000000000000;
add.f64 %fd110, %fd4, %fd4;
selp.f64 %fd143, %fd110, %fd108, %p8;
bra.uni BB109_10;
BB109_6:
// %fd112 = round-to-nearest-integer of %fd4 * 0d3FF71547652B82FE (about 1.4427,
// presumably log2(e)); %r14 is the same value as an s32.
mul.f64 %fd111, %fd4, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd112, %fd111;
cvt.rzi.s32.f64 %r14, %fd112;
// %fd116 = %fd4 minus %fd112 times a two-part constant (high 0dBFE62E42FEFA39EF,
// low 0dBC7ABC9E3B39803F), i.e. the reduced argument.
mov.f64 %fd113, 0dBFE62E42FEFA39EF;
fma.rn.f64 %fd114, %fd112, %fd113, %fd4;
mov.f64 %fd115, 0dBC7ABC9E3B39803F;
fma.rn.f64 %fd116, %fd112, %fd115, %fd114;
// Horner evaluation of a polynomial in %fd116; the last two steps add 1.0 (%fd19).
mov.f64 %fd117, 0d3E928A27E30F5561;
mov.f64 %fd118, 0d3E5AE6449C0686C0;
fma.rn.f64 %fd119, %fd118, %fd116, %fd117;
mov.f64 %fd120, 0d3EC71DE8E6486D6B;
fma.rn.f64 %fd121, %fd119, %fd116, %fd120;
mov.f64 %fd122, 0d3EFA019A6B2464C5;
fma.rn.f64 %fd123, %fd121, %fd116, %fd122;
mov.f64 %fd124, 0d3F2A01A0171064A5;
fma.rn.f64 %fd125, %fd123, %fd116, %fd124;
mov.f64 %fd126, 0d3F56C16C17F29C8D;
fma.rn.f64 %fd127, %fd125, %fd116, %fd126;
mov.f64 %fd128, 0d3F8111111111A24E;
fma.rn.f64 %fd129, %fd127, %fd116, %fd128;
mov.f64 %fd130, 0d3FA555555555211D;
fma.rn.f64 %fd131, %fd129, %fd116, %fd130;
mov.f64 %fd132, 0d3FC5555555555530;
fma.rn.f64 %fd133, %fd131, %fd116, %fd132;
mov.f64 %fd134, 0d3FE0000000000005;
fma.rn.f64 %fd135, %fd133, %fd116, %fd134;
fma.rn.f64 %fd137, %fd135, %fd116, %fd19;
fma.rn.f64 %fd142, %fd137, %fd116, %fd19;
// Scale by 2^%r14. When |%r14| < 1023 a single multiplication by a constructed power
// of two suffices (BB109_8).
abs.s32 %r26, %r14;
setp.lt.s32 %p9, %r26, 1023;
@%p9 bra BB109_8;
// Otherwise the scale factor is split into two high words (%r29 and %r38) that are
// applied as two separate multiplications.
// NOTE(review): presumably to avoid overflowing the exponent field of a single
// power-of-two factor - confirm.
add.s32 %r27, %r14, 2046;
shl.b32 %r28, %r27, 19;
and.b32 %r29, %r28, -1048576;
shl.b32 %r30, %r27, 20;
sub.s32 %r38, %r30, %r29;
mov.u32 %r31, 0;
mov.b64 %fd138, {%r31, %r29};
mul.f64 %fd142, %fd142, %fd138;
bra.uni BB109_9;
BB109_8:
// High word of 2^%r14: (%r14 << 20) + 0x3FF00000.
shl.b32 %r32, %r14, 20;
add.s32 %r38, %r32, 1072693248;
BB109_9:
// Multiply by the f64 whose high word is %r38 and whose low word is zero.
mov.u32 %r33, 0;
mov.b64 %fd139, {%r33, %r38};
mul.f64 %fd143, %fd142, %fd139;
BB109_10:
// Unless the result is infinite, fold in the tail: result = result + result * %fd5.
abs.f64 %fd140, %fd143;
setp.eq.f64 %p10, %fd140, 0d7FF0000000000000;
@%p10 bra BB109_12;
fma.rn.f64 %fd143, %fd143, %fd5, %fd143;
BB109_12:
st.param.f64 [func_retval0+0], %fd143;
ret;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy